apache · yaooqinn · Jan 23, 2025 · Jan 23, 2025 · Jan 24, 2025 · Feb 10, 2025
diff --git a/examples/src/main/scala/org/apache/spark/examples/sql/SparkDataFramePi.scala b/examples/src/main/scala/org/apache/spark/examples/sql/SparkDataFramePi.scala
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.sql
+
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.functions._
+
+/** Approximates pi with SparkSession/DataFrame APIs; optional args(0) is the number of slices. */
+object SparkDataFramePi {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession.builder().appName("Spark DataFrame Pi").getOrCreate()
+    import spark.implicits._
+    try {
+      val slices = if (args.length > 0) args(0).toInt else 2
+      val n = math.min(100000L * slices, Int.MaxValue).toInt // avoid overflow
+      // v = x^2 + y^2 for a random (x, y); rows with v <= 1 fall inside the unit circle
+      val count = spark.range(0, n, 1, slices)
+        .select((pow(rand() * 2 - 1, lit(2)) + pow(rand() * 2 - 1, lit(2))).as("v"))
+        .where($"v" <= 1)
+        .count()
+      // range(0, n) yields exactly n samples, so the ratio is taken over n, not n - 1
+      println(s"Pi is roughly ${4.0 * count / n}")
+    } finally spark.stop() // release the session even if parsing or the job fails
+  }
+}
+// scalastyle:on println