Commit dbad951

Support Spark 3.1.1 (#836)

1 parent 04b9a33 commit dbad951

11 files changed: +92 −14 lines

azure-pipelines.yml

Lines changed: 58 additions & 0 deletions
@@ -44,6 +44,54 @@ variables:
   forwardCompatibleTestOptions_Windows_3_0_2: "--filter FullyQualifiedName=NONE"
   forwardCompatibleTestOptions_Linux_3_0_2: $(forwardCompatibleTestOptions_Windows_3_0_2)

+  # Skip backwardCompatible tests because Microsoft.Spark.Worker requires Spark 3.1 support in
+  # CommandProcessor.cs and TaskContextProcessor.cs. Support added in https://github.com/dotnet/spark/pull/836
+  backwardCompatibleTestOptions_Windows_3_1: "--filter \
+    (FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.DataFrameTests.TestDataFrameGroupedMapUdf)&\
+    (FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.DataFrameTests.TestGroupedMapUdf)&\
+    (FullyQualifiedName!=Microsoft.Spark.E2ETest.UdfTests.UdfComplexTypesTests.TestUdfRegistrationWithReturnAsRowType)&\
+    (FullyQualifiedName!=Microsoft.Spark.E2ETest.UdfTests.UdfComplexTypesTests.TestUdfWithArrayChain)&\
+    (FullyQualifiedName!=Microsoft.Spark.E2ETest.UdfTests.UdfComplexTypesTests.TestUdfWithSimpleArrayType)&\
+    (FullyQualifiedName!=Microsoft.Spark.E2ETest.UdfTests.UdfComplexTypesTests.TestUdfWithMapType)&\
+    (FullyQualifiedName!=Microsoft.Spark.E2ETest.UdfTests.UdfComplexTypesTests.TestUdfWithRowArrayType)&\
+    (FullyQualifiedName!=Microsoft.Spark.E2ETest.UdfTests.UdfComplexTypesTests.TestUdfWithReturnAsMapType)&\
+    (FullyQualifiedName!=Microsoft.Spark.E2ETest.UdfTests.UdfComplexTypesTests.TestUdfWithReturnAsArrayOfArrayType)&\
+    (FullyQualifiedName!=Microsoft.Spark.E2ETest.UdfTests.UdfComplexTypesTests.TestUdfWithArrayOfArrayType)&\
+    (FullyQualifiedName!=Microsoft.Spark.E2ETest.UdfTests.UdfComplexTypesTests.TestUdfWithMapOfMapType)&\
+    (FullyQualifiedName!=Microsoft.Spark.E2ETest.UdfTests.UdfComplexTypesTests.TestUdfWithReturnAsSimpleArrayType)&\
+    (FullyQualifiedName!=Microsoft.Spark.E2ETest.UdfTests.UdfComplexTypesTests.TestUdfWithRowType)&\
+    (FullyQualifiedName!=Microsoft.Spark.E2ETest.UdfTests.UdfComplexTypesTests.TestUdfWithReturnAsRowType)&\
+    (FullyQualifiedName!=Microsoft.Spark.E2ETest.UdfTests.UdfSerDeTests.TestExternalStaticMethodCall)&\
+    (FullyQualifiedName!=Microsoft.Spark.E2ETest.UdfTests.UdfSerDeTests.TestInitExternalClassInUdf)&\
+    (FullyQualifiedName!=Microsoft.Spark.E2ETest.UdfTests.UdfSerDeTests.TestUdfClosure)&\
+    (FullyQualifiedName!=Microsoft.Spark.E2ETest.UdfTests.UdfSimpleTypesTests.TestUdfWithReturnAsDateType)&\
+    (FullyQualifiedName!=Microsoft.Spark.E2ETest.UdfTests.UdfSimpleTypesTests.TestUdfWithReturnAsTimestampType)&\
+    (FullyQualifiedName!=Microsoft.Spark.E2ETest.UdfTests.UdfSimpleTypesTests.TestUdfWithDateType)&\
+    (FullyQualifiedName!=Microsoft.Spark.E2ETest.UdfTests.UdfSimpleTypesTests.TestUdfWithTimestampType)&\
+    (FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.BroadcastTests.TestMultipleBroadcast)&\
+    (FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.BroadcastTests.TestUnpersist)&\
+    (FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.BroadcastTests.TestDestroy)&\
+    (FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.PairRDDFunctionsTests.TestCollect)&\
+    (FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.RDDTests.TestPipelinedRDD)&\
+    (FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.RDDTests.TestMap)&\
+    (FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.RDDTests.TestFlatMap)&\
+    (FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.RDDTests.TestMapPartitions)&\
+    (FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.RDDTests.TestMapPartitionsWithIndex)&\
+    (FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.RDDTests.TestTextFile)&\
+    (FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.RDDTests.TestFilter)&\
+    (FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.DataFrameTests.TestDataFrameVectorUdf)&\
+    (FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.DataFrameTests.TestVectorUdf)&\
+    (FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.DataFrameTests.TestWithColumn)&\
+    (FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.DataFrameTests.TestUDF)&\
+    (FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.SparkSessionExtensionsTests.TestVersion)&\
+    (FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.DataStreamWriterTests.TestForeachBatch)&\
+    (FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.DataStreamWriterTests.TestForeach)"
+  # Skip all forwardCompatible tests since the microsoft-spark-3-1 jar does not get built when
+  # building the forwardCompatible repo.
+  forwardCompatibleTestOptions_Windows_3_1: "--filter FullyQualifiedName=NONE"
+  backwardCompatibleTestOptions_Linux_3_1: $(backwardCompatibleTestOptions_Windows_3_1)
+  forwardCompatibleTestOptions_Linux_3_1: $(forwardCompatibleTestOptions_Windows_3_1)
+
   # Azure DevOps variables are transformed into environment variables, with these variables we
   # avoid the first time experience and telemetry to speed up the build.
   DOTNET_CLI_TELEMETRY_OPTOUT: 1

@@ -361,3 +409,13 @@ stages:
       testOptions: ""
       backwardCompatibleTestOptions: $(backwardCompatibleTestOptions_Linux_3_0)
      forwardCompatibleTestOptions: $(forwardCompatibleTestOptions_Linux_3_0_2)
+  - version: '3.1.1'
+    jobOptions:
+    - pool: 'Hosted VS2017'
+      testOptions: ""
+      backwardCompatibleTestOptions: $(backwardCompatibleTestOptions_Windows_3_1)
+      forwardCompatibleTestOptions: $(forwardCompatibleTestOptions_Windows_3_1)
+    - pool: 'Hosted Ubuntu 1604'
+      testOptions: ""
+      backwardCompatibleTestOptions: $(backwardCompatibleTestOptions_Linux_3_1)
+      forwardCompatibleTestOptions: $(forwardCompatibleTestOptions_Linux_3_1)

src/csharp/Extensions/Microsoft.Spark.Extensions.Delta.E2ETest/DeltaTableTests.cs

Lines changed: 10 additions & 2 deletions
@@ -30,7 +30,11 @@ public DeltaTableTests(DeltaFixture fixture)
         /// Run the end-to-end scenario from the Delta Quickstart tutorial.
         /// </summary>
         /// <see cref="https://docs.delta.io/latest/quick-start.html"/>
-        [SkipIfSparkVersionIsLessThan(Versions.V2_4_2)]
+        ///
+        /// Delta 0.8.0 is not compatible with Spark 3.1.1.
+        /// Disable Delta tests that have code paths that create an
+        /// `org.apache.spark.sql.catalyst.expressions.Alias` object.
+        [SkipIfSparkVersionIsNotInRange(Versions.V2_4_2, Versions.V3_1_1)]
         public void TestTutorialScenario()
         {
             using var tempDirectory = new TemporaryDirectory();

@@ -223,7 +227,11 @@ void testWrapper(
         /// <summary>
         /// Test that methods return the expected signature.
         /// </summary>
-        [SkipIfSparkVersionIsLessThan(Versions.V2_4_2)]
+        ///
+        /// Delta 0.8.0 is not compatible with Spark 3.1.1.
+        /// Disable Delta tests that have code paths that create an
+        /// `org.apache.spark.sql.catalyst.expressions.Alias` object.
+        [SkipIfSparkVersionIsNotInRange(Versions.V2_4_2, Versions.V3_1_1)]
         public void TestSignaturesV2_4_X()
         {
             using var tempDirectory = new TemporaryDirectory();
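
The new `SkipIfSparkVersionIsNotInRange` attribute bounds a test to a half-open version range, so the Delta tests above run on 2.4.2 and later but are skipped once the detected Spark version reaches 3.1.1. A hedged sketch of how such an attribute can be built on top of xunit's `FactAttribute`; the real implementation lives in the Microsoft.Spark.E2ETest utilities and may differ in detail, and `SparkSettings.Version` is assumed here as the helper exposing the Spark version under test:

using System;
using Xunit;

// Sketch only: skips a test unless minInclusive <= version < maxExclusive,
// mirroring the (Versions.V2_4_2, Versions.V3_1_1) usage above.
public class SkipIfSparkVersionIsNotInRangeAttribute : FactAttribute
{
    public SkipIfSparkVersionIsNotInRangeAttribute(string minInclusive, string maxExclusive)
    {
        Version version = SparkSettings.Version; // assumed test-settings helper
        if (version < new Version(minInclusive) || version >= new Version(maxExclusive))
        {
            Skip = $"Ignored on Spark {version}: requires [{minInclusive}, {maxExclusive})";
        }
    }
}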

src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/DataFrameWriterTests.cs

Lines changed: 11 additions & 6 deletions
@@ -69,12 +69,7 @@ public void TestSignaturesV2_3_X()

             // TODO: Test dfw.Jdbc without running a local db.

-            dfw.Option("path", tempDir.Path).SaveAsTable("TestTable");
-
-            dfw.InsertInto("TestTable");
-
-            dfw.Option("path", $"{tempDir.Path}TestSavePath1").Save();
-            dfw.Save($"{tempDir.Path}TestSavePath2");
+            dfw.Save($"{tempDir.Path}TestSavePath1");

             dfw.Json($"{tempDir.Path}TestJsonPath");

@@ -85,6 +80,16 @@ public void TestSignaturesV2_3_X()
             dfw.Text($"{tempDir.Path}TestTextPath");

             dfw.Csv($"{tempDir.Path}TestCsvPath");
+
+            dfw.Option("path", tempDir.Path).SaveAsTable("TestTable");
+
+            dfw.InsertInto("TestTable");
+
+            // In Spark 3.1.1+, setting the `path` option and then calling .Save(path) is not
+            // supported unless the `spark.sql.legacy.pathOptionBehavior.enabled` conf is set.
+            // .Json(path), .Parquet(path), etc. follow the same code path, so the conf
+            // needs to be set in those scenarios as well.
+            dfw.Option("path", $"{tempDir.Path}TestSavePath2").Save();
         }
     }
 }
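
For context, the legacy behavior the comment refers to can be re-enabled per session. A minimal sketch, not part of the commit, with a placeholder app name and paths, showing `spark.sql.legacy.pathOptionBehavior.enabled` being set so a `path` option and an explicit path argument can again be combined on Spark 3.1.1+:

using Microsoft.Spark.Sql;

class LegacyPathOptionSketch
{
    static void Main()
    {
        SparkSession spark = SparkSession
            .Builder()
            .AppName("legacy-path-option-sketch")
            .Config("spark.sql.legacy.pathOptionBehavior.enabled", "true")
            .GetOrCreate();

        DataFrame df = spark.Range(10);

        // On Spark 3.1.1+ this line throws an AnalysisException without the conf above,
        // because both a `path` option and a .Save(path) argument are supplied;
        // with the conf set, the explicit argument takes precedence.
        df.Write().Mode("overwrite").Option("path", "/tmp/ignored").Save("/tmp/TestSavePath");
    }
}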

src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/Streaming/DataStreamReaderTests.cs

Lines changed: 6 additions & 1 deletion
@@ -52,7 +52,6 @@ public void TestSignaturesV2_3_X()
                 }));

             string jsonFilePath = Path.Combine(TestEnvironment.ResourceDirectory, "people.json");
-            Assert.IsType<DataFrame>(dsr.Format("json").Option("path", jsonFilePath).Load());
             Assert.IsType<DataFrame>(dsr.Format("json").Load(jsonFilePath));
             Assert.IsType<DataFrame>(dsr.Json(jsonFilePath));
             Assert.IsType<DataFrame>(

@@ -63,6 +62,12 @@ public void TestSignaturesV2_3_X()
                 dsr.Parquet(Path.Combine(TestEnvironment.ResourceDirectory, "users.parquet")));
             Assert.IsType<DataFrame>(
                 dsr.Text(Path.Combine(TestEnvironment.ResourceDirectory, "people.txt")));
+
+            // In Spark 3.1.1+, setting the `path` option and then calling .Load(path) is not
+            // supported unless the `spark.sql.legacy.pathOptionBehavior.enabled` conf is set.
+            // .Json(path), .Parquet(path), etc. follow the same code path, so the conf
+            // needs to be set in those scenarios as well.
+            Assert.IsType<DataFrame>(dsr.Format("json").Option("path", jsonFilePath).Load());
         }
     }
 }
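
The same conf governs the streaming read side and can also be flipped at runtime. A hedged sketch, assuming an existing `spark` session, placeholder paths, and the string-schema overload of `Schema` for brevity:

// Sketch: the conflicting combination that Spark 3.1.1+ rejects by default.
spark.Conf().Set("spark.sql.legacy.pathOptionBehavior.enabled", "true");

DataFrame people = spark
    .ReadStream()
    .Format("json")
    .Schema("name STRING, age INT") // streaming file sources need an explicit schema
    .Option("path", "/tmp/ignored")
    .Load("/tmp/streaming-input");  // allowed again; the explicit path wins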

src/csharp/Microsoft.Spark.Worker/Processor/CommandProcessor.cs

Lines changed: 1 addition & 1 deletion
@@ -99,7 +99,7 @@ private static SqlCommand[] ReadSqlCommands(
             {
                 (2, 3) => SqlCommandProcessorV2_3_X.Process(evalType, stream),
                 (2, 4) => SqlCommandProcessorV2_4_X.Process(evalType, stream),
-                (3, 0) => SqlCommandProcessorV2_4_X.Process(evalType, stream),
+                (3, _) => SqlCommandProcessorV2_4_X.Process(evalType, stream),
                 _ => throw new NotSupportedException($"Spark {version} not supported.")
             };
         }
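
The `(3, _)` arm is a C# tuple pattern with a discard in the minor-version position, so 3.0, 3.1, and any future 3.x release all route to the same processor without further edits. The same change recurs in TaskContextProcessor.cs, Broadcast.cs, and DataFrame.cs below. A standalone sketch of the dispatch idiom, with illustrative names rather than the Worker's actual types:

using System;

class VersionDispatchSketch
{
    // Match on (major, minor); `_` accepts any minor version of Spark 3.
    static string PickProcessor(Version version) =>
        (version.Major, version.Minor) switch
        {
            (2, 3) => "SqlCommandProcessorV2_3_X",
            (2, 4) => "SqlCommandProcessorV2_4_X",
            (3, _) => "SqlCommandProcessorV2_4_X", // 3.0, 3.1, 3.2, ...
            _ => throw new NotSupportedException($"Spark {version} not supported.")
        };

    static void Main()
    {
        Console.WriteLine(PickProcessor(new Version("3.1.1"))); // SqlCommandProcessorV2_4_X
        Console.WriteLine(PickProcessor(new Version("2.4.0"))); // SqlCommandProcessorV2_4_X
    }
}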

src/csharp/Microsoft.Spark.Worker/Processor/TaskContextProcessor.cs

Lines changed: 1 addition & 1 deletion
@@ -23,7 +23,7 @@ internal TaskContext Process(Stream stream)
             {
                 (2, 3) => TaskContextProcessorV2_3_X.Process(stream),
                 (2, 4) => TaskContextProcessorV2_4_X.Process(stream),
-                (3, 0) => TaskContextProcessorV3_0_X.Process(stream),
+                (3, _) => TaskContextProcessorV3_0_X.Process(stream),
                 _ => throw new NotSupportedException($"Spark {_version} not supported.")
             };
         }

src/csharp/Microsoft.Spark/Broadcast.cs

Lines changed: 1 addition & 1 deletion
@@ -129,7 +129,7 @@ private JvmObjectReference CreateBroadcast(SparkContext sc, T value)
                 CreateBroadcast_V2_3_1_AndBelow(javaSparkContext, value),
                 (2, 3) => CreateBroadcast_V2_3_2_AndAbove(javaSparkContext, sc, value),
                 (2, 4) => CreateBroadcast_V2_3_2_AndAbove(javaSparkContext, sc, value),
-                (3, 0) => CreateBroadcast_V2_3_2_AndAbove(javaSparkContext, sc, value),
+                (3, _) => CreateBroadcast_V2_3_2_AndAbove(javaSparkContext, sc, value),
                 _ => throw new NotSupportedException($"Spark {version} not supported.")
             };
         }
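
For reference, a hedged usage sketch of the broadcast API this dispatch backs; the app name, lookup data, and paths are placeholders, and on any Spark 3.x the `CreateBroadcast_V2_3_2_AndAbove` branch above is taken:

using System;
using Microsoft.Spark;
using Microsoft.Spark.Sql;
using static Microsoft.Spark.Sql.Functions;

SparkSession spark = SparkSession.Builder().AppName("broadcast-sketch").GetOrCreate();

// Broadcast a small, serializable lookup table once; executors read it via Value().
Broadcast<string[]> labels = spark.SparkContext.Broadcast(new[] { "zero", "one", "two" });

// Reference the broadcast inside a UDF (ids produced by Range() are longs).
Func<Column, Column> toLabel = Udf<long, string>(id => labels.Value()[(int)id]);
spark.Range(3).WithColumn("label", toLabel(Col("id"))).Show();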

src/csharp/Microsoft.Spark/Sql/DataFrame.cs

Lines changed: 1 addition & 1 deletion
@@ -1057,7 +1057,7 @@ private IEnumerable<Row> GetRows(string funcName, params object[] args)
                 // string to use for the authentication.
                 (2, 3, _) => ParseConnectionInfo(result, false),
                 (2, 4, _) => ParseConnectionInfo(result, false),
-                (3, 0, _) => ParseConnectionInfo(result, false),
+                (3, _, _) => ParseConnectionInfo(result, false),
                 _ => throw new NotSupportedException($"Spark {version} not supported.")
             };
         }

src/csharp/Microsoft.Spark/Versions.cs

Lines changed: 1 addition & 0 deletions
@@ -13,5 +13,6 @@ internal static class Versions
         internal const string V2_4_0 = "2.4.0";
         internal const string V2_4_2 = "2.4.2";
         internal const string V3_0_0 = "3.0.0";
+        internal const string V3_1_1 = "3.1.1";
     }
 }

src/scala/microsoft-spark-3-0/pom.xml

Lines changed: 1 addition & 1 deletion
@@ -10,7 +10,7 @@
   <inceptionYear>2019</inceptionYear>
   <properties>
     <encoding>UTF-8</encoding>
-    <scala.version>2.12.8</scala.version>
+    <scala.version>2.12.10</scala.version>
     <scala.binary.version>2.12</scala.binary.version>
     <spark.version>3.0.0</spark.version>
   </properties>
