@@ -26,6 +26,7 @@ import org.scalatest.time.SpanSugar._
 
 import org.apache.spark.SparkException
 import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent, SparkListenerJobStart}
+import org.apache.spark.shuffle.sort.SortShuffleManager
 import org.apache.spark.sql.{Dataset, QueryTest, Row, SparkSession, Strategy}
 import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight}
 import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan}
@@ -59,7 +60,8 @@ class AdaptiveQueryExecSuite
 
   setupTestData()
 
-  private def runAdaptiveAndVerifyResult(query: String): (SparkPlan, SparkPlan) = {
+  private def runAdaptiveAndVerifyResult(query: String,
+      skipCheckAnswer: Boolean = false): (SparkPlan, SparkPlan) = {
     var finalPlanCnt = 0
     val listener = new SparkListener {
       override def onOtherEvent(event: SparkListenerEvent): Unit = {
@@ -80,8 +82,10 @@ class AdaptiveQueryExecSuite
     assert(planBefore.toString.startsWith("AdaptiveSparkPlan isFinalPlan=false"))
     val result = dfAdaptive.collect()
     withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") {
-      val df = sql(query)
-      checkAnswer(df, result)
+      if (!skipCheckAnswer) {
+        val df = sql(query)
+        checkAnswer(df, result)
+      }
     }
     val planAfter = dfAdaptive.queryExecution.executedPlan
     assert(planAfter.toString.startsWith("AdaptiveSparkPlan isFinalPlan=true"))
@@ -2390,6 +2394,28 @@ class AdaptiveQueryExecSuite
     }
   }
 
+  test("SPARK-48037: Fix SortShuffleWriter lacks shuffle write related metrics " +
+    "resulting in potentially inaccurate data") {
+    withTable("t3") {
+      withSQLConf(
+        SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true",
+        SQLConf.SHUFFLE_PARTITIONS.key -> (SortShuffleManager
+          .MAX_SHUFFLE_OUTPUT_PARTITIONS_FOR_SERIALIZED_MODE + 1).toString) {
+        sql("CREATE TABLE t3 USING PARQUET AS SELECT id FROM range(2)")
+        val (plan, adaptivePlan) = runAdaptiveAndVerifyResult(
+          """
+            |SELECT id, count(*)
+            |FROM t3
+            |GROUP BY id
+            |LIMIT 1
+            |""".stripMargin, skipCheckAnswer = true)
+        // The shuffle stage produces two rows and the limit operator should not be optimized out.
+        assert(findTopLevelLimit(plan).size == 1)
+        assert(findTopLevelLimit(adaptivePlan).size == 1)
+      }
+    }
+  }
+
   test("SPARK-37063: OptimizeSkewInRebalancePartitions support optimize non-root node") {
     withTempView("v") {
       withSQLConf(
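
Note on the new test's configuration: SHUFFLE_PARTITIONS is set to SortShuffleManager.MAX_SHUFFLE_OUTPUT_PARTITIONS_FOR_SERIALIZED_MODE + 1 so the partition count exceeds the cutoff for the serialized (Tungsten) shuffle path, forcing Spark to fall back to SortShuffleWriter, the writer whose missing shuffle write metrics previously let AQE treat the shuffle output as empty and drop the LIMIT. The sketch below is a simplified, hypothetical model of that selection, reduced to the partition-count checks only; the real logic in SortShuffleManager.registerShuffle also considers map-side combine and serializer relocation support, and pickWriter is an illustrative name, not a Spark API.

// Hypothetical sketch (not part of the PR): why MAX + 1 partitions
// force the SortShuffleWriter path, using only the partition-count checks.
object ShuffleWriterChoiceSketch {
  // Mirrors SortShuffleManager.MAX_SHUFFLE_OUTPUT_PARTITIONS_FOR_SERIALIZED_MODE,
  // the largest partition count the serialized shuffle path supports.
  val MaxSerializedModePartitions: Int = 1 << 24 // 16777216

  // bypassMergeThreshold models spark.shuffle.sort.bypassMergeThreshold (default 200).
  def pickWriter(numPartitions: Int, bypassMergeThreshold: Int = 200): String = {
    if (numPartitions <= bypassMergeThreshold) "BypassMergeSortShuffleWriter"
    else if (numPartitions <= MaxSerializedModePartitions) "UnsafeShuffleWriter"
    else "SortShuffleWriter" // the fallback writer this PR's fix is about
  }

  def main(args: Array[String]): Unit = {
    // MAX + 1 exceeds both thresholds, which is exactly what the test's
    // SHUFFLE_PARTITIONS setting arranges.
    assert(pickWriter(MaxSerializedModePartitions + 1) == "SortShuffleWriter")
  }
}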