Skip to content

Commit

Permalink
Fix Hodges-Lehmann distribution ratio calculation
Browse files Browse the repository at this point in the history
  • Loading branch information
mgrzaslewicz committed Jul 22, 2024
1 parent 0c74c21 commit fee23ec
Show file tree
Hide file tree
Showing 9 changed files with 1,339 additions and 15 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,16 @@ Dropping a requirement of a major version of a dependency is a new contract.
## [Unreleased]
[Unreleased]: https://github.com/atlassian/report/compare/release-4.5.0...master

### Added
- Add `DistributionComparator` which uses a proper Hodges-Lehmann implementation using pseudo-median instead of median

### Fixed
- Fix calculating distribution ratio in `RelativeNonparametricPerformanceJudge` by `DistributionComparator`

### Deprecated
- Deprecate `ShiftedDistributionRegressionTest` in favor of `DistributionComparator`


## [4.5.0] - 2024-07-01
[4.5.0]: https://github.com/atlassian/report/compare/release-4.4.0...release-4.5.0

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@ import org.apache.commons.math3.stat.descriptive.rank.Median
* @experiment experiment durations
* @param mwAlpha Mann-Whitney significance level
* @param ksAlpha Kolmogorov-Smirnov significance level
* @deprecated Use [DistributionComparator] instead
*/
@Deprecated("Use DistributionComparator instead")
class ShiftedDistributionRegressionTest(
private val baseline: DoubleArray,
private val experiment: DoubleArray,
Expand Down Expand Up @@ -57,7 +59,12 @@ class ShiftedDistributionRegressionTest(
}

internal fun overcomesTolerance(tolerance: Double): Boolean {
return isExperimentRegressed(tolerance) || isExperimentImproved(tolerance)
val isExperimentRegressed = isExperimentRegressed(tolerance)
val isExperimentImproved = isExperimentImproved(tolerance)
if (isExperimentImproved && isExperimentRegressed) {
throw IllegalArgumentException("Experiment can't be both regressed and improved at the same time")
}
return isExperimentRegressed || isExperimentImproved
}

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
package com.atlassian.performance.tools.report.api.distribution

import com.numericalmethod.suanshu.stats.test.rank.wilcoxon.WilcoxonRankSum
import org.apache.commons.math3.stat.descriptive.rank.Median
import org.apache.commons.math3.stat.ranking.NaNStrategy

class DistributionComparator private constructor(
private val baseline: DoubleArray,
private val experiment: DoubleArray,
/**
* A percentage by which experiment can be slower/faster than baseline and not considered as a regression/improvement
*/
private val tolerance: Double,
private val significance: Double
) {



/**
* Performs a one-tailed Mann–Whitney U test to check whether experiment is not slower than the baseline
*
* @return true if the experiment is slower than the baseline by more than tolerance, false otherwise
*/
private fun isExperimentRegressed(baselineMedian: Double): Boolean {
val mu = -tolerance * baselineMedian
return WilcoxonRankSum(baseline, experiment, mu).pValue1SidedLess < significance
}

private fun isExperimentImproved(baselineMedian: Double): Boolean {
val mu = -tolerance * baselineMedian
val wilcoxon = WilcoxonRankSum(experiment, baseline, mu)
return wilcoxon.pValue1SidedLess < significance
}

/**
* Pseudo-median: the median of the Walsh (pairwise) averages
*/
private fun pseudoMedian(array: DoubleArray): Double {
val n = array.size
val size = n * (n + 1) / 2 - n
val values = DoubleArray(size)
var k = 0
for (i in 0 until n) {
for (j in i + 1 until n) {
values[k++] = (array[i] + array[j]) / 2
}
}
return Median().evaluate(values)
}

private fun median(func: (xi: Double, yj: Double) -> Double): Double {
val values = DoubleArray(baseline.size * experiment.size)
var k = 0
for (i in baseline.indices) {
for (j in experiment.indices) {
values[k++] = func(baseline[i], experiment[j])
}
}
return Median().withNaNStrategy(NaNStrategy.MINIMAL).evaluate(values)
}

private fun shift(): Double {
return median { xi, yj -> yj - xi }
}

private fun ratio(): Double {
return median { xi, yj -> yj / xi }
}

/**
* Calculates the distance between two data sets based on the [Hodges-Lehmann estimator][].
* [Hodges-Lehmann estimator]: https://en.wikipedia.org/wiki/Hodges%E2%80%93Lehmann_estimator
* https://aakinshin.net/hodges-lehmann-estimator/
* https://github.com/AndreyAkinshin/perfolizer/blob/master/src/Perfolizer/Perfolizer/Mathematics/GenericEstimators/HodgesLehmannEstimator.cs
*
* Takes into account tolerance which answers the question "is change is big enough to matter?"
*/
fun compare(): DistributionComparison {
val experimentShift = shift()
val baselineMedian = pseudoMedian(baseline)
val experimentRatio = ratio()
val isExperimentImproved = isExperimentImproved(baselineMedian)
val isExperimentRegressed = isExperimentRegressed(baselineMedian)
val experimentRelativeChange = experimentRatio - 1
return DistributionComparison(
experimentRelativeChange = experimentRelativeChange,
experimentAbsoluteChange = experimentShift,
isExperimentRegressed = isExperimentRegressed,
isExperimentImproved = isExperimentImproved
)
}

class Builder(
private var baseline: DoubleArray,
private var experiment: DoubleArray
) {
private var significance: Double = 0.05
private var tolerance: Double = 0.01

fun significance(significance: Double) = apply { this.significance = significance }
fun tolerance(tolerance: Double) = apply { this.tolerance = tolerance }
fun baseline(baseline: DoubleArray) = apply { this.baseline = baseline }
fun experiment(experiment: DoubleArray) = apply { this.experiment = experiment }

fun build() = DistributionComparator(
baseline = baseline,
experiment = experiment,
tolerance = tolerance,
significance = significance
)

}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package com.atlassian.performance.tools.report.api.distribution

class DistributionComparison(
val experimentRelativeChange: Double,
val experimentAbsoluteChange: Double,
val isExperimentRegressed: Boolean,
val isExperimentImproved: Boolean
) {

init {
if (isExperimentImproved && isExperimentRegressed) {
throw IllegalArgumentException("Experiment can't be both regressed and improved at the same time")
}
}

fun hasImpact() = isExperimentRegressed || isExperimentImproved

}
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ package com.atlassian.performance.tools.report.api.judge

import com.atlassian.performance.tools.jiraactions.api.ActionType
import com.atlassian.performance.tools.report.ActionMetricsReader
import com.atlassian.performance.tools.report.api.ShiftedDistributionRegressionTest
import com.atlassian.performance.tools.report.api.distribution.DistributionComparator
import com.atlassian.performance.tools.report.api.junit.FailedAssertionJUnitReport
import com.atlassian.performance.tools.report.api.junit.JUnitReport
import com.atlassian.performance.tools.report.api.junit.SuccessfulJUnitReport
Expand Down Expand Up @@ -70,14 +70,16 @@ class RelativeNonparametricPerformanceJudge private constructor(
report = FailedAssertionJUnitReport(reportName, "No action $label results for $experimentCohort"),
action = action
)
val test = ShiftedDistributionRegressionTest(baseline, experiment, mwAlpha = significance, ksAlpha = 0.0)
// shifts are negated, because ShiftedDistributionRegressionTest is relative to experiment, instead of baseline
val comparison = DistributionComparator.Builder(baseline, experiment)
.tolerance(toleranceRatio.toDouble())
.build()
.compare()
val impact = LatencyImpact.Builder(
action,
-test.percentageShift,
reader.convertToDuration(-test.locationShift)
comparison.experimentRelativeChange,
reader.convertToDuration(comparison.experimentAbsoluteChange)
)
.relevant(test.overcomesTolerance(toleranceRatio.toDouble()))
.relevant(comparison.hasImpact())
.build()
impactHandlers.forEach { it.accept(impact) }
return if (impact.regression) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package com.atlassian.performance.tools.report

import com.atlassian.performance.tools.jiraactions.api.*
import com.atlassian.performance.tools.report.api.ShiftedDistributionRegressionTest
import com.atlassian.performance.tools.report.api.result.FakeResults
import com.atlassian.performance.tools.report.chart.Chart
Expand All @@ -12,7 +11,7 @@ import org.apache.commons.math3.distribution.NormalDistribution
import org.apache.commons.math3.distribution.NormalDistribution.DEFAULT_INVERSE_ABSOLUTE_ACCURACY
import org.apache.commons.math3.random.MersenneTwister
import org.assertj.core.api.Assertions.assertThat
import org.assertj.core.api.SoftAssertions.*
import org.assertj.core.api.SoftAssertions.assertSoftly
import org.assertj.core.data.Offset
import org.junit.Ignore
import org.junit.Test
Expand Down Expand Up @@ -112,7 +111,7 @@ class ShiftedDistributionRegressionTestTest {
}

/**
* In a 51% vs 49% case, small diffs should not dominate the big diffs.
* In a 51% slightly faster vs 49% much slower case, it should be a regression
*/
@Ignore("https://ecosystem.atlassian.net/browse/JPERF-1297")
@Test
Expand Down Expand Up @@ -152,16 +151,39 @@ class ShiftedDistributionRegressionTestTest {
}
}

@Test
// @Ignore("percentageShift calculation is fixed in in DistributionComparator where Hodges-Lehmann estimator with pseudo-median is used")
fun shouldDetectImprovementWhenEveryPercentileBetter() {
// given
val baseline =
this.javaClass.getResource("/real-results/view issue 9.17.0 vs 10.0.0/baseline.csv").readText().lines()
.map { it.toDouble() }.toDoubleArray()
val experiment =
this.javaClass.getResource("/real-results/view issue 9.17.0 vs 10.0.0/experiment.csv").readText().lines()
.map { it.toDouble() }.toDoubleArray()
// when
val test = ShiftedDistributionRegressionTest(baseline, experiment)
// then
plotQuantiles(baseline, experiment)
assertSoftly {
it.assertThat(test.isExperimentRegressed(0.01)).`as`("isExperimentRegressed").isFalse()
it.assertThat(test.percentageShift).`as`("").`as`("percentageShift").isEqualTo(0.03941908713692943)
it.assertThat(test.locationShift).`as`("").`as`("locationShift").isEqualTo(20.0)
it.assertThat(test.overcomesTolerance(0.01)).`as`("overcomesTolerance").isTrue()
}
}


private fun plotQuantiles(
baseline: DoubleArray,
experiment: DoubleArray
) {
val chart = Chart(listOf(
chartLine(baseline, "baseline"),
chartLine(experiment, "experiment")
))
val chart = Chart(
listOf(
chartLine(baseline, "baseline"),
chartLine(experiment, "experiment")
)
)
val htmlFile = Files.createTempFile("kebab", ".html")
.also { println("Distribution comparison at $it") }
DistributionComparison(GitRepo.findFromCurrentDirectory()).render(chart, htmlFile)
Expand All @@ -174,7 +196,6 @@ class ShiftedDistributionRegressionTestTest {
yAxisId = "latency-axis"
)

@Ignore("Known bug: https://ecosystem.atlassian.net/browse/JPERF-1188")
@Test
fun shouldSeeNoShiftAcrossTheSameResult() {
val result = FakeResults.fastResult
Expand Down
Loading

0 comments on commit fee23ec

Please sign in to comment.