-
Notifications
You must be signed in to change notification settings - Fork 1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
improve average performance of long task timer for out-of-order stopping #5591
base: main
Are you sure you want to change the base?
Changes from all commits
6b73a9f
44f147a
d8dcd3b
9396e4d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -27,28 +27,35 @@ | |||||||||||||||||
|
||||||||||||||||||
import java.util.*; | ||||||||||||||||||
import java.util.concurrent.ArrayBlockingQueue; | ||||||||||||||||||
import java.util.concurrent.ConcurrentLinkedDeque; | ||||||||||||||||||
import java.util.concurrent.CopyOnWriteArrayList; | ||||||||||||||||||
import java.util.concurrent.ConcurrentSkipListSet; | ||||||||||||||||||
import java.util.concurrent.TimeUnit; | ||||||||||||||||||
import java.util.concurrent.atomic.AtomicInteger; | ||||||||||||||||||
import java.util.function.Consumer; | ||||||||||||||||||
import java.util.stream.Collectors; | ||||||||||||||||||
import java.util.stream.StreamSupport; | ||||||||||||||||||
|
||||||||||||||||||
public class DefaultLongTaskTimer extends AbstractMeter implements LongTaskTimer { | ||||||||||||||||||
|
||||||||||||||||||
/** | ||||||||||||||||||
* Preferring {@link ConcurrentLinkedDeque} over {@link CopyOnWriteArrayList} here | ||||||||||||||||||
* Preferring {@link ConcurrentSkipListSet} over other concurrent collections | ||||||||||||||||||
* because... | ||||||||||||||||||
* <p> | ||||||||||||||||||
* Retrieval of percentile values will be O(N) but starting/stopping tasks will be | ||||||||||||||||||
* O(1). Starting/stopping tasks happen in the same thread as the main application | ||||||||||||||||||
* code, where publishing generally happens in a separate thread. Also, shipping | ||||||||||||||||||
* client-side percentiles should be relatively uncommon. | ||||||||||||||||||
* O(log(N)). Starting/stopping tasks happen in the same thread as the main | ||||||||||||||||||
* application code, where publishing generally happens in a separate thread. Also, | ||||||||||||||||||
* shipping client-side percentiles should be relatively uncommon. | ||||||||||||||||||
* <p> | ||||||||||||||||||
* Tasks are naturally ordered at time of insertion, but task completion/removal can | ||||||||||||||||||
* happen out-of-order, that causes removal of tasks from the middle of the list. A | ||||||||||||||||||
* queue would provide O(1) insertion, but O(N) removal in average. A skip-list | ||||||||||||||||||
* provides O(log(N)) average across all tasks. | ||||||||||||||||||
* <p> | ||||||||||||||||||
* Histogram creation is O(N) for both the queue and list options, because we have to | ||||||||||||||||||
* consider which bucket each active task belongs. | ||||||||||||||||||
*/ | ||||||||||||||||||
private final Deque<SampleImpl> activeTasks = new ConcurrentLinkedDeque<>(); | ||||||||||||||||||
private final NavigableSet<SampleImpl> activeTasks = new ConcurrentSkipListSet<>(); | ||||||||||||||||||
|
||||||||||||||||||
private final AtomicInteger counter = new AtomicInteger(); | ||||||||||||||||||
|
||||||||||||||||||
private final Clock clock; | ||||||||||||||||||
|
||||||||||||||||||
|
@@ -91,11 +98,27 @@ public DefaultLongTaskTimer(Id id, Clock clock, TimeUnit baseTimeUnit, | |||||||||||||||||
|
||||||||||||||||||
@Override | ||||||||||||||||||
public Sample start() { | ||||||||||||||||||
SampleImpl sample = new SampleImpl(); | ||||||||||||||||||
activeTasks.add(sample); | ||||||||||||||||||
final long startTime = clock.monotonicTime(); | ||||||||||||||||||
SampleImpl sample = new SampleImpl(startTime); | ||||||||||||||||||
if (!activeTasks.add(sample)) { | ||||||||||||||||||
sample = new SampleImplCounted(startTime, nextNonZeroCounter()); | ||||||||||||||||||
activeTasks.add(sample); | ||||||||||||||||||
} | ||||||||||||||||||
return sample; | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
private int nextNonZeroCounter() { | ||||||||||||||||||
int nextCount; | ||||||||||||||||||
while ((nextCount = counter.incrementAndGet()) == 0) { | ||||||||||||||||||
} | ||||||||||||||||||
return nextCount; | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
// @VisibleForTesting | ||||||||||||||||||
void setCounter(int newCounter) { | ||||||||||||||||||
counter.set(newCounter); | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
@Override | ||||||||||||||||||
public double duration(TimeUnit unit) { | ||||||||||||||||||
long now = clock.monotonicTime(); | ||||||||||||||||||
|
@@ -108,8 +131,13 @@ public double duration(TimeUnit unit) { | |||||||||||||||||
|
||||||||||||||||||
@Override | ||||||||||||||||||
public double max(TimeUnit unit) { | ||||||||||||||||||
Sample oldest = activeTasks.peek(); | ||||||||||||||||||
return oldest == null ? 0.0 : oldest.duration(unit); | ||||||||||||||||||
try { | ||||||||||||||||||
Sample oldest = activeTasks.first(); | ||||||||||||||||||
return oldest.duration(unit); | ||||||||||||||||||
} | ||||||||||||||||||
catch (NoSuchElementException e) { | ||||||||||||||||||
return 0.0; | ||||||||||||||||||
} | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
@Override | ||||||||||||||||||
|
@@ -215,14 +243,18 @@ public HistogramSnapshot takeSnapshot() { | |||||||||||||||||
(ps, scaling) -> ps.print("Summary output for LongTaskTimer histograms is not supported.")); | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
class SampleImpl extends Sample { | ||||||||||||||||||
class SampleImpl extends Sample implements Comparable<SampleImpl> { | ||||||||||||||||||
|
||||||||||||||||||
private final long startTime; | ||||||||||||||||||
|
||||||||||||||||||
private volatile boolean stopped; | ||||||||||||||||||
|
||||||||||||||||||
private SampleImpl() { | ||||||||||||||||||
this.startTime = clock.monotonicTime(); | ||||||||||||||||||
private SampleImpl(long startTime) { | ||||||||||||||||||
this.startTime = startTime; | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
int counter() { | ||||||||||||||||||
return 0; | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
@Override | ||||||||||||||||||
|
@@ -249,6 +281,31 @@ public String toString() { | |||||||||||||||||
+ ", " + "duration(nanos)=" + durationInNanoseconds + ", " + "startTimeNanos=" + startTime + '}'; | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
@Override | ||||||||||||||||||
public int compareTo(DefaultLongTaskTimer.SampleImpl o) { | ||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This will be called a lot for the same instance, can we add a shortcut? if (this == o) {
return 0;
} |
||||||||||||||||||
int startCompare = Long.compare(startTime, o.startTime); | ||||||||||||||||||
if (startCompare == 0) { | ||||||||||||||||||
return Integer.compare(counter(), o.counter()); | ||||||||||||||||||
} | ||||||||||||||||||
Comment on lines
+286
to
+289
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just a formatting/readability (hopefully) improvement using
Suggested change
|
||||||||||||||||||
return startCompare; | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
class SampleImplCounted extends SampleImpl { | ||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we either:
|
||||||||||||||||||
|
||||||||||||||||||
private final int counter; | ||||||||||||||||||
|
||||||||||||||||||
private SampleImplCounted(long startTime, int counter) { | ||||||||||||||||||
super(startTime); | ||||||||||||||||||
this.counter = counter; | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
@Override | ||||||||||||||||||
int counter() { | ||||||||||||||||||
return counter; | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
} |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,115 @@ | ||||||
/* | ||||||
* Copyright 2024 VMware, Inc. | ||||||
* | ||||||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||||||
* you may not use this file except in compliance with the License. | ||||||
* You may obtain a copy of the License at | ||||||
* | ||||||
* https://www.apache.org/licenses/LICENSE-2.0 | ||||||
* | ||||||
* Unless required by applicable law or agreed to in writing, software | ||||||
* distributed under the License is distributed on an "AS IS" BASIS, | ||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||||
* See the License for the specific language governing permissions and | ||||||
* limitations under the License. | ||||||
*/ | ||||||
package io.micrometer.core.instrument.internal; | ||||||
|
||||||
import io.micrometer.core.instrument.LongTaskTimer; | ||||||
import io.micrometer.core.instrument.MeterRegistry; | ||||||
import io.micrometer.core.instrument.MockClock; | ||||||
import io.micrometer.core.instrument.internal.DefaultLongTaskTimer.SampleImpl; | ||||||
import io.micrometer.core.instrument.internal.DefaultLongTaskTimer.SampleImplCounted; | ||||||
import io.micrometer.core.instrument.simple.SimpleConfig; | ||||||
import io.micrometer.core.instrument.simple.SimpleMeterRegistry; | ||||||
import org.assertj.core.api.AbstractComparableAssert; | ||||||
import org.assertj.core.api.Assertions; | ||||||
import org.assertj.core.api.InstanceOfAssertFactory; | ||||||
import org.junit.jupiter.api.Test; | ||||||
|
||||||
import java.util.concurrent.TimeUnit; | ||||||
|
||||||
import static org.assertj.core.api.Assertions.assertThat; | ||||||
|
||||||
class DefaultLongTaskTimerTest { | ||||||
|
||||||
private static final InstanceOfAssertFactory<SampleImpl, AbstractComparableAssert<?, SampleImpl>> SAMPLE_IMPL_ASSERT = new InstanceOfAssertFactory<>( | ||||||
SampleImpl.class, Assertions::assertThat); | ||||||
|
||||||
@Test | ||||||
void sampleTimestampCollision() { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What do you think about naming the tests about the behavior we expect?
Suggested change
|
||||||
final MockClock clock = new MockClock(); | ||||||
MeterRegistry registry = new SimpleMeterRegistry(SimpleConfig.DEFAULT, clock); | ||||||
LongTaskTimer t = LongTaskTimer.builder("my.timer").register(registry); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we eliminate one-letter variables e.g.: use |
||||||
|
||||||
final LongTaskTimer.Sample sample1 = t.start(); | ||||||
final LongTaskTimer.Sample sample2 = t.start(); | ||||||
assertThat(sample1).isInstanceOf(SampleImpl.class).isNotInstanceOf(SampleImplCounted.class); | ||||||
assertThat(sample2).isInstanceOf(SampleImplCounted.class).isNotSameAs(sample1); | ||||||
final SampleImpl sampleImpl1 = (SampleImpl) sample1; | ||||||
final SampleImpl sampleImpl2 = (SampleImpl) sample2; | ||||||
|
||||||
clock.addSeconds(1); | ||||||
|
||||||
assertThat(sample1.duration(TimeUnit.SECONDS)).isEqualTo(1); | ||||||
assertThat(sample2.duration(TimeUnit.SECONDS)).isEqualTo(1); | ||||||
|
||||||
final LongTaskTimer.Sample sample3 = t.start(); | ||||||
final LongTaskTimer.Sample sample4 = t.start(); | ||||||
assertThat(sample3).isInstanceOf(SampleImpl.class).isNotInstanceOf(SampleImplCounted.class); | ||||||
assertThat(sample4).isInstanceOf(SampleImplCounted.class); | ||||||
final SampleImpl sampleImpl3 = (SampleImpl) sample3; | ||||||
final SampleImpl sampleImpl4 = (SampleImpl) sample4; | ||||||
|
||||||
assertThat(t.activeTasks()).isEqualTo(4); | ||||||
|
||||||
assertThat(sampleImpl4).isEqualByComparingTo(sampleImpl4) | ||||||
.isGreaterThan(sampleImpl3) | ||||||
.isGreaterThan(sampleImpl2) | ||||||
.isGreaterThan(sampleImpl1); | ||||||
assertThat(sampleImpl3).isEqualByComparingTo(sampleImpl3) | ||||||
.isLessThan(sampleImpl4) | ||||||
.isGreaterThan(sampleImpl2) | ||||||
.isGreaterThan(sampleImpl1); | ||||||
assertThat(sampleImpl2).isEqualByComparingTo(sampleImpl2) | ||||||
.isLessThan(sampleImpl4) | ||||||
.isLessThan(sampleImpl3) | ||||||
.isGreaterThan(sampleImpl1); | ||||||
assertThat(sampleImpl1).isEqualByComparingTo(sampleImpl1) | ||||||
.isLessThan(sampleImpl4) | ||||||
.isLessThan(sampleImpl3) | ||||||
.isLessThan(sampleImpl2); | ||||||
|
||||||
assertThat(sample2.stop()).isEqualTo(TimeUnit.SECONDS.toNanos(1)); | ||||||
assertThat(t.activeTasks()).isEqualTo(3); | ||||||
assertThat(sample3.stop()).isEqualTo(TimeUnit.SECONDS.toNanos(0)); | ||||||
assertThat(t.activeTasks()).isEqualTo(2); | ||||||
assertThat(sample4.stop()).isEqualTo(TimeUnit.SECONDS.toNanos(0)); | ||||||
assertThat(t.activeTasks()).isEqualTo(1); | ||||||
assertThat(sample1.stop()).isEqualTo(TimeUnit.SECONDS.toNanos(1)); | ||||||
assertThat(t.activeTasks()).isEqualTo(0); | ||||||
} | ||||||
|
||||||
@Test | ||||||
void counterJumpsZeroAndWraps() { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we split this into two?
|
||||||
|
||||||
MeterRegistry registry = new SimpleMeterRegistry(SimpleConfig.DEFAULT, new MockClock()); | ||||||
LongTaskTimer t = LongTaskTimer.builder("my.timer").register(registry); | ||||||
assertThat(t).isInstanceOf(DefaultLongTaskTimer.class); | ||||||
final DefaultLongTaskTimer dltt = (DefaultLongTaskTimer) t; | ||||||
|
||||||
dltt.setCounter(Integer.MAX_VALUE - 2); | ||||||
|
||||||
assertThat(t.start()).asInstanceOf(SAMPLE_IMPL_ASSERT).returns(0, SampleImpl::counter); | ||||||
assertThat(t.start()).asInstanceOf(SAMPLE_IMPL_ASSERT).returns(Integer.MAX_VALUE - 1, SampleImpl::counter); | ||||||
assertThat(t.start()).asInstanceOf(SAMPLE_IMPL_ASSERT).returns(Integer.MAX_VALUE, SampleImpl::counter); | ||||||
assertThat(t.start()).asInstanceOf(SAMPLE_IMPL_ASSERT).returns(Integer.MIN_VALUE, SampleImpl::counter); | ||||||
assertThat(t.start()).asInstanceOf(SAMPLE_IMPL_ASSERT).returns(Integer.MIN_VALUE + 1, SampleImpl::counter); | ||||||
|
||||||
dltt.setCounter(-2); | ||||||
|
||||||
assertThat(t.start()).asInstanceOf(SAMPLE_IMPL_ASSERT).returns(-1, SampleImpl::counter); | ||||||
assertThat(t.start()).asInstanceOf(SAMPLE_IMPL_ASSERT).returns(1, SampleImpl::counter); | ||||||
} | ||||||
|
||||||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure I understand why this (whole method) is needed. Could you explain?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should try to avoid
0
here because the classes are constructed such thatSampleImplCounted(startTime, 0)
compares equal toSampleImpl(startTime)
, so the two would collide again.This will happen on the 2^32-th collision on the same instance of
DefaultLongTaskTimer
, whencounter
has wrapped around the whole int range.If you feel skipping 1 in 2^32 collisions is acceptable compared to having the strange code, I can just remove this method and the relative test
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a long way to say I would keep this and the
AtomicInteger
but I was thinking if we can eliminate theAtomicInteger
and useSystem.identityHashCode(this)
instead butidentityHashCode
(likehashCode
) does not guarantee uniqueness. UUID might be an alternative but performance might be worse.