Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 17 additions & 2 deletions ruby/lib/ci/queue.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
require 'ci/queue/file'
require 'ci/queue/grind'
require 'ci/queue/bisect'
require 'ci/queue/strategy/base'
require 'ci/queue/strategy/random'
require 'ci/queue/strategy/timing_based'

module CI
module Queue
Expand All @@ -28,11 +31,12 @@ def requeueable?(test_result)
requeueable.nil? || requeueable.call(test_result)
end

def shuffle(tests, random)
def shuffle(tests, random, config: nil)
if shuffler
shuffler.call(tests, random)
else
tests.sort.shuffle(random: random)
strategy = get_strategy(config&.strategy)
strategy.order_tests(tests, random: random, config: config)
end
end

Expand All @@ -51,5 +55,16 @@ def from_uri(url, config)
end
implementation.from_uri(uri, config)
end

private

def get_strategy(strategy_name)
case strategy_name&.to_sym
when :timing_based
Strategy::TimingBased.new
else
Strategy::Random.new
end
end
end
end
8 changes: 7 additions & 1 deletion ruby/lib/ci/queue/configuration.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ class Configuration
attr_accessor :requeue_tolerance, :namespace, :failing_test, :statsd_endpoint
attr_accessor :max_test_duration, :max_test_duration_percentile, :track_test_duration
attr_accessor :max_test_failed, :redis_ttl
attr_accessor :strategy, :timing_file, :timing_fallback_duration, :export_timing_file
attr_reader :circuit_breakers
attr_writer :seed, :build_id
attr_writer :queue_init_timeout, :report_timeout, :inactive_workers_timeout
Expand Down Expand Up @@ -51,7 +52,8 @@ def initialize(
grind_count: nil, max_duration: nil, failure_file: nil, max_test_duration: nil,
max_test_duration_percentile: 0.5, track_test_duration: false, max_test_failed: nil,
queue_init_timeout: nil, redis_ttl: 8 * 60 * 60, report_timeout: nil, inactive_workers_timeout: nil,
export_flaky_tests_file: nil, known_flaky_tests: []
export_flaky_tests_file: nil, known_flaky_tests: [],
strategy: :random, timing_file: nil, timing_fallback_duration: 100.0, export_timing_file: nil
)
@build_id = build_id
@circuit_breakers = [CircuitBreaker::Disabled]
Expand All @@ -77,6 +79,10 @@ def initialize(
@report_timeout = report_timeout
@inactive_workers_timeout = inactive_workers_timeout
@export_flaky_tests_file = export_flaky_tests_file
@strategy = strategy
@timing_file = timing_file
@timing_fallback_duration = timing_fallback_duration
@export_timing_file = export_timing_file
end

def queue_init_timeout
Expand Down
2 changes: 1 addition & 1 deletion ruby/lib/ci/queue/redis/worker.rb
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def distributed?

def populate(tests, random: Random.new)
@index = tests.map { |t| [t.id, t] }.to_h
tests = Queue.shuffle(tests, random)
tests = Queue.shuffle(tests, random, config: config)
push(tests.map(&:id))
self
end
Expand Down
13 changes: 13 additions & 0 deletions ruby/lib/ci/queue/strategy/base.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# frozen_string_literal: true

module CI
module Queue
module Strategy
class Base
def order_tests(tests, random: Random.new, config: nil)
raise NotImplementedError, "#{self.class} must implement #order_tests"
end
end
end
end
end
14 changes: 14 additions & 0 deletions ruby/lib/ci/queue/strategy/random.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# frozen_string_literal: true
require_relative 'base'

module CI
module Queue
module Strategy
class Random < Base
def order_tests(tests, random: Random.new, config: nil)
tests.sort.shuffle(random: random)
end
end
end
end
end
35 changes: 35 additions & 0 deletions ruby/lib/ci/queue/strategy/timing_based.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# frozen_string_literal: true
require_relative 'base'
require 'json'

module CI
module Queue
module Strategy
class TimingBased < Base
def order_tests(tests, random: Random.new, config: nil)
timing_data = load_timing_data(config&.timing_file)
fallback_duration = config&.timing_fallback_duration || 100.0

tests.sort_by do |test|
duration = timing_data[test.id] || fallback_duration
-duration # Negative for descending order (longest first)
end
end

private

def load_timing_data(file_path)
return {} unless file_path && ::File.exist?(file_path)

JSON.parse(::File.read(file_path))
rescue JSON::ParserError => e
warn "Warning: Could not parse timing file #{file_path}: #{e.message}"
{}
rescue => e
warn "Warning: Could not read timing file #{file_path}: #{e.message}"
{}
end
end
end
end
end
54 changes: 53 additions & 1 deletion ruby/lib/minitest/queue/runner.rb
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,10 @@ def run_command
TestDataReporter.new(namespace: queue_config&.namespace),
OrderReporter.new(path: 'log/test_order.log'),
]
if queue_config.track_test_duration
test_time_record = CI::Queue::Redis::TestTimeRecord.new(queue_url, queue_config)
reporters << TestTimeRecorder.new(build: test_time_record)
end
if queue_config.statsd_endpoint
reporters << Minitest::Reporters::StatsdReporter.new(statsd_endpoint: queue_config.statsd_endpoint)
end
Expand Down Expand Up @@ -218,8 +222,23 @@ def report_command
File.write(queue_config.export_flaky_tests_file, failures)
end

# Handle timing data reporting and export
test_time_reporter_success = if queue_config.track_test_duration
test_time_record = CI::Queue::Redis::TestTimeRecord.new(queue_url, queue_config)
test_time_reporter = TestTimeReporter.new(
build: test_time_record,
limit: queue_config.max_test_duration,
percentile: queue_config.max_test_duration_percentile,
export_file: queue_config.export_timing_file
)
test_time_reporter.report
test_time_reporter.success?
else
true
end

reporter.report
exit! reporter.success? ? 0 : 1
exit! reporter.success? && test_time_reporter_success ? 0 : 1
end

def report_grind_command
Expand All @@ -241,6 +260,7 @@ def report_grind_command
build: test_time_record,
limit: queue_config.max_test_duration,
percentile: queue_config.max_test_duration_percentile,
export_file: queue_config.export_timing_file
)
test_time_reporter.report

Expand Down Expand Up @@ -564,6 +584,38 @@ def parser
self.test_globs = test_globs
end

help = <<~EOS
Test ordering strategy: random, timing_based (default: random)
EOS
opts.separator ""
opts.on('--strategy STRATEGY', help) do |strategy|
queue_config.strategy = strategy.to_sym
end

help = <<~EOS
Path to JSON timing file for timing_based strategy
EOS
opts.separator ""
opts.on('--timing-file PATH', help) do |path|
queue_config.timing_file = path
end

help = <<~EOS
Fallback duration in ms for unknown tests (default: 100)
EOS
opts.separator ""
opts.on('--timing-fallback DURATION', Float, help) do |duration|
queue_config.timing_fallback_duration = duration
end

help = <<~EOS
Export test timing data to JSON file after run (use with report command)
EOS
opts.separator ""
opts.on('--export-timing-file PATH', help) do |path|
queue_config.export_timing_file = path
end

opts.separator ""
opts.separator " retry: Replays a previous run in the same order."

Expand Down
3 changes: 2 additions & 1 deletion ruby/lib/minitest/queue/test_time_recorder.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ def initialize(build:, **options)
def record(test)
return unless test.passed?
test_duration_in_milliseconds = test.time * 1000
@build.record(test.name, test_duration_in_milliseconds)
test_id = "#{test.klass}##{test.name}"
@build.record(test_id, test_duration_in_milliseconds)
end
end
end
Expand Down
20 changes: 19 additions & 1 deletion ruby/lib/minitest/queue/test_time_reporter.rb
Original file line number Diff line number Diff line change
@@ -1,20 +1,24 @@
# frozen_string_literal: true
require 'minitest/reporters'
require 'json'

module Minitest
module Queue
class TestTimeReporter < Minitest::Reporters::BaseReporter
include ::CI::Queue::OutputHelpers

def initialize(build:, limit: nil, percentile: nil, **options)
def initialize(build:, limit: nil, percentile: nil, export_file: nil, **options)
super(options)
@test_time_hash = build.fetch
@limit = limit
@percentile = percentile
@export_file = export_file
@success = true
end

def report
export_timing_data if @export_file

return if limit.nil? || test_time_hash.empty?

puts '+++ Test Time Report'
Expand Down Expand Up @@ -47,6 +51,20 @@ def record(*)

attr_reader :test_time_hash, :limit, :percentile

def export_timing_data
return if test_time_hash.empty?

# Convert test_time_hash to simple format: {"TestClass#method": avg_duration_ms}
timing_data = test_time_hash.transform_values do |durations|
durations.sum.to_f / durations.size # Average duration
end
Comment on lines +57 to +60
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Most cases there should just be one duration value, but there are cases where the test is run by multiple workers successfully, so we just take the average in those cases

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is interesting...why would a successful test case get run multiple times in a single build?

Copy link
Contributor Author

@ebarajas ebarajas Oct 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If a test goes past the timeout for whatever reason (let's say it hits a really long GC pause), another worker can pick it up, and then both of them end up reporting the success

https://github.com/figma/ci-queue/blob/master/ruby/lib/ci/queue/redis/worker.rb#L208-L231


File.write(@export_file, JSON.pretty_generate(timing_data))
puts "Exported timing data for #{timing_data.size} tests to #{@export_file}"
rescue => e
puts "Warning: Failed to export timing data to #{@export_file}: #{e.message}"
end

def humanized_percentile
percentile_in_percentage = percentile * 100
"#{percentile_in_percentage.to_i}th"
Expand Down
53 changes: 52 additions & 1 deletion ruby/test/integration/grind_redis_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ def test_grind_max_test_duration_failing
+++ Test Time Report
Detected 1 test(s) over the desired time limit.
Please make them faster than 1.0e-05ms in the 50th percentile.
test_flaky_passes:
ATest#test_flaky_passes:
EOS
assert output.start_with?(expected.strip)
assert_empty err
Expand Down Expand Up @@ -313,5 +313,56 @@ def test_grind_max_test_duration_percentile_outside_range
refute_empty err
assert err.include?("--max-test-duration-percentile must be within range (0, 1] (OptionParser::ParseError)")
end

def test_grind_export_timing_file
Dir.mktmpdir do |dir|
timing_file = File.join(dir, 'grind_timing_file.json')

system(
{ 'BUILDKITE' => '1' },
@exe, 'grind',
'--queue', @redis_url,
'--seed', 'foobar',
'--build', '1',
'--worker', '1',
'--timeout', '1',
'--grind-count', '10',
'--grind-list', 'grind_list_success.txt',
'--track-test-duration',
'-Itest',
'test/dummy_test.rb',
chdir: 'test/fixtures/',
)

out, err = capture_subprocess_io do
system(
{ 'BUILDKITE' => '1' },
@exe, 'report_grind',
'--queue', @redis_url,
'--seed', 'foobar',
'--build', '1',
'--worker', '1',
'--timeout', '5',
'--track-test-duration',
'--export-timing-file', timing_file,
chdir: 'test/fixtures/',
)
end

assert_empty err
assert_includes out, "Exported timing data for"
assert_includes out, "tests to #{timing_file}"

assert File.exist?(timing_file), "Timing file should exist"
content = File.read(timing_file)
timing_data = JSON.parse(content)

assert_equal 1, timing_data.size, "Should have timing data for 1 test"
test_id, duration = timing_data.first
assert_equal "ATest#test_flaky_passes", test_id
assert_kind_of Numeric, duration, "Duration should be numeric"
assert_operator duration, :>=, 0, "Duration should be non-negative"
end
end
end
end
Loading
Loading