-
Notifications
You must be signed in to change notification settings - Fork 1.1k
chore: asynchronous IO for connection fiber #6069
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
8d28c55
ec18a18
d587691
334ade2
f5b2787
74428b5
ae7188d
1a5c383
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -31,6 +31,9 @@ inputs: | |
| epoll: | ||
| required: false | ||
| type: string | ||
| df-arg: | ||
| required: false | ||
| type: string | ||
|
|
||
| runs: | ||
| using: "composite" | ||
|
|
@@ -55,14 +58,18 @@ runs: | |
| export ROOT_DIR="${GITHUB_WORKSPACE}/tests/dragonfly/valkey_search" | ||
| export UBSAN_OPTIONS=print_stacktrace=1:halt_on_error=1 # to crash on errors | ||
|
|
||
| if [[ "${{inputs.df-arg}}" == 'epoll' ]]; then | ||
| export DF_ARG="--df ${{inputs.df-arg}}" | ||
| fi | ||
|
|
||
| if [[ "${{inputs.epoll}}" == 'epoll' ]]; then | ||
| export FILTER="${{inputs.filter}} and not exclude_epoll" | ||
| # Run only replication tests with epoll | ||
| timeout 80m pytest -m "$FILTER" --durations=10 --timeout=300 --color=yes --json-report --json-report-file=report.json dragonfly --df force_epoll=true --log-cli-level=INFO || code=$? | ||
| timeout 80m pytest -m "$FILTER" $DF_ARG --durations=10 --timeout=300 --color=yes --json-report --json-report-file=report.json dragonfly --df force_epoll=true --df --log-cli-level=INFO || code=$? | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There is an extra `--df` flag before `--log-cli-level=INFO` in this pytest invocation — pytest will consume `--log-cli-level=INFO` as the value of the dangling `--df`, silently dropping the log-level setting. 🤖 Was this useful? React with 👍 or 👎 |
||
| else | ||
| export FILTER="${{inputs.filter}}" | ||
| # Run only replication tests with iouring | ||
| timeout 80m pytest -m "$FILTER" --durations=10 --timeout=300 --color=yes --json-report --json-report-file=report.json dragonfly --log-cli-level=INFO || code=$? | ||
| timeout 80m pytest -m "$FILTER" $DF_ARG --durations=10 --timeout=300 --color=yes --json-report --json-report-file=report.json dragonfly --log-cli-level=INFO || code=$? | ||
| fi | ||
|
|
||
| # timeout returns 124 if we exceeded the timeout duration | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,81 @@ | ||
| name: RegTests IoLoopV2 | ||
|
|
||
| # Manually triggered only | ||
| on: | ||
| workflow_dispatch: | ||
| push: | ||
|
|
||
| jobs: | ||
| build: | ||
| strategy: | ||
| matrix: | ||
| # Test of these containers | ||
| container: ["ubuntu-dev:20"] | ||
| proactor: [Uring] | ||
| build-type: [Debug, Release] | ||
| runner: [ubuntu-latest, [self-hosted, linux, ARM64]] | ||
|
|
||
| runs-on: ${{ matrix.runner }} | ||
|
|
||
| container: | ||
| image: ghcr.io/romange/${{ matrix.container }} | ||
| options: --security-opt seccomp=unconfined --sysctl "net.ipv6.conf.all.disable_ipv6=0" | ||
| volumes: | ||
| - /var/crash:/var/crash | ||
|
|
||
| steps: | ||
| - uses: actions/checkout@v5 | ||
| with: | ||
| submodules: true | ||
|
|
||
| - name: Print environment info | ||
| run: | | ||
| cat /proc/cpuinfo | ||
| ulimit -a | ||
| env | ||
|
|
||
| - name: Configure & Build | ||
| run: | | ||
| # -no-pie to disable address randomization so we could symbolize stacktraces | ||
| cmake -B ${GITHUB_WORKSPACE}/build -DCMAKE_BUILD_TYPE=${{matrix.build-type}} -GNinja \ | ||
| -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DPRINT_STACKTRACES_ON_SIGNAL=ON \ | ||
| -DCMAKE_CXX_FLAGS=-no-pie -DHELIO_STACK_CHECK:STRING=4096 | ||
|
|
||
| cd ${GITHUB_WORKSPACE}/build && ninja dragonfly | ||
| pwd | ||
| ls -l .. | ||
|
|
||
| - name: Run regression tests action | ||
| uses: ./.github/actions/regression-tests | ||
| with: | ||
| dfly-executable: dragonfly | ||
| gspace-secret: ${{ secrets.GSPACES_BOT_DF_BUILD }} | ||
| build-folder-name: build | ||
| filter: ${{ matrix.build-type == 'Release' && 'not debug_only and not tls' || 'not opt_only and not tls' }} | ||
| aws-access-key-id: ${{ secrets.AWS_S3_ACCESS_KEY }} | ||
| aws-secret-access-key: ${{ secrets.AWS_S3_ACCESS_SECRET }} | ||
| s3-bucket: ${{ secrets.S3_REGTEST_BUCKET }} | ||
| df-arg: "expiremental_io_loop_v2" | ||
|
|
||
| - name: Upload logs on failure | ||
| if: failure() | ||
| uses: actions/upload-artifact@v4 | ||
| with: | ||
| name: logs | ||
| path: /tmp/failed/* | ||
|
|
||
| - name: Copy binary on a self hosted runner | ||
| if: failure() | ||
| run: | | ||
| # We must use sh syntax. | ||
| if [ "$RUNNER_ENVIRONMENT" = "self-hosted" ]; then | ||
| cd ${GITHUB_WORKSPACE}/build | ||
| timestamp=$(date +%Y-%m-%d_%H:%M:%S) | ||
| mv ./dragonfly /var/crash/dragonfy_${timestamp} | ||
| fi | ||
|
|
||
| lint-test-chart: | ||
| runs-on: ubuntu-latest | ||
| steps: | ||
| - uses: actions/checkout@v5 | ||
| - uses: ./.github/actions/lint-test-chart |
| +33 −0 | .agent/README.md | |
| +460 −0 | .agent/cpp_guidelines.md | |
| +140 −0 | .github/copilot-instructions.md | |
| +19 −0 | .github/workflows/ci.yml | |
| +2 −2 | util/fibers/uring_proactor.cc |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -10,9 +10,13 @@ | |
| #include <absl/strings/str_cat.h> | ||
| #include <absl/time/time.h> | ||
|
|
||
| #include <condition_variable> | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. TODO remove the includes. plugin is too aggressive |
||
| #include <numeric> | ||
| #include <utility> | ||
| #include <variant> | ||
|
|
||
| #include "absl/cleanup/cleanup.h" | ||
| #include "absl/types/span.h" | ||
| #include "base/cycle_clock.h" | ||
| #include "base/flag_utils.h" | ||
| #include "base/flags.h" | ||
|
|
@@ -112,6 +116,8 @@ ABSL_FLAG(uint32_t, pipeline_wait_batch_usec, 0, | |
| "If non-zero, waits for this time for more I/O " | ||
| " events to come for the connection in case there is only one command in the pipeline. "); | ||
|
|
||
| ABSL_FLAG(bool, expiremental_io_loop_v2, false, "new io loop"); | ||
|
|
||
| using namespace util; | ||
| using namespace std; | ||
| using absl::GetFlag; | ||
|
|
@@ -695,6 +701,8 @@ void Connection::OnShutdown() { | |
| VLOG(1) << "Connection::OnShutdown"; | ||
|
|
||
| BreakOnce(POLLHUP); | ||
| io_ec_ = make_error_code(errc::connection_aborted); | ||
| io_event_.notify(); | ||
| } | ||
|
|
||
| void Connection::OnPreMigrateThread() { | ||
|
|
@@ -1096,7 +1104,12 @@ void Connection::ConnectionFlow() { | |
| // Main loop. | ||
| if (parse_status != ERROR && !ec) { | ||
| UpdateIoBufCapacity(io_buf_, stats_, [&]() { io_buf_.EnsureCapacity(64); }); | ||
| auto res = IoLoop(); | ||
| variant<error_code, Connection::ParserStatus> res; | ||
| if (GetFlag(FLAGS_expiremental_io_loop_v2)) { | ||
| res = IoLoopV2(); | ||
| } else { | ||
| res = IoLoop(); | ||
| } | ||
|
|
||
| if (holds_alternative<error_code>(res)) { | ||
| ec = get<error_code>(res); | ||
|
|
@@ -1154,6 +1167,10 @@ void Connection::ConnectionFlow() { | |
| } | ||
| } | ||
|
|
||
| if (GetFlag(FLAGS_expiremental_io_loop_v2)) { | ||
| socket_->ResetOnRecvHook(); | ||
| } | ||
|
|
||
| if (ec && !FiberSocketBase::IsConnClosed(ec)) { | ||
| string conn_info = service_->GetContextInfo(cc_.get()).Format(); | ||
| LOG_EVERY_T(WARNING, 1) << "Socket error for connection " << conn_info << " " << GetName() | ||
|
|
@@ -1225,6 +1242,7 @@ Connection::ParserStatus Connection::ParseRedis(unsigned max_busy_cycles) { | |
| auto dispatch_async = [this]() -> MessageHandle { return {FromArgs(tmp_parse_args_)}; }; | ||
|
|
||
| io::Bytes read_buffer = io_buf_.InputBuffer(); | ||
| size_t total = 0; | ||
| do { | ||
| result = redis_parser_->Parse(read_buffer, &consumed, &tmp_parse_args_); | ||
| request_consumed_bytes_ += consumed; | ||
|
|
@@ -1258,6 +1276,7 @@ Connection::ParserStatus Connection::ParseRedis(unsigned max_busy_cycles) { | |
| << "Redis parser error: " << result << " during parse: " << ToSV(read_buffer); | ||
| } | ||
| read_buffer.remove_prefix(consumed); | ||
| total += consumed; | ||
|
|
||
| // We must yield from time to time to allow other fibers to run. | ||
| // Specifically, if a client sends a huge chunk of data resulting in a very long pipeline, | ||
|
|
@@ -1268,7 +1287,7 @@ Connection::ParserStatus Connection::ParseRedis(unsigned max_busy_cycles) { | |
| } | ||
| } while (RedisParser::OK == result && read_buffer.size() > 0 && !reply_builder_->GetError()); | ||
|
|
||
| io_buf_.ConsumeInput(io_buf_.InputLen()); | ||
| io_buf_.ConsumeInput(total); | ||
|
|
||
| parser_error_ = result; | ||
| if (result == RedisParser::OK) | ||
|
|
@@ -1430,7 +1449,7 @@ io::Result<size_t> Connection::HandleRecvSocket() { | |
| return recv_sz; | ||
| } | ||
|
|
||
| auto Connection::IoLoop() -> variant<error_code, ParserStatus> { | ||
| variant<error_code, Connection::ParserStatus> Connection::IoLoop() { | ||
| error_code ec; | ||
| ParserStatus parse_status = OK; | ||
|
|
||
|
|
@@ -2161,6 +2180,158 @@ bool Connection::WeakRef::operator==(const WeakRef& other) const { | |
| return client_id_ == other.client_id_; | ||
| } | ||
|
|
||
| void Connection::DoReadOnRecv(const util::FiberSocketBase::RecvNotification& n) { | ||
| if (std::holds_alternative<std::error_code>(n.read_result)) { | ||
| io_ec_ = std::get<std::error_code>(n.read_result); | ||
| return; | ||
| } | ||
|
|
||
| // TODO non epoll API via EnableRecvMultishot | ||
| // if (std::holds_alternative<io::MutableBytes>(n.read_result)) | ||
|
|
||
| if (std::holds_alternative<bool>(n.read_result)) { | ||
| if (!std::get<bool>(n.read_result)) { | ||
| io_ec_ = make_error_code(errc::connection_aborted); | ||
| return; | ||
| } | ||
|
|
||
| if (io_buf_.AppendLen() == 0) { | ||
| // We will regrow in IoLoop | ||
| return; | ||
| } | ||
|
|
||
| io::MutableBytes buf = io_buf_.AppendBuffer(); | ||
| int res = recv(socket_->native_handle(), buf.data(), buf.size(), 0); | ||
|
|
||
| // error path | ||
| if (res < 0) { | ||
| // LOG(INFO) << "ERROR"; | ||
| if (errno == EAGAIN || errno == EWOULDBLOCK) { | ||
| return; | ||
| } | ||
|
|
||
| if (errno == ECONNRESET) { | ||
| // The peer can shutdown the connection abruptly. | ||
| io_ec_ = make_error_code(errc::connection_aborted); | ||
| } | ||
|
|
||
| LOG_IF(FATAL, !io_ec_) << "Recv error: " << strerror(-res) << " errno " << errno; | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
🤖 Was this useful? React with 👍 or 👎 |
||
| return; | ||
| } | ||
|
|
||
| if (res == 0) { | ||
| io_ec_ = make_error_code(errc::connection_aborted); | ||
| return; | ||
| } | ||
| // A recv call can return fewer bytes than requested even if the | ||
| // socket buffer actually contains enough data to satisfy the full request. | ||
| // TODO maybe worth looping here and try another recv call until it fails | ||
| // with EAGAIN or EWOULDBLOCK. The problem there is that we need to handle | ||
| // resizing if AppendBuffer is zero. | ||
| io_buf_.CommitWrite(res); | ||
| return; | ||
| } | ||
|
|
||
| DCHECK(false) << "Sould not reach here"; | ||
| } | ||
|
|
||
| variant<error_code, Connection::ParserStatus> Connection::IoLoopV2() { | ||
| error_code ec; | ||
| ParserStatus parse_status = OK; | ||
|
|
||
| size_t max_iobfuf_len = GetFlag(FLAGS_max_client_iobuf_len); | ||
|
|
||
| auto* peer = socket_.get(); | ||
| recv_buf_.res_len = 0; | ||
|
|
||
| // TODO EnableRecvMultishot | ||
|
|
||
| // Breaks with TLS. RegisterOnRecv is unimplemented. | ||
| peer->RegisterOnRecv([this](const FiberSocketBase::RecvNotification& n) { | ||
| DoReadOnRecv(n); | ||
| io_event_.notify(); | ||
| }); | ||
|
|
||
| do { | ||
| HandleMigrateRequest(); | ||
|
|
||
| // We *must* poll again for readiness. The event handler we registered above | ||
| // with RegisterOnRecv() will get called *once* for each socket readiness event. | ||
| // So, when we get notified below in io_event_.wait() we might read less data | ||
| // than it is available because io_buf_ does not have enough capacity. If we loop, | ||
| // and do not attempt to read from the socket again we can deadlock. To avoid this, | ||
| // we poll once for readiness before preempting. | ||
| DoReadOnRecv(FiberSocketBase::RecvNotification{true}); | ||
| io_event_.await( | ||
| [this]() { return io_buf_.InputLen() > 0 || io_ec_ || io_buf_.AppendLen() == 0; }); | ||
|
|
||
| if (io_ec_) { | ||
| LOG_IF(WARNING, cntx()->replica_conn) << "async io error: " << ec; | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The log line prints `ec`, which is never assigned in `IoLoopV2` and is always the default (success) error code; it should print `io_ec_` instead so the actual async I/O error is logged. 🤖 Was this useful? React with 👍 or 👎 |
||
| return std::exchange(io_ec_, {}); | ||
| } | ||
|
|
||
| phase_ = PROCESS; | ||
| bool is_iobuf_full = io_buf_.AppendLen() == 0; | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No functional change from here onwards in comparison to IoLoop. |
||
|
|
||
| if (io_buf_.InputLen() > 0) { | ||
| if (redis_parser_) { | ||
| parse_status = ParseRedis(max_busy_read_cycles_cached); | ||
| } else { | ||
| DCHECK(memcache_parser_); | ||
| parse_status = ParseMemcache(); | ||
| } | ||
| } else { | ||
| parse_status = NEED_MORE; | ||
| DCHECK(io_buf_.AppendLen() == 0); | ||
| } | ||
|
|
||
| if (reply_builder_->GetError()) { | ||
| return reply_builder_->GetError(); | ||
| } | ||
|
|
||
| if (parse_status == NEED_MORE) { | ||
| parse_status = OK; | ||
|
|
||
| size_t capacity = io_buf_.Capacity(); | ||
| if (capacity < max_iobfuf_len) { | ||
| size_t parser_hint = 0; | ||
| if (redis_parser_) | ||
| parser_hint = redis_parser_->parselen_hint(); // Could be done for MC as well. | ||
|
|
||
| // If we got a partial request and we managed to parse its | ||
| // length, make sure we have space to store it instead of | ||
| // increasing space incrementally. | ||
| // (Note: The buffer object is only working in power-of-2 sizes, | ||
| // so there's no danger of accidental O(n^2) behavior.) | ||
| if (parser_hint > capacity) { | ||
| UpdateIoBufCapacity(io_buf_, stats_, | ||
| [&]() { io_buf_.Reserve(std::min(max_iobfuf_len, parser_hint)); }); | ||
| } | ||
|
|
||
| // If we got a partial request because iobuf was full, grow it up to | ||
| // a reasonable limit to save on Recv() calls. | ||
| if (is_iobuf_full && capacity < max_iobfuf_len / 2) { | ||
| // Last io used most of the io_buf to the end. | ||
| UpdateIoBufCapacity(io_buf_, stats_, [&]() { | ||
| io_buf_.Reserve(capacity * 2); // Valid growth range. | ||
| }); | ||
| } | ||
|
|
||
| if (io_buf_.AppendLen() == 0U) { | ||
| // it can happen with memcached but not for RedisParser, because RedisParser fully | ||
| // consumes the passed buffer | ||
| LOG_EVERY_T(WARNING, 10) | ||
| << "Maximum io_buf length reached, consider to increase max_client_iobuf_len flag"; | ||
| } | ||
| } | ||
| } else if (parse_status != OK) { | ||
| break; | ||
| } | ||
| } while (peer->IsOpen()); | ||
|
|
||
| return parse_status; | ||
| } | ||
|
|
||
| void ResetStats() { | ||
| auto& cstats = tl_facade_stats->conn_stats; | ||
| cstats.pipelined_cmd_cnt = 0; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`DF_ARG` is only set when `inputs.df-arg` equals 'epoll', which prevents other values (e.g., the workflow’s 'expiremental_io_loop_v2') from being propagated to pytest. 🤖 Was this useful? React with 👍 or 👎