diff --git a/daemon/Buffer.cpp b/daemon/Buffer.cpp
index 2a9dfb01..a162e387 100644
--- a/daemon/Buffer.cpp
+++ b/daemon/Buffer.cpp
@@ -193,6 +193,17 @@ void Buffer::beginFrame(FrameType frameType)
     packInt(static_cast(frameType));
 }
 
+void Buffer::writeRawFrame(lib::Span frame)
+{
+    if (mIncludeResponseType) {
+        packInt(static_cast(ResponseType::APC_DATA));
+    }
+    // Reserve space for the length
+    // NOLINTNEXTLINE(bugprone-narrowing-conversions, hicpp-signed-bitwise)
+    mWritePos = (mWritePos + sizeof(int32_t)) & mask;
+    writeBytes(frame.data(), frame.size());
+}
+
 void Buffer::abortFrame()
 {
     mWritePos = mCommitPos;
diff --git a/daemon/Buffer.h b/daemon/Buffer.h
index 26eb6b19..a8ee3698 100644
--- a/daemon/Buffer.h
+++ b/daemon/Buffer.h
@@ -56,6 +56,7 @@ class Buffer : public IBufferControl, public IRawFrameBuilderWithDirectAccess {
     int packInt64(int64_t x) override;
     void writeBytes(const void * data, std::size_t count) override;
     void writeString(std::string_view str) override;
+    void writeRawFrame(lib::Span frame);
 
     void beginFrame(FrameType frameType) override;
     void abortFrame() override;
diff --git a/daemon/CMakeLists.txt b/daemon/CMakeLists.txt
index be51afb4..059c4045 100644
--- a/daemon/CMakeLists.txt
+++ b/daemon/CMakeLists.txt
@@ -4,8 +4,8 @@ CMAKE_MINIMUM_REQUIRED(VERSION 3.16 FATAL_ERROR)
 OPTION(ENABLE_VCPKG "Pull in dependencies using vcpkg" ON)
 
 IF(ENABLE_VCPKG)
-    SET(VCPKG_OVERLAY_TRIPLETS ${CMAKE_CURRENT_SOURCE_DIR}/cmake/triplets)
-    SET(CMAKE_TOOLCHAIN_FILE ${CMAKE_CURRENT_SOURCE_DIR}/../vcpkg/scripts/buildsystems/vcpkg.cmake)
+    SET(VCPKG_OVERLAY_TRIPLETS "${CMAKE_CURRENT_SOURCE_DIR}/cmake/triplets" CACHE STRING "")
+    SET(CMAKE_TOOLCHAIN_FILE "${CMAKE_CURRENT_SOURCE_DIR}/../vcpkg/scripts/buildsystems/vcpkg.cmake" CACHE STRING "")
 ENDIF()
 
 PROJECT(gatord C CXX)
@@ -115,8 +115,6 @@ SET(GATORD_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/AnnotateListener.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/CCNDriver.h
     ${CMAKE_CURRENT_SOURCE_DIR}/Child.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/Child.h
-    ${CMAKE_CURRENT_SOURCE_DIR}/Command.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/Command.h
     ${CMAKE_CURRENT_SOURCE_DIR}/CommitTimeChecker.h
     ${CMAKE_CURRENT_SOURCE_DIR}/Config.h
     ${CMAKE_CURRENT_SOURCE_DIR}/Configuration.h
@@ -217,18 +215,17 @@ SET(GATORD_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/AnnotateListener.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/SummaryBuffer.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/SummaryBuffer.h
     ${CMAKE_CURRENT_SOURCE_DIR}/Time.h
-    ${CMAKE_CURRENT_SOURCE_DIR}/Tracepoints.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/Tracepoints.h
     ${CMAKE_CURRENT_SOURCE_DIR}/TtraceDriver.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/TtraceDriver.h
     ${CMAKE_CURRENT_SOURCE_DIR}/UEvent.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/UEvent.h
     ${CMAKE_CURRENT_SOURCE_DIR}/UserSpaceSource.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/UserSpaceSource.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/agents/agent_environment.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/agents/agent_environment.h
     ${CMAKE_CURRENT_SOURCE_DIR}/agents/agent_worker_base.h
     ${CMAKE_CURRENT_SOURCE_DIR}/agents/agent_worker.h
     ${CMAKE_CURRENT_SOURCE_DIR}/agents/agent_workers_process.h
-    ${CMAKE_CURRENT_SOURCE_DIR}/agents/apc/intermediate_buffer_consumer.h
     ${CMAKE_CURRENT_SOURCE_DIR}/agents/common/coalescing_cpu_monitor.h
     ${CMAKE_CURRENT_SOURCE_DIR}/agents/common/nl_cpu_monitor.h
     ${CMAKE_CURRENT_SOURCE_DIR}/agents/common/polling_cpu_monitor.h
@@ -240,24 +237,39 @@ SET(GATORD_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/AnnotateListener.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/agents/ext_source/ext_source_agent_main.h
${CMAKE_CURRENT_SOURCE_DIR}/agents/ext_source/ext_source_agent_worker.h ${CMAKE_CURRENT_SOURCE_DIR}/agents/ext_source/ipc_sink_wrapper.h - ${CMAKE_CURRENT_SOURCE_DIR}/agents/perf/apc_encoders.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/agents/perf/apc_encoders.h ${CMAKE_CURRENT_SOURCE_DIR}/agents/perf/async_buffer_builder.h + ${CMAKE_CURRENT_SOURCE_DIR}/agents/perf/async_perf_ringbuffer_monitor.hpp ${CMAKE_CURRENT_SOURCE_DIR}/agents/perf/capture_configuration.cpp ${CMAKE_CURRENT_SOURCE_DIR}/agents/perf/capture_configuration.h ${CMAKE_CURRENT_SOURCE_DIR}/agents/perf/cpufreq_counter.cpp ${CMAKE_CURRENT_SOURCE_DIR}/agents/perf/cpufreq_counter.h - ${CMAKE_CURRENT_SOURCE_DIR}/agents/perf/detail/perf_buffer_consumer_detail.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/agents/perf/detail/perf_buffer_consumer_detail.h - ${CMAKE_CURRENT_SOURCE_DIR}/agents/perf/frame_encoder.h + ${CMAKE_CURRENT_SOURCE_DIR}/agents/perf/cpu_info.h + ${CMAKE_CURRENT_SOURCE_DIR}/agents/perf/events/event_binding_manager.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/agents/perf/events/event_bindings.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/agents/perf/events/event_configuration.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/agents/perf/events/perf_activator.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/agents/perf/events/perf_activator.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/agents/perf/events/perf_event_utils.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/agents/perf/events/perf_event_utils.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/agents/perf/events/perf_ringbuffer_mmap.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/agents/perf/events/types.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/agents/perf/perf_agent.h + ${CMAKE_CURRENT_SOURCE_DIR}/agents/perf/perf_agent_main.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/agents/perf/perf_agent_main.h + ${CMAKE_CURRENT_SOURCE_DIR}/agents/perf/perf_agent_worker.h ${CMAKE_CURRENT_SOURCE_DIR}/agents/perf/perf_buffer_consumer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/agents/perf/perf_buffer_consumer.h + ${CMAKE_CURRENT_SOURCE_DIR}/agents/perf/perf_capture_cpu_monitor.h ${CMAKE_CURRENT_SOURCE_DIR}/agents/perf/perf_capture.h + ${CMAKE_CURRENT_SOURCE_DIR}/agents/perf/perf_capture_helper.h ${CMAKE_CURRENT_SOURCE_DIR}/agents/perf/perf_driver_summary.cpp ${CMAKE_CURRENT_SOURCE_DIR}/agents/perf/perf_driver_summary.h + ${CMAKE_CURRENT_SOURCE_DIR}/agents/perf/perf_frame_packer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/agents/perf/perf_frame_packer.hpp ${CMAKE_CURRENT_SOURCE_DIR}/agents/perf/record_types.h + ${CMAKE_CURRENT_SOURCE_DIR}/agents/perf/source_adapter.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/agents/perf/source_adapter.h ${CMAKE_CURRENT_SOURCE_DIR}/agents/perf/sync_generator.h - ${CMAKE_CURRENT_SOURCE_DIR}/agents/perf/tracepoint_formats.h ${CMAKE_CURRENT_SOURCE_DIR}/agents/spawn_agent.cpp ${CMAKE_CURRENT_SOURCE_DIR}/agents/spawn_agent.h ${CMAKE_CURRENT_SOURCE_DIR}/android/AndroidActivityManager.cpp @@ -342,6 +354,7 @@ SET(GATORD_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/AnnotateListener.cpp ${CMAKE_CURRENT_SOURCE_DIR}/armnn/TimestampCorrector.cpp ${CMAKE_CURRENT_SOURCE_DIR}/armnn/TimestampCorrector.h ${CMAKE_CURRENT_SOURCE_DIR}/async/asio_traits.h + ${CMAKE_CURRENT_SOURCE_DIR}/async/async_line_reader.hpp ${CMAKE_CURRENT_SOURCE_DIR}/async/completion_handler.h ${CMAKE_CURRENT_SOURCE_DIR}/async/continuations/async_initiate.h ${CMAKE_CURRENT_SOURCE_DIR}/async/continuations/continuation.h @@ -367,23 +380,37 @@ SET(GATORD_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/AnnotateListener.cpp ${CMAKE_CURRENT_SOURCE_DIR}/async/continuations/detail/then.h ${CMAKE_CURRENT_SOURCE_DIR}/async/continuations/detail/then_state.h 
${CMAKE_CURRENT_SOURCE_DIR}/async/continuations/detail/trace.h + ${CMAKE_CURRENT_SOURCE_DIR}/async/continuations/detail/unpack_tuple.h + ${CMAKE_CURRENT_SOURCE_DIR}/async/continuations/detail/unpack_tuple_state.h ${CMAKE_CURRENT_SOURCE_DIR}/async/continuations/detail/unpack_variant.h ${CMAKE_CURRENT_SOURCE_DIR}/async/continuations/detail/unpack_variant_state.h ${CMAKE_CURRENT_SOURCE_DIR}/async/continuations/detail/use_continuation_state.h ${CMAKE_CURRENT_SOURCE_DIR}/async/continuations/nop_receiver.h ${CMAKE_CURRENT_SOURCE_DIR}/async/continuations/operations.h + ${CMAKE_CURRENT_SOURCE_DIR}/async/continuations/stored_continuation.h ${CMAKE_CURRENT_SOURCE_DIR}/async/continuations/use_continuation.h ${CMAKE_CURRENT_SOURCE_DIR}/async/netlink/nl_protocol.h ${CMAKE_CURRENT_SOURCE_DIR}/async/netlink/uevents.h + ${CMAKE_CURRENT_SOURCE_DIR}/async/proc/async_exec.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/async/proc/async_exec.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/async/proc/async_process.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/async/proc/async_process.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/async/proc/async_proc_poller.cpp ${CMAKE_CURRENT_SOURCE_DIR}/async/proc/async_proc_poller.h ${CMAKE_CURRENT_SOURCE_DIR}/async/proc/async_read_proc_maps.h ${CMAKE_CURRENT_SOURCE_DIR}/async/proc/async_read_proc_sys_dependencies.h ${CMAKE_CURRENT_SOURCE_DIR}/async/proc/async_wait_for_process.h + ${CMAKE_CURRENT_SOURCE_DIR}/async/proc/process_monitor.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/async/proc/process_monitor.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/async/proc/process_state.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/async/proc/process_state_tracker.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/async/proc/wait.hpp ${CMAKE_CURRENT_SOURCE_DIR}/capture/CaptureProcess.cpp ${CMAKE_CURRENT_SOURCE_DIR}/capture/CaptureProcess.h ${CMAKE_CURRENT_SOURCE_DIR}/capture/Environment.cpp ${CMAKE_CURRENT_SOURCE_DIR}/capture/Environment.h ${CMAKE_CURRENT_SOURCE_DIR}/capture/internal/UdpListener.h + ${CMAKE_CURRENT_SOURCE_DIR}/ipc/async_streamline_sender.h ${CMAKE_CURRENT_SOURCE_DIR}/ipc/codec.h ${CMAKE_CURRENT_SOURCE_DIR}/ipc/message_key.h ${CMAKE_CURRENT_SOURCE_DIR}/ipc/messages.h @@ -394,11 +421,16 @@ SET(GATORD_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/AnnotateListener.cpp ${CMAKE_CURRENT_SOURCE_DIR}/lib/Assert.h ${CMAKE_CURRENT_SOURCE_DIR}/lib/AutoClosingFd.h ${CMAKE_CURRENT_SOURCE_DIR}/lib/EnumUtils.h + ${CMAKE_CURRENT_SOURCE_DIR}/lib/error_code_or.hpp ${CMAKE_CURRENT_SOURCE_DIR}/lib/exception.h ${CMAKE_CURRENT_SOURCE_DIR}/lib/File.cpp ${CMAKE_CURRENT_SOURCE_DIR}/lib/FileDescriptor.cpp ${CMAKE_CURRENT_SOURCE_DIR}/lib/FileDescriptor.h ${CMAKE_CURRENT_SOURCE_DIR}/lib/File.h + ${CMAKE_CURRENT_SOURCE_DIR}/lib/forked_process.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/lib/forked_process.h + ${CMAKE_CURRENT_SOURCE_DIR}/lib/forked_process_utils.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/lib/forked_process_utils.h ${CMAKE_CURRENT_SOURCE_DIR}/lib/Format.h ${CMAKE_CURRENT_SOURCE_DIR}/lib/FsEntry.cpp ${CMAKE_CURRENT_SOURCE_DIR}/lib/FsEntry.h @@ -406,8 +438,6 @@ SET(GATORD_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/AnnotateListener.cpp ${CMAKE_CURRENT_SOURCE_DIR}/lib/GenericTimer.h ${CMAKE_CURRENT_SOURCE_DIR}/lib/Istream.h ${CMAKE_CURRENT_SOURCE_DIR}/lib/Memory.h - ${CMAKE_CURRENT_SOURCE_DIR}/lib/memory_pool.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/lib/memory_pool.h ${CMAKE_CURRENT_SOURCE_DIR}/lib/PmuCommonEvents.h ${CMAKE_CURRENT_SOURCE_DIR}/lib/Popen.cpp ${CMAKE_CURRENT_SOURCE_DIR}/lib/Popen.h @@ -436,17 +466,11 @@ SET(GATORD_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/AnnotateListener.cpp 
    ${CMAKE_CURRENT_SOURCE_DIR}/linux/PerCoreIdentificationThread.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/linux/PerCoreIdentificationThread.h
    ${CMAKE_CURRENT_SOURCE_DIR}/linux/perf/attr_to_key_mapping_tracker.h
-    ${CMAKE_CURRENT_SOURCE_DIR}/linux/perf/id_to_key_mapping_tracker.h
    ${CMAKE_CURRENT_SOURCE_DIR}/linux/perf/IPerfAttrsConsumer.h
-    ${CMAKE_CURRENT_SOURCE_DIR}/linux/perf/IPerfBufferConsumer.h
    ${CMAKE_CURRENT_SOURCE_DIR}/linux/perf/IPerfGroups.h
    ${CMAKE_CURRENT_SOURCE_DIR}/linux/perf/PerfAttrsBuffer.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/linux/perf/PerfAttrsBuffer.h
-    ${CMAKE_CURRENT_SOURCE_DIR}/linux/perf/PerfBuffer.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/linux/perf/PerfBuffer.h
    ${CMAKE_CURRENT_SOURCE_DIR}/linux/perf/PerfConfig.h
-    ${CMAKE_CURRENT_SOURCE_DIR}/linux/perf/PerfCpuOnlineMonitor.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/linux/perf/PerfCpuOnlineMonitor.h
    ${CMAKE_CURRENT_SOURCE_DIR}/linux/perf/PerfDriverConfiguration.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/linux/perf/PerfDriverConfiguration.h
    ${CMAKE_CURRENT_SOURCE_DIR}/linux/perf/PerfDriver.cpp
@@ -458,14 +482,8 @@ SET(GATORD_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/AnnotateListener.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/linux/perf/PerfEventGroupIdentifier.h
    ${CMAKE_CURRENT_SOURCE_DIR}/linux/perf/PerfGroups.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/linux/perf/PerfGroups.h
-    ${CMAKE_CURRENT_SOURCE_DIR}/linux/perf/PerfSource.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/linux/perf/PerfSource.h
-    ${CMAKE_CURRENT_SOURCE_DIR}/linux/perf/PerfSyncThreadBuffer.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/linux/perf/PerfSyncThreadBuffer.h
    ${CMAKE_CURRENT_SOURCE_DIR}/linux/perf/PerfSyncThread.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/linux/perf/PerfSyncThread.h
-    ${CMAKE_CURRENT_SOURCE_DIR}/linux/perf/PerfToMemoryBuffer.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/linux/perf/PerfToMemoryBuffer.h
    ${CMAKE_CURRENT_SOURCE_DIR}/linux/perf/PerfUtils.h
    ${CMAKE_CURRENT_SOURCE_DIR}/linux/proc/ProcessChildren.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/linux/proc/ProcessChildren.h
@@ -481,6 +499,8 @@ SET(GATORD_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/AnnotateListener.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/linux/proc/ProcStatFileRecord.h
    ${CMAKE_CURRENT_SOURCE_DIR}/linux/SysfsSummaryInformation.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/linux/SysfsSummaryInformation.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/linux/Tracepoints.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/linux/Tracepoints.h
    ${CMAKE_CURRENT_SOURCE_DIR}/logging/agent_log.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/logging/agent_log.h
    ${CMAKE_CURRENT_SOURCE_DIR}/logging/global_log.cpp
@@ -494,11 +514,13 @@ SET(GATORD_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/AnnotateListener.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/mali_userspace/MaliDevice.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/mali_userspace/MaliDevice.h
    ${CMAKE_CURRENT_SOURCE_DIR}/mali_userspace/MaliGPUClockPolledDriverCounter.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/mali_userspace/MaliGPUClockPolledDriver.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/mali_userspace/MaliGPUClockPolledDriver.h
    ${CMAKE_CURRENT_SOURCE_DIR}/mali_userspace/MaliHwCntrDriver.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/mali_userspace/MaliHwCntrDriver.h
    ${CMAKE_CURRENT_SOURCE_DIR}/mali_userspace/MaliHwCntr.h
    ${CMAKE_CURRENT_SOURCE_DIR}/mali_userspace/MaliHwCntrNames.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/mali_userspace/MaliHwCntrNamesBifrost.h
    ${CMAKE_CURRENT_SOURCE_DIR}/mali_userspace/MaliHwCntrReader.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/mali_userspace/MaliHwCntrReader.h
    ${CMAKE_CURRENT_SOURCE_DIR}/mali_userspace/MaliHwCntrSource.cpp
diff --git a/daemon/Child.cpp b/daemon/Child.cpp
index 5c041d48..c9e0dc95 100644
--- a/daemon/Child.cpp
+++ b/daemon/Child.cpp
@@ -3,7 +3,6 @@
 #include "Child.h"
 
 #include "CapturedXML.h"
-#include "Command.h"
 #include "ConfigurationXML.h"
 #include "CounterXML.h"
 #include "Driver.h"
@@ -23,6 +22,7 @@
 #include "StreamlineSetup.h"
 #include "UserSpaceSource.h"
 #include "armnn/ArmNNSource.h"
+#include "capture/CaptureProcess.h"
 #include "lib/Assert.h"
 #include "lib/FsUtils.h"
 #include "lib/WaitForProcessPoller.h"
@@ -72,21 +72,33 @@ void handleException()
 std::unique_ptr Child::createLocal(agents::i_agent_spawner_t & spawner,
                                    Drivers & drivers,
                                    const Child::Config & config,
+                                   capture::capture_process_event_listener_t & event_listener,
                                    logging::last_log_error_supplier_t last_error_supplier,
                                    logging::log_setup_supplier_t log_setup_supplier)
 {
-    return std::unique_ptr(
-        new Child(spawner, drivers, nullptr, config, std::move(last_error_supplier), std::move(log_setup_supplier)));
+    return std::unique_ptr(new Child(spawner,
+                                     drivers,
+                                     nullptr,
+                                     config,
+                                     event_listener,
+                                     std::move(last_error_supplier),
+                                     std::move(log_setup_supplier)));
 }
 
 std::unique_ptr Child::createLive(agents::i_agent_spawner_t & spawner,
                                   Drivers & drivers,
                                   OlySocket & sock,
+                                  capture::capture_process_event_listener_t & event_listener,
                                   logging::last_log_error_supplier_t last_error_supplier,
                                   logging::log_setup_supplier_t log_setup_supplier)
 {
-    return std::unique_ptr(
-        new Child(spawner, drivers, &sock, {}, std::move(last_error_supplier), std::move(log_setup_supplier)));
+    return std::unique_ptr(new Child(spawner,
+                                     drivers,
+                                     &sock,
+                                     {},
+                                     event_listener,
+                                     std::move(last_error_supplier),
+                                     std::move(log_setup_supplier)));
 }
 
 Child * Child::getSingleton()
@@ -110,6 +122,7 @@ Child::Child(agents::i_agent_spawner_t & spawner,
              Drivers & drivers,
              OlySocket * sock,
              Child::Config config,
+             capture::capture_process_event_listener_t & event_listener,
              logging::last_log_error_supplier_t last_error_supplier,
              logging::log_setup_supplier_t log_setup_supplier)
     : haltPipeline(),
@@ -117,6 +130,7 @@ Child::Child(agents::i_agent_spawner_t & spawner,
       sender(),
      drivers(drivers),
      socket(sock),
+      event_listener(event_listener),
      numExceptions(0),
      sessionEnded(),
      config(std::move(config)),
@@ -149,7 +163,7 @@ Child::~Child()
     runtime_assert(prevSingleton == this, "Exchanged Child::gSingleton with something other than this");
 }
 
-void Child::run(int notify_pid)
+void Child::run()
 {
     prctl(PR_SET_NAME, reinterpret_cast(&"gatord-child"), 0, 0, 0);
 
@@ -258,73 +272,36 @@ void Child::run(int notify_pid)
                                       primarySourceProvider.getDetectedUncorePmus());
     }
 
-    std::set appPids;
-    bool enableOnCommandExec = false;
-    if (!gSessionData.mCaptureCommand.empty()) {
-        std::string captureCommand;
-        for (auto const & cmd : gSessionData.mCaptureCommand) {
-            captureCommand += " ";
-            captureCommand += cmd;
-        }
-        LOG_WARNING("Running command:%s", captureCommand.c_str());
-
-        // This is set before any threads are started so it doesn't need
-        // to be protected by a mutex
-        command = std::make_shared(Command::run([this]() {
-            if (gSessionData.mStopOnExit) {
-                LOG_DEBUG("Ending session because command exited");
-                endSession();
-            }
-        }));
-
-        enableOnCommandExec = true;
-
-        appPids.insert(command->getPid());
-        LOG_DEBUG("Profiling pid: %d", command->getPid());
-    }
-
     // set up stop thread early, so that ping commands get replied to, even if the
     // setup phase below takes a long time.
     std::thread stopThread {[this]() { stopThreadEntryPoint(); }};
 
     // tell the controller that we're ready for the app to start
-    if (notify_pid > 0) {
-        LOG_DEBUG("Telling notify_pid (%d) to start the target app", notify_pid);
-        kill(notify_pid, SIGUSR1);
-    }
-
-    if (gSessionData.mWaitForProcessCommand != nullptr) {
-        LOG_DEBUG("Waiting for pids for command '%s'", gSessionData.mWaitForProcessCommand);
-
-        WaitForProcessPoller poller {gSessionData.mWaitForProcessCommand};
-
-        while ((!poller.poll(appPids)) && !sessionEnded) {
-            usleep(1000);
+    auto execTargetCallback = [this]() {
+        LOG_DEBUG("Received exec_target callback");
+        if (!event_listener.waiting_for_target()) {
+            handleException();
         }
-        LOG_DEBUG("Got pids for command '%s'", gSessionData.mWaitForProcessCommand);
-    }
-
-    // we only consider --pid for stop on exit if we weren't given an
-    // app to run
-    std::set watchPids = appPids.empty() ? gSessionData.mPids : appPids;
-
-    appPids.insert(gSessionData.mPids.begin(), gSessionData.mPids.end());
+    };
 
     lib::Waiter waitTillStart;
     lib::Waiter waitForAgents;
 
     auto startedCallback = [&]() {
+        LOG_DEBUG("Received start capture callback");
         waitTillStart.disable();
-        if (command) {
-            command->start();
-        }
     };
 
-    auto newPrimarySource = primarySourceProvider.createPrimarySource(senderSem,
-                                                                      startedCallback,
-                                                                      appPids,
-                                                                      drivers.getFtraceDriver(),
-                                                                      enableOnCommandExec);
+    auto newPrimarySource = primarySourceProvider.createPrimarySource(
+        senderSem,
+        *sender,
+        [this]() -> bool { return sessionEnded; },
+        execTargetCallback,
+        startedCallback,
+        gSessionData.mPids,
+        drivers.getFtraceDriver(),
+        !gSessionData.mCaptureCommand.empty(),
+        agent_workers_process);
 
     if (newPrimarySource == nullptr) {
         LOG_ERROR("%s", primarySourceProvider.getPrepareFailedMessage());
         handleException();
@@ -377,11 +354,6 @@ void Child::run(int notify_pid)
         durationThread = std::thread([&]() { durationThreadEntryPoint(waitTillStart, waitTillEnd); });
     }
 
-    std::thread watchPidsThread {};
-    if (gSessionData.mStopOnExit && !watchPids.empty()) {
-        watchPidsThread = std::thread([&]() { watchPidsThreadEntryPoint(watchPids, waitTillEnd); });
-    }
-
     if (shouldStartUserSpaceSource(drivers.getAllPolledConst())) {
         if (!addSource(createUserSpaceSource(senderSem, drivers.getAllPolled()))) {
             LOG_ERROR("Unable to prepare userspace source for capture");
@@ -418,9 +390,6 @@ void Child::run(int notify_pid)
         thread.join();
     }
 
-    if (watchPidsThread.joinable()) {
-        watchPidsThread.join();
-    }
     if (durationThread.joinable()) {
         durationThread.join();
     }
@@ -448,12 +417,6 @@ void Child::run(int notify_pid)
 
     sources.clear();
     sender.reset();
-
-    if (command) {
-        LOG_DEBUG("Waiting for command (PID: %d)", command->getPid());
-        command->join();
-        LOG_DEBUG("Command finished");
-    }
 }
 
 template
@@ -497,10 +460,6 @@ void Child::doEndSession()
 
     sessionEnded = true;
 
-    if (command) {
-        command->cancel();
-    }
-
     for (auto & source : sources) {
         source->interrupt();
     }
@@ -517,10 +476,6 @@ void Child::cleanupException()
         _exit(SECOND_EXCEPTION_EXIT_CODE);
     }
 
-    if (command) {
-        command->cancel();
-    }
-
     if (socket != nullptr) {
         if (sender) {
             // send the error, regardless of the command sent by Streamline
diff --git a/daemon/Child.h b/daemon/Child.h
index bc5695be..83d1f7ae 100644
--- a/daemon/Child.h
+++ b/daemon/Child.h
@@ -6,6 +6,7 @@
 #include "Configuration.h"
 #include "Source.h"
 #include "agents/agent_workers_process.h"
+#include "capture/CaptureProcess.h"
 #include "lib/AutoClosingFd.h"
 #include "logging/suppliers.h"
 
@@ -38,11 +39,13 @@ class Child {
     static std::unique_ptr
createLocal(agents::i_agent_spawner_t & spawner, Drivers & drivers, const Config & config, + capture::capture_process_event_listener_t & event_listener, logging::last_log_error_supplier_t last_error_supplier, logging::log_setup_supplier_t log_setup_supplier); static std::unique_ptr createLive(agents::i_agent_spawner_t & spawner, Drivers & drivers, OlySocket & sock, + capture::capture_process_event_listener_t & event_listener, logging::last_log_error_supplier_t last_error_supplier, logging::log_setup_supplier_t log_setup_supplier); @@ -55,12 +58,9 @@ class Child { Child & operator=(Child &&) = delete; /** - * @brief Runs the capture process. If notify_pid is set then SIGUSR1 will be sent to - * that pid when the capture process is ready for the target app to be started. - * - * @param notify_pid The pid to signal when the target app should be started. When <= 0 no signal is sent. + * @brief Runs the capture process */ - void run(int notify_pid); + void run(); void endSession(int signum = 0); @@ -80,6 +80,7 @@ class Child { std::unique_ptr sender; Drivers & drivers; OlySocket * socket; + capture::capture_process_event_listener_t & event_listener; int numExceptions; std::mutex sessionEndedMutex {}; lib::AutoClosingFd sessionEndEventFd {}; @@ -95,6 +96,7 @@ class Child { Drivers & drivers, OlySocket * sock, Config config, + capture::capture_process_event_listener_t & event_listener, logging::last_log_error_supplier_t last_error_supplier, logging::log_setup_supplier_t log_setup_supplier); diff --git a/daemon/Command.cpp b/daemon/Command.cpp deleted file mode 100644 index 1a47bf59..00000000 --- a/daemon/Command.cpp +++ /dev/null @@ -1,333 +0,0 @@ -/* Copyright (C) 2014-2022 by Arm Limited. All rights reserved. */ - -#include "Command.h" - -#include "ExitStatus.h" -#include "Logging.h" -#include "SessionData.h" -#include "lib/FileDescriptor.h" -#include "lib/String.h" - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static bool getUid(const char * const name, const char * const tmpDir, uid_t * const uid) -{ - // Lookups may fail when using a different libc or a statically compiled executable - lib::printf_str_t<32> gatorTemp {"%s/gator_temp", tmpDir}; - - const int fd = open(gatorTemp, O_CREAT | O_CLOEXEC, S_IRUSR | S_IWUSR); - if (fd < 0) { - return false; - } - close(fd); - - lib::printf_str_t<128> cmd {"chown %s %s || rm -f %s", name, gatorTemp.c_str(), gatorTemp.c_str()}; - - const int pid = fork(); - if (pid < 0) { - LOG_ERROR("fork failed"); - handleException(); - } - - if (pid == 0) { - execlp("sh", "sh", "-c", cmd, nullptr); - exit(COMMAND_FAILED_EXIT_CODE); - } - - while ((waitpid(pid, nullptr, 0) < 0) && (errno == EINTR)) { - } - - struct stat st; - int result = -1; - if (stat(gatorTemp, &st) != 0) { - return false; - } - result = st.st_uid; - unlink(gatorTemp); - *uid = result; - return true; -} - -static bool getUid(const char * const name, uid_t * const uid, gid_t * const gid) -{ - // Look up the username - struct passwd * const user = getpwnam(name); - if (user != nullptr) { - *uid = user->pw_uid; - *gid = user->pw_gid; - return true; - } - - // Unable to get the user without getpwanm, so create a unique uid by adding a fixed number to the pid - *gid = 0x484560f8 + getpid(); - - // Are we on Linux - if (access("/tmp", W_OK) == 0) { - return getUid(name, "/tmp", uid); - } - - // Are we on android - if (access("/data", W_OK) == 0) { - return getUid(name, "/data", uid); - } - - return false; -} - -static void 
checkCommandStatus(int pid) -{ - int status; - while (waitpid(pid, &status, WNOHANG) == -1) { - if (errno != EINTR) { - LOG_ERROR("Could not waitpid(%d) on child command. (%s)", pid, strerror(errno)); - return; - } - } - - if (WIFEXITED(status)) { - const int exitCode = WEXITSTATUS(status); - - // add some special case handling for when we are launching via bash shell - if ((gSessionData.mCaptureCommand.size() == 3) && (gSessionData.mCaptureCommand[0] == "sh") - && (gSessionData.mCaptureCommand[1] == "-c")) { - if (exitCode == 126) { - LOG_ERROR("Failed to run command %s: Permission denied or is a directory", - gSessionData.mCaptureCommand[2].c_str()); - handleException(); - } - if (exitCode == 127) { - LOG_ERROR("Failed to run command %s: Command not found", gSessionData.mCaptureCommand[2].c_str()); - handleException(); - } - } - - if (exitCode != 0) { - LOG_ERROR("command exited with code %d", exitCode); - } - else { - LOG_DEBUG("command exited with code 0"); - } - } - else if (WIFSIGNALED(status)) { - const int signal = WTERMSIG(status); - if (signal != SIGTERM && signal != SIGINT) { // should we consider any others normal? - LOG_ERROR("command terminated abnormally: %s", strsignal(signal)); - } - } -} - -void Command::start() -{ - sem_post(&(sharedData->start)); -} - -void Command::cancel() -{ - State expected = INITIALIZING; - if (sharedData->state.compare_exchange_strong(expected, KILLED_OR_EXITED)) { - // command will kill itself when it's finished initializing - return; - } - - expected = RUNNING; - if (!sharedData->state.compare_exchange_strong(expected, BEING_KILLED)) { - // already cancelled by someone else or already exited - return; - } - - start(); // just in case it was still waiting to start - - // once it is RUNNING, the command will have created it's own process group - // so we can signal the whole process group - if (::kill(-pid, SIGTERM) == -1) { - LOG_ERROR("kill(%d) failed (%d) %s", -pid, errno, strerror(errno)); - } - - if (sharedData->state.exchange(KILLED_OR_EXITED) != BEING_KILLED) { - // then must of exited in the meantime - // the waiter thread recognized it was BEING_KILLED, so left us to reap it. - checkCommandStatus(pid); - } -} - -Command Command::run(const std::function & terminationCallback) -{ - constexpr size_t buffer_size = 1 << 8; - - uid_t uid = geteuid(); - gid_t gid = getegid(); - const char * const name = gSessionData.mCaptureUser; - - // if name is null then just use the current user - if (name != nullptr) { - // for non root. 
- // Verify root permissions - const bool isRoot = (geteuid() == 0); - if (!isRoot) { - LOG_ERROR("Unable to set user to %s for command because gatord is not running as root", name); - handleException(); - } - - if (!getUid(name, &uid, &gid)) { - LOG_ERROR("Unable to look up the user %s, please double check that the user exists", name); - handleException(); - } - } - - auto sharedData = shared_memory::make_unique(); - // get references now before we move the pointer - auto & state = sharedData->state; - auto & start = sharedData->start; - - int pipefd[2]; - if (lib::pipe_cloexec(pipefd) != 0) { - LOG_ERROR("pipe failed"); - handleException(); - } - - const int pid = fork(); - if (pid < 0) { - LOG_ERROR("fork failed"); - handleException(); - } - - if (pid == 0) { - // child - lib::printf_str_t buf {}; - - // Reset signal handlers while waiting for exec - signal(SIGINT, SIG_DFL); - signal(SIGTERM, SIG_DFL); - signal(SIGABRT, SIG_DFL); - signal(SIGALRM, SIG_DFL); - - //Need to change the GPID so that all children of this process will have this processes PID as their GPID. - setpgid(pid, pid); - - State expected = INITIALIZING; - if (!state.compare_exchange_strong(expected, RUNNING)) { - // we've been cancelled before even starting - exit(0); - } - - prctl(PR_SET_NAME, reinterpret_cast(&"gatord-command"), 0, 0, 0); - - close(pipefd[0]); - - std::vector cmd_str {}; - for (const auto & string : gSessionData.mCaptureCommand) { - cmd_str.push_back(const_cast(string.c_str())); - } - cmd_str.push_back(nullptr); - char * const * const commands = cmd_str.data(); - - // Gator runs at a high priority, reset the priority to the default - if (setpriority(PRIO_PROCESS, syscall(__NR_gettid), 0) == -1) { - buf.printf("setpriority failed"); - goto fail_exit; - } - - if (name != nullptr) { - if (setgroups(1, &gid) != 0) { - buf.printf("setgroups failed for user: %s, please check if the user is part of group", name); - goto fail_exit; - } - if (setresgid(gid, gid, gid) != 0) { - buf.printf("setresgid failed for user: %s, please check if the user is part of GID %d", name, gid); - goto fail_exit; - } - if (setresuid(uid, uid, uid) != 0) { - buf.printf("setresuid failed for user: %s, please check if the user is part of UID %d", name, uid); - goto fail_exit; - } - } - - { - const char * const path = - gSessionData.mCaptureWorkingDir == nullptr ? "/" : gSessionData.mCaptureWorkingDir; - if (chdir(path) != 0) { - buf.printf("Unable to cd to %s, please verify the directory exists and is accessible to %s", - path, - name != nullptr ? 
name : "the current user"); - goto fail_exit; - } - } - sem_wait(&start); - - prctl(PR_SET_NAME, reinterpret_cast(commands[0]), 0, 0, 0); - execvp(commands[0], commands); - buf.printf("Failed to run command %s\nexecvp failed: %s", commands[0], strerror(errno)); - - fail_exit: - if (buf.size() > 0) { - const ssize_t bytes = write(pipefd[1], buf.c_str(), buf.size()); - // Can't do anything if this fails - (void) bytes; - } - - exit(COMMAND_FAILED_EXIT_CODE); - } - else { - // parent - - close(pipefd[1]); - - return {pid, - std::thread {[pipefd, pid, terminationCallback, &state]() { - prctl(PR_SET_NAME, reinterpret_cast(&"gatord-command-reader"), 0, 0, 0); - std::array buf {}; - ssize_t bytesRead = 0; - while (true) { - const ssize_t bytes = read(pipefd[0], buf.data() + bytesRead, buf.size() - bytesRead); - if (bytes > 0) { - bytesRead += bytes; - } - else if (bytes == 0) { - break; - } - else if (errno != EAGAIN) { - buf[bytesRead] = '\0'; - LOG_ERROR("Failed to read pipe from child: %s", strerror(errno)); - break; - } - } - - close(pipefd[0]); - - if (bytesRead > 0) { - LOG_ERROR("%s", buf.data()); - handleException(); - } - else { - siginfo_t info; - while (waitid(P_PID, pid, &info, WEXITED | WNOWAIT) == -1) { - if (errno != EINTR) { - LOG_ERROR("waitid(%d) failed (%d) %s", pid, errno, strerror(errno)); - break; - } - } - - if (state.exchange(KILLED_OR_EXITED) != BEING_KILLED) { - checkCommandStatus(pid); - } - // else cancel is being called. We don't want to reap - // the pid while it's trying to kill it in case the pid gets reused. - - terminationCallback(); - } - }}, - std::move(sharedData)}; - } -} diff --git a/daemon/Command.h b/daemon/Command.h deleted file mode 100644 index c7e6a49c..00000000 --- a/daemon/Command.h +++ /dev/null @@ -1,51 +0,0 @@ -/* Copyright (C) 2014-2021 by Arm Limited. All rights reserved. */ - -#ifndef COMMAND_H -#define COMMAND_H - -#include "lib/SharedMemory.h" - -#include -#include -#include - -#include - -class Command { -public: - static Command run(const std::function & terminationCallback); - - void start(); - void cancel(); - void join() { thread.join(); }; - - int getPid() const { return pid; }; - -private: - enum State { - INITIALIZING, - RUNNING, - BEING_KILLED, - KILLED_OR_EXITED, - }; - - struct SharedData { - sem_t start {}; - std::atomic state {}; - - SharedData() { sem_init(&start, 1, 0); } - - ~SharedData() { sem_destroy(&start); } - }; - - int pid; - std::thread thread; - shared_memory::unique_ptr sharedData; - - Command(int pid, std::thread thread, shared_memory::unique_ptr sharedData) - : pid {pid}, thread {std::move(thread)}, sharedData {std::move(sharedData)} - { - } -}; - -#endif // COMMAND_H diff --git a/daemon/Config.h b/daemon/Config.h index eed5c409..4d8029f4 100644 --- a/daemon/Config.h +++ b/daemon/Config.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2021 by Arm Limited. All rights reserved. */ +/* Copyright (C) 2010-2022 by Arm Limited. All rights reserved. 
*/ #ifndef CONFIG_H #define CONFIG_H @@ -48,4 +48,24 @@ #endif #endif +#ifndef CONFIG_DISABLE_CONTINUATION_TRACING +#define CONFIG_DISABLE_CONTINUATION_TRACING 0 +#endif + +#ifndef CONFIG_ASSERTIONS +#if (!defined(NDEBUG) || (defined(GATOR_UNIT_TESTS) && GATOR_UNIT_TESTS)) +#define CONFIG_ASSERTIONS 1 +#else +#define CONFIG_ASSERTIONS 0 +#endif +#endif + +#ifndef CONFIG_LOG_TRACE +#if (!defined(NDEBUG) || (defined(GATOR_UNIT_TESTS) && GATOR_UNIT_TESTS)) +#define CONFIG_LOG_TRACE 1 +#else +#define CONFIG_LOG_TRACE 0 +#endif +#endif + #endif // CONFIG_H diff --git a/daemon/ExternalDriver.cpp b/daemon/ExternalDriver.cpp index d3129335..3322b40e 100644 --- a/daemon/ExternalDriver.cpp +++ b/daemon/ExternalDriver.cpp @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2021 by Arm Limited. All rights reserved. */ +/* Copyright (C) 2010-2022 by Arm Limited. All rights reserved. */ #include "ExternalDriver.h" @@ -223,8 +223,12 @@ void ExternalDriver::start() pos = HEADER_SIZE; // ns/sec / samples/sec = ns/sample // For sample rate of none, sample every 100ms - buffer_utils::packInt(buf, pos, NS_PER_S / (gSessionData.mSampleRate == 0 ? 10 : gSessionData.mSampleRate)); - buffer_utils::packInt(buf, pos, gSessionData.mLiveRate); + static constexpr std::uint64_t min_rate = 10UL; + buffer_utils::packInt( + buf, + pos, + static_cast(NS_PER_S / (gSessionData.mSampleRate == 0 ? min_rate : gSessionData.mSampleRate))); + buffer_utils::packInt(buf, pos, static_cast(gSessionData.mLiveRate)); buffer_utils::writeLEInt(buf + 1, pos); if (!lib::writeAll(mUds, buf, pos)) { LOG_ERROR("Unable to send start message"); diff --git a/daemon/ExternalSource.cpp b/daemon/ExternalSource.cpp index f928ee5d..7fb83434 100644 --- a/daemon/ExternalSource.cpp +++ b/daemon/ExternalSource.cpp @@ -159,7 +159,7 @@ class ExternalSourceImpl : public ExternalSource { prctl(PR_SET_NAME, reinterpret_cast(&"gatord-external"), 0, 0, 0); // Gator runs at a high priority, reset the priority to the default - if (setpriority(PRIO_PROCESS, syscall(__NR_gettid), 0) == -1) { + if (setpriority(PRIO_PROCESS, lib::gettid(), 0) == -1) { LOG_ERROR("setpriority failed"); handleException(); } diff --git a/daemon/FtraceDriver.cpp b/daemon/FtraceDriver.cpp index fac54e79..d4d55f3f 100644 --- a/daemon/FtraceDriver.cpp +++ b/daemon/FtraceDriver.cpp @@ -7,10 +7,10 @@ #include "Logging.h" #include "PrimarySourceProvider.h" #include "SessionData.h" -#include "Tracepoints.h" #include "lib/FileDescriptor.h" #include "lib/String.h" #include "lib/Utils.h" +#include "linux/Tracepoints.h" #include "linux/perf/IPerfAttrsConsumer.h" #include @@ -280,7 +280,7 @@ namespace { } // Gator runs at a high priority, reset the priority to the default - if (setpriority(PRIO_PROCESS, syscall(__NR_gettid), 0) == -1) { + if (setpriority(PRIO_PROCESS, lib::gettid(), 0) == -1) { LOG_ERROR("setpriority failed"); handleException(); } @@ -435,8 +435,8 @@ void FtraceDriver::readEvents(mxml_node_t * const xml) } // The perf clock was added in 3.10 - const int kernelVersion = lib::parseLinuxVersion(utsname); - if (kernelVersion < KERNEL_VERSION(3, 10, 0)) { + auto const kernelVersion = lib::parseLinuxVersion(utsname); + if (kernelVersion < KERNEL_VERSION(3U, 10U, 0U)) { mSupported = false; LOG_SETUP("Ftrace is disabled\nFor full ftrace functionality please upgrade to Linux 3.10 or later. 
With " "user space " @@ -444,10 +444,10 @@ void FtraceDriver::readEvents(mxml_node_t * const xml) "available."); return; } - mMonotonicRawSupport = kernelVersion >= KERNEL_VERSION(4, 2, 0); + mMonotonicRawSupport = kernelVersion >= KERNEL_VERSION(4U, 2U, 0U); // Is debugfs or tracefs available? - if (access(traceFsConstants.path, R_OK) != 0) { + if (::access(traceFsConstants.path, R_OK) != 0) { mSupported = false; LOG_SETUP("Ftrace is disabled\nUnable to locate the tracing directory"); return; @@ -500,7 +500,7 @@ void FtraceDriver::readEvents(mxml_node_t * const xml) } if (enable != nullptr) { lib::printf_str_t buf {"%s/%s/enable", traceFsConstants.path__events, enable}; - if (access(buf, W_OK) != 0) { + if (::access(buf, W_OK) != 0) { LOG_SETUP("%s is disabled\n%s was not found", counter, buf.c_str()); continue; } @@ -508,8 +508,8 @@ void FtraceDriver::readEvents(mxml_node_t * const xml) LOG_DEBUG("Using ftrace for %s", counter); if (is_cpu_frequency) { - bool const has_cpuinfo = (access("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_cur_freq", R_OK) == 0); - bool const has_scaling = (access("/sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq", R_OK) == 0); + bool const has_cpuinfo = (::access("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_cur_freq", R_OK) == 0); + bool const has_scaling = (::access("/sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq", R_OK) == 0); if (has_cpuinfo || has_scaling) { setCounters( new CpuFrequencyFtraceCounter(getCounters(), traceFsConstants, counter, enable, has_cpuinfo)); @@ -552,7 +552,8 @@ std::pair, bool> FtraceDriver::prepare() { int fd; // The below call can be slow on loaded high-core count systems. - fd = open(traceFsConstants.path__trace, O_WRONLY | O_TRUNC | O_CLOEXEC); + // NOLINTNEXTLINE(hicpp-signed-bitwise) + fd = ::open(traceFsConstants.path__trace, O_WRONLY | O_TRUNC | O_CLOEXEC); if (fd < 0) { LOG_ERROR("Unable truncate ftrace buffer: %s", strerror(errno)); handleException(); @@ -571,7 +572,7 @@ std::pair, bool> FtraceDriver::prepare() // core count systems. The idea is that hopefully only on the first // capture, the trace clock needs to be changed. On subsequent captures, // the right clock is already being used. 
- int fd = open(traceFsConstants.path__trace_clock, O_RDONLY | O_CLOEXEC); + int fd = ::open(traceFsConstants.path__trace_clock, O_RDONLY | O_CLOEXEC); if (fd < 0) { LOG_ERROR("Couldn't open %s", traceFsConstants.path__trace_clock); handleException(); @@ -598,7 +599,7 @@ std::pair, bool> FtraceDriver::prepare() } if (!gSessionData.mFtraceRaw) { - const int fd = open(traceFsConstants.path__trace_pipe, O_RDONLY | O_CLOEXEC); + const int fd = ::open(traceFsConstants.path__trace_pipe, O_RDONLY | O_CLOEXEC); if (fd < 0) { LOG_ERROR("Unable to open trace_pipe"); handleException(); @@ -633,7 +634,7 @@ std::pair, bool> FtraceDriver::prepare() lib::printf_str_t buf {"%s/per_cpu/cpu%zu/trace_pipe_raw", traceFsConstants.path, cpu}; - const int tfd = open(buf, O_RDONLY | O_CLOEXEC); + const int tfd = ::open(buf, O_RDONLY | O_CLOEXEC); (new FtraceReader(&mBarrier, cpu, tfd, pfd[0], pfd[1], pageSize))->start(); result.first.push_back(pfd[0]); } diff --git a/daemon/FtraceDriver.h b/daemon/FtraceDriver.h index cdb845b5..59294d4d 100644 --- a/daemon/FtraceDriver.h +++ b/daemon/FtraceDriver.h @@ -4,7 +4,7 @@ #define FTRACEDRIVER_H #include "SimpleDriver.h" -#include "Tracepoints.h" +#include "linux/Tracepoints.h" #include #include diff --git a/daemon/GatorMain.cpp b/daemon/GatorMain.cpp index 5b81ff62..25932219 100644 --- a/daemon/GatorMain.cpp +++ b/daemon/GatorMain.cpp @@ -43,9 +43,6 @@ #include #include -using gator::android::IAppGatorRunner; -using gator::io::IMonitor; - namespace { std::array signalPipe; @@ -61,9 +58,6 @@ namespace { //Gator ready messages constexpr std::string_view gator_shell_ready = "Gator ready"; - constexpr std::string_view gator_agent_ready = "Gator agent ready"; - constexpr std::string_view start_app_msg = "start app\n"; - constexpr int AGENT_STD_OUT_UNEXPECTED_MESSAGE_LIMIT = 32; constexpr unsigned int VERSION_STRING_CHAR_SIZE = 256; } @@ -130,6 +124,11 @@ void updateSessionData(const ParserResult & result) gSessionData.mAndroidPackage = result.mAndroidPackage; gSessionData.mAndroidActivity = result.mAndroidActivity; + // when profiling an android package, use the package name as the '--wait-process' value + if ((gSessionData.mAndroidPackage != nullptr) && (gSessionData.mWaitForProcessCommand == nullptr)) { + gSessionData.mWaitForProcessCommand = gSessionData.mAndroidPackage; + } + //These values are set from command line and are alos part of session.xml //and hence cannot be modified during parse session if ((result.parameterSetFlag & USE_CMDLINE_ARG_SAMPLE_RATE) != 0) { @@ -180,337 +179,9 @@ void dumpCounterDetails(const ParserResult & result, logging::log_setup_supplier } } -int startAppGator(int argc, char ** argv) -{ - auto stripped_args = std::vector(); - // TODO: use Boost filter iterator - stripped_args.push_back(argv[0]); - stripped_args.insert(stripped_args.end(), &argv[2], &argv[argc]); - - auto global_logging = std::make_shared(); - - logging::set_log_sink(global_logging); - logging::last_log_error_supplier_t last_log_error_supplier { - [global_logging]() { return global_logging->get_last_log_error(); }}; - logging::log_setup_supplier_t log_setup_supplier { - [global_logging]() { return global_logging->get_log_setup_messages(); }}; - - GatorCLIParser parser; - global_logging->set_debug_enabled(GatorCLIParser::hasDebugFlag(argc, argv)); - - if (stripped_args.size() > std::numeric_limits::max()) { - LOG_ERROR("Command line too long"); - return EXCEPTION_EXIT_CODE; - } - parser.parseCLIArguments(static_cast(stripped_args.size()), - stripped_args.data(), - "", - 
MAX_PERFORMANCE_COUNTERS, - gSrcMd5); - const ParserResult & result = parser.result; - if (result.mode == ParserResult::ExecutionMode::EXIT) { - handleException(); - } - - updateSessionData(result); - - if (gSessionData.mLocalCapture) { - //for agent capture apc will be created in /data/data/ - //which is the cwd. - std::array cwd; - if (getcwd(cwd.data(), cwd.size()) != nullptr) { - std::string appCwd(cwd.data()); - auto apcPathInPackage = android_utils::getApcFolderInAndroidPackage(appCwd, result.mTargetPath); - if (apcPathInPackage.has_value()) { - if (gSessionData.mTargetPath != nullptr) { - free(const_cast(gSessionData.mTargetPath)); - } - gSessionData.mTargetPath = strdup(apcPathInPackage.value().c_str()); - LOG_DEBUG("The directory will be created at '%s'", gSessionData.mTargetPath); - } - else { - LOG_ERROR("Failed to create the directory in android package"); - return EXCEPTION_EXIT_CODE; - } - } - else { - LOG_ERROR("Failed to create the directory in android package"); - return EXCEPTION_EXIT_CODE; - } - } - // Call before setting up the SIGCHLD handler, as system() spawns child processes - Drivers drivers {result.mSystemWide, - readPmuXml(result.pmuPath), - result.mDisableCpuOnlining, - result.mDisableKernelAnnotations, - TraceFsConstants::detect()}; - - // Handle child exit codes - signal(SIGCHLD, handler); - - // an event handler that waits for capture events and forwards the notifications - // to the parent process via this process' stdout pipe - class local_event_handler_t : public capture::capture_process_event_listener_t { - public: - ~local_event_handler_t() override = default; - - void process_initialised() override { std::cout << gator_agent_ready.data() << std::endl; } - - void waiting_for_target() override { std::cout << start_app_msg.data() << std::endl; } - - } event_handler {}; - - capture::beginCaptureProcess(result, - drivers, - signalPipe, - last_log_error_supplier, - log_setup_supplier, - event_handler); - return 0; -} - -struct StateAndPid { - bool exited; - /** - * PID will contain the exit code once the process has finished. - */ - int pid; -}; - -int waitForAppAgentToExit(const std::string & packageName, - const std::string & activityName, - IAppGatorRunner & runner, - IMonitor & monitor, - const lib::PopenResult & cmdResult) -{ - std::string match(gator_agent_ready.data()); - match.append("\n"); - std::string matchStartAppMessage(start_app_msg.data()); - - StateAndPid agentState {false, 0}; - bool isStdOutReadFinished = false; - bool isStdErrReadFinished = false; - - bool isGatorReadyReceived = false; - bool isStartAppReceived = false; - - std::string appGatorMessageReader; - std::string appGatorErrorReader; - - auto activityManager = create_android_activity_manager(packageName, activityName); - if (!activityManager || !activityManager->stop()) { - LOG_WARNING("Attempt to stop the target activity failed. 
It may need to be terminated manually."); - } - while (!agentState.exited || !isStdOutReadFinished || !isStdErrReadFinished) { - std::array events; - int ready = monitor.wait(events.data(), events.size(), -1); - if (ready < 0) { - LOG_ERROR("Epoll wait on app gator FDs failed"); - break; - } - for (int i = 0; i < ready; ++i) { - if (events[i].data.fd == cmdResult.out) { - char value = '\0'; - if (lib::read(cmdResult.out, &value, 1) <= 0) { //EOF or error - if (agentState.exited) { - isStdOutReadFinished = true; - continue; - } - } - if (isGatorReadyReceived && isStartAppReceived) { - //Not processing any other std::out from agent - continue; - } - appGatorMessageReader += value; - if (!isGatorReadyReceived) { - if (match.rfind(appGatorMessageReader, 0) == std::string::npos) { - appGatorMessageReader.clear(); - } - isGatorReadyReceived = (appGatorMessageReader == match); - if (isGatorReadyReceived) { - std::cout << gator_shell_ready.data() << std::endl; - std::cout.flush(); - appGatorMessageReader.clear(); - } - } - else if (!isStartAppReceived) { - if (matchStartAppMessage.rfind(appGatorMessageReader, 0) == std::string::npos) { - appGatorMessageReader.clear(); - } - isStartAppReceived = appGatorMessageReader == matchStartAppMessage; - if (isStartAppReceived) { - appGatorMessageReader.clear(); - if (activityManager) { - if (!activityManager->start()) { - LOG_ERROR("Failed to start activity (%s) from package (%s)", // - activityName.c_str(), // - packageName.c_str()); - // send a signal to the child process so that it can exit cleanly. - // we'll get a SIGCHLD when it exits. - runner.sendSignalsToAppGator(SIGTERM); - //handling exception here to make sure the unused APC directory is deleted. - handleException(); - } - } - else { - LOG_DEBUG("Application (%s) could not be started, please start manually.", - packageName.c_str()); - } - } - } - else { - if (appGatorMessageReader.length() > AGENT_STD_OUT_UNEXPECTED_MESSAGE_LIMIT) { - LOG_DEBUG("Unexpected messages in std::out from agent (message = %s )", - appGatorMessageReader.c_str()); - appGatorMessageReader.clear(); - } - } - } - else if (events[i].data.fd == cmdResult.err) { - char value = '\0'; - if (lib::read(cmdResult.err, &value, 1) <= 0) { //EOF or error - if (agentState.exited) { - isStdErrReadFinished = true; - } - } - if (value != '\n') { - appGatorErrorReader += value; - } - else { - //Log what ever read so far - LOG_ERROR("From Agent %s", appGatorErrorReader.c_str()); - appGatorErrorReader.clear(); - } - } - else if (events[i].data.fd == signalPipe[0]) { - int signum; - const auto amountRead = lib::read(signalPipe[0], &signum, sizeof(signum)); - if (amountRead != sizeof(signum)) { - // NOLINTNEXTLINE(concurrency-mt-unsafe) - LOG_DEBUG("read failed %d %s", errno, strerror(errno)); - } - if (signum == SIGCHLD) { - LOG_DEBUG("Received SIGCHILD"); - - int status = 0; - auto pid = lib::waitpid(-1, &status, WNOHANG); - - // NOLINTNEXTLINE(hicpp-signed-bitwise) - if (pid == pid_t(-1)) { - // NOLINTNEXTLINE(concurrency-mt-unsafe) - LOG_DEBUG("waitpid() failed %d (%s)", errno, strerror(errno)); - // wasn't gator-child or it was but just a stop/continue - // so just ignore it - continue; - } - - if (pid == 0) { - LOG_DEBUG("waitpid() returned zero; spurious SIGCHILD"); - continue; - } - - LOG_DEBUG("waitpid() succeeded with status=%d, pid=%d", status, pid); - - if (pid != cmdResult.pid) { - LOG_DEBUG("... Ignoring as not the child process"); - continue; - } - - if (agentState.exited) { - LOG_DEBUG("... 
Ignoring as already exited"); - continue; - } - - // NOLINTNEXTLINE(hicpp-signed-bitwise) - if (WIFEXITED(status)) { - // NOLINTNEXTLINE(hicpp-signed-bitwise) - agentState = {true, WEXITSTATUS(status)}; - } - // NOLINTNEXTLINE(hicpp-signed-bitwise) - else if (WIFSIGNALED(status)) { - // NOLINTNEXTLINE(hicpp-signed-bitwise) - agentState = {true, WTERMSIG(status)}; - } - else { - LOG_DEBUG("... Ignoring as not exited or signal"); - } - } - else { - LOG_DEBUG("Forwarding signal %d to child process", signum); - runner.sendSignalsToAppGator(signum); - } - } - } - } - - // capture has ended so try to stop the target app - if (activityManager && !activityManager->stop()) { - LOG_WARNING("Attempt to stop the target activity failed. It may need to be terminated manually."); - } - return agentState.pid; -} - -int startShellGator(const ParserResult & result) -{ - auto maybe_app_gator_path = gSessionData.mAndroidPackage != nullptr // - ? gator::android::deploy_to_package(gSessionData.mAndroidPackage) // - : std::nullopt; - - if (!maybe_app_gator_path) { - LOG_ERROR("Unable to copy gatord to the target directory."); - handleException(); - } - - if (gSessionData.mLocalCapture && !android_utils::canCreateApcDirectory(gSessionData.mTargetPath)) { - LOG_ERROR("Failed to create the directory '%s'", gSessionData.mTargetPath); - handleException(); - } - int exitCode = 0; - const auto * const activityName = result.mAndroidActivity == nullptr ? "" : result.mAndroidActivity; - // Handle child exit codes - signal(SIGCHLD, handler); - - auto runner = - gator::android::create_app_gator_runner(maybe_app_gator_path.value(), result.mAndroidPackage, "--child"); - // start an epoll loop to read from the child & signal pipes - auto cmdResult = runner->startGator(result.getArgValuePairs()); - auto monitor = gator::io::create_monitor(); - if (!monitor->init() || !monitor->add(signalPipe[0]) || !monitor->add(cmdResult->out) - || !monitor->add(cmdResult->err)) { - LOG_ERROR("Failed to set up the IO event loop. Capture cannot continue."); - handleException(); - } - - if (cmdResult.has_value()) { - if (cmdResult->pid < 0) { - LOG_ERROR("Failed to start a gator process. 
Errno: %d", cmdResult->pid); - return cmdResult->pid; - } - - exitCode = waitForAppAgentToExit(result.mAndroidPackage, activityName, *runner, *monitor, cmdResult.value()); - - if (exitCode == 0) { - if (gSessionData.mLocalCapture && gSessionData.mAndroidPackage != nullptr - && gSessionData.mTargetPath != nullptr) { - if (!android_utils::copyApcToActualPath(std::string(gSessionData.mAndroidPackage), - std::string(gSessionData.mTargetPath))) { - LOG_ERROR("There was an error while copying apc, please try manually to pull from (/data/data/%s)", - gSessionData.mAndroidPackage); - } - } - } - } - else { - LOG_ERROR("Failed to get a popenresult "); - return EXCEPTION_EXIT_CODE; - } - - return exitCode; -} - -int gator_local_capture(const ParserResult & result, - const logging::last_log_error_supplier_t & last_log_error_supplier, - const logging::log_setup_supplier_t & log_setup_supplier) +int start_capture_process(const ParserResult & result, + const logging::last_log_error_supplier_t & last_log_error_supplier, + const logging::log_setup_supplier_t & log_setup_supplier) { // Call before setting up the SIGCHLD handler, as system() spawns child processes Drivers drivers {result.mSystemWide, @@ -552,14 +223,19 @@ int gator_local_capture(const ParserResult & result, std::cout << gator_shell_ready.data() << std::endl; } - void waiting_for_target() override + [[nodiscard]] bool waiting_for_target() override { - if (activity_manager) { - LOG_DEBUG("Starting the target application now..."); - if (!activity_manager->start()) { - LOG_ERROR("The target application could not be started automatically. Please start it manually."); - } + if (!activity_manager) { + return true; + } + + LOG_DEBUG("Starting the target application now..."); + if (activity_manager->start()) { + return true; } + + LOG_ERROR("The target application could not be started automatically. Please start it manually."); + return false; } private: @@ -606,13 +282,7 @@ int gator_main(int argc, char ** argv) signal(SIGABRT, handler); signal(SIGHUP, handler); signal(SIGUSR1, handler); - gator::process::set_parent_death_signal(SIGHUP); - - // check for the special command line arg to see if we're being asked to - // start in child mode - if (argc > 2 && strcmp("--child", argv[1]) == 0) { - return startAppGator(argc, argv); - } + gator::process::set_parent_death_signal(SIGKILL); prctl(PR_SET_NAME, reinterpret_cast(&"gatord-main"), 0, 0, 0); @@ -644,19 +314,14 @@ int gator_main(int argc, char ** argv) // configure any environment settings we'll need to start sampling // e.g. perf security settings. - auto environment = capture::prepareCaptureEnvironment(gSessionData); - - // if we're not being asked to do a system-wide capture then start the gator agent in the - // context of the target android app - if (!gSessionData.mSystemWide && gSessionData.mAndroidPackage != nullptr) { - return startShellGator(result); - } + auto environment = capture::prepareCaptureEnvironment(); + environment->postInit(gSessionData); if (result.mode == ParserResult::ExecutionMode::PRINT) { dumpCounterDetails(result, log_setup_supplier); } else { - return gator_local_capture(result, last_log_error_supplier, log_setup_supplier); + return start_capture_process(result, last_log_error_supplier, log_setup_supplier); } return 0; diff --git a/daemon/LocalCapture.cpp b/daemon/LocalCapture.cpp index 65020344..9f368e09 100644 --- a/daemon/LocalCapture.cpp +++ b/daemon/LocalCapture.cpp @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2021 by Arm Limited. All rights reserved. 
*/ +/* Copyright (C) 2010-2022 by Arm Limited. All rights reserved. */ #include "LocalCapture.h" @@ -26,14 +26,14 @@ static char * createUniqueDirectory(const char * initialPath, const char * endin handleException(); } else if (initialPath[0] != '/') { - if (getcwd(path, PATH_MAX) == nullptr) { + if (getcwd(path, PATH_MAX - 1) == nullptr) { LOG_DEBUG("Unable to retrieve the current working directory"); } strncat(path, "/", PATH_MAX - strlen(path) - 1); strncat(path, initialPath, PATH_MAX - strlen(path) - 1); } else { - strncpy(path, initialPath, PATH_MAX); + strncpy(path, initialPath, PATH_MAX - 1); path[PATH_MAX - 1] = 0; // strncpy does not guarantee a null-terminated string } @@ -96,7 +96,7 @@ namespace local_capture { char dstfilename[PATH_MAX]; for (const auto & element : list) { - strncpy(dstfilename, gSessionData.mAPCDir, PATH_MAX); + strncpy(dstfilename, gSessionData.mAPCDir, PATH_MAX - 1); dstfilename[PATH_MAX - 1] = 0; // strncpy does not guarantee a null-terminated string if (gSessionData.mAPCDir[strlen(gSessionData.mAPCDir) - 1] != '/') { strncat(dstfilename, "/", PATH_MAX - strlen(dstfilename) - 1); diff --git a/daemon/Logging.h b/daemon/Logging.h index 2619ab68..2e41ee4c 100644 --- a/daemon/Logging.h +++ b/daemon/Logging.h @@ -2,6 +2,7 @@ #pragma once +#include "Config.h" #include "lib/Span.h" #include "lib/source_location.h" @@ -12,6 +13,7 @@ #define LOG_ITEM(level, format, ...) \ ::logging::detail::do_log_item((level), lib::source_loc_t {__FILE__, __LINE__}, (format), ##__VA_ARGS__) +#if CONFIG_LOG_TRACE /** Log a 'trace' level item */ #define LOG_TRACE(format, ...) \ do { \ @@ -19,6 +21,13 @@ LOG_ITEM(::logging::log_level_t::trace, (format), ##__VA_ARGS__); \ } \ } while (false) +#else +/** ignore LOG_TRACE */ +template +inline void LOG_TRACE(char const *, Args &&...) +{ +} +#endif /** Log a 'debug' level item */ #define LOG_DEBUG(format, ...) LOG_ITEM(::logging::log_level_t::debug, (format), ##__VA_ARGS__) @@ -38,6 +47,20 @@ /** Log a 'fatal' level item */ #define LOG_FATAL(format, ...) LOG_ITEM(::logging::log_level_t::fatal, (format), ##__VA_ARGS__) +/** Log a 'child stdout' level item */ +#define LOG_STDOUT(tid, text) \ + ::logging::detail::do_log_item((tid), \ + ::logging::log_level_t::child_stdout, \ + lib::source_loc_t {__FILE__, __LINE__}, \ + (text)) + +/** Log a 'child stderr' level item */ +#define LOG_STDERR(tid, text) \ + ::logging::detail::do_log_item((tid), \ + ::logging::log_level_t::child_stderr, \ + lib::source_loc_t {__FILE__, __LINE__}, \ + (text)) + /** Log an 'error' if the value of ec is not EOF */ #define LOG_ERROR_IF_NOT_EOF(ec, format, ...) \ do { \ @@ -46,6 +69,13 @@ } \ } while (false) +#define LOG_ERROR_IF_NOT_EOF_OR_CANCELLED(ec, format, ...) 
\ + do { \ + if (((ec) != boost::asio::error::eof) && ((ec) != boost::asio::error::operation_aborted)) { \ + LOG_ERROR((format), ##__VA_ARGS__); \ + } \ + } while (false) + namespace logging { /** Possible logging levels */ enum class log_level_t { @@ -56,6 +86,8 @@ namespace logging { warning, error, fatal, + child_stdout, + child_stderr, }; // the source location @@ -105,6 +137,12 @@ namespace logging { source_loc_t const & location, const char * format, ...); + + /** Write out a log item */ + void do_log_item(log_level_t level, source_loc_t const & location, std::string_view msg); + + /** Write out a log item */ + void do_log_item(pid_t tid, log_level_t level, source_loc_t const & location, std::string_view msg); } /** @@ -116,6 +154,16 @@ namespace logging { */ void log_item(log_level_t level, source_loc_t const & location, std::string_view message); + /** + * Store some log item to the log + * + * @param tid The originating thread ID + * @param level The log level + * @param location The file/line source location + * @param message The log message + */ + void log_item(thread_id_t tid, log_level_t level, source_loc_t const & location, std::string_view message); + /** * Store some log item to the log * diff --git a/daemon/PrimarySourceProvider.cpp b/daemon/PrimarySourceProvider.cpp index 822397f8..c70c955c 100644 --- a/daemon/PrimarySourceProvider.cpp +++ b/daemon/PrimarySourceProvider.cpp @@ -2,12 +2,14 @@ #include "PrimarySourceProvider.h" +#include "Child.h" #include "Config.h" #include "CpuUtils.h" #include "DiskIODriver.h" #include "FSDriver.h" #include "HwmonDriver.h" #include "ICpuInfo.h" +#include "ISender.h" #include "Logging.h" #include "MemInfoDriver.h" #include "NetDriver.h" @@ -20,7 +22,6 @@ #if CONFIG_SUPPORT_PERF #include "linux/perf/PerfDriver.h" #include "linux/perf/PerfDriverConfiguration.h" -#include "linux/perf/PerfSource.h" #endif #if CONFIG_SUPPORT_PROC_POLLING #include "non_root/NonRootDriver.h" @@ -169,18 +170,28 @@ namespace { [[nodiscard]] lib::Span getDetectedUncorePmus() const override { return uncorePmus; } - std::unique_ptr createPrimarySource(sem_t & senderSem, - std::function profilingStartedCallback, - const std::set & appTids, - FtraceDriver & ftraceDriver, - bool enableOnCommandExec) override + std::unique_ptr createPrimarySource( + sem_t & senderSem, + ISender & sender, + std::function session_ended_callback, + std::function execTargetAppCallback, + std::function profilingStartedCallback, + const std::set & appTids, + FtraceDriver & ftraceDriver, + bool enableOnCommandExec, + agents::agent_workers_process_t & agent_workers_process) override { return driver.create_source(senderSem, - profilingStartedCallback, + sender, + std::move(session_ended_callback), + std::move(execTargetAppCallback), + std::move(profilingStartedCallback), appTids, ftraceDriver, enableOnCommandExec, - cpuInfo); + cpuInfo, + uncorePmus, + agent_workers_process); } private: @@ -295,14 +306,22 @@ namespace { [[nodiscard]] lib::Span getDetectedUncorePmus() const override { return {}; } - std::unique_ptr createPrimarySource(sem_t & senderSem, - std::function profilingStartedCallback, - const std::set & /*appTids*/, - FtraceDriver & /*ftraceDriver*/, - bool /*enableOnCommandExec*/) override + std::unique_ptr createPrimarySource( + sem_t & senderSem, + ISender & /*sender*/, + std::function /*session_ended_callback*/, + std::function execTargetAppCallback, + std::function profilingStartedCallback, + const std::set & /*appTids*/, + FtraceDriver & /*ftraceDriver*/, + bool 
/*enableOnCommandExec*/, + agents::agent_workers_process_t & /*agent_workers_process*/) override { - return std::unique_ptr( - new non_root::NonRootSource(driver, senderSem, profilingStartedCallback, cpuInfo)); + return std::unique_ptr(new non_root::NonRootSource(driver, + senderSem, + std::move(execTargetAppCallback), + std::move(profilingStartedCallback), + cpuInfo)); } private: diff --git a/daemon/PrimarySourceProvider.h b/daemon/PrimarySourceProvider.h index 732f8757..1a89c4df 100644 --- a/daemon/PrimarySourceProvider.h +++ b/daemon/PrimarySourceProvider.h @@ -3,6 +3,8 @@ #ifndef INCLUDE_PRIMARYSOURCEPROVIDER_H #define INCLUDE_PRIMARYSOURCEPROVIDER_H +#include "ISender.h" +#include "agents/agent_workers_process.h" #include "lib/Span.h" #include "linux/perf/PerfEventGroupIdentifier.h" @@ -80,10 +82,14 @@ class PrimarySourceProvider { /** Create the primary Source instance */ [[nodiscard]] virtual std::unique_ptr createPrimarySource( sem_t & senderSem, + ISender & sender, + std::function session_ended_callback, + std::function execTargetAppCallback, std::function profilingStartedCallback, const std::set & appTids, FtraceDriver & ftraceDriver, - bool enableOnCommandExec) = 0; + bool enableOnCommandExec, + agents::agent_workers_process_t & agent_workers_process) = 0; [[nodiscard]] virtual const ICpuInfo & getCpuInfo() const = 0; [[nodiscard]] virtual ICpuInfo & getCpuInfo() = 0; diff --git a/daemon/ProtocolVersion.h b/daemon/ProtocolVersion.h index 49af4d18..eb9397ae 100644 --- a/daemon/ProtocolVersion.h +++ b/daemon/ProtocolVersion.h @@ -3,8 +3,8 @@ /* Define the product release version / protocol version */ -// Protocol version Streamline v8.0 -#define PROTOCOL_VERSION 800 +// Protocol version Streamline v8.1 +#define PROTOCOL_VERSION 810 // Differentiates development versions from release code #define PROTOCOL_VERSION_DEV_MULTIPLIER 100000 diff --git a/daemon/SessionData.cpp b/daemon/SessionData.cpp index 21e96670..952944bd 100644 --- a/daemon/SessionData.cpp +++ b/daemon/SessionData.cpp @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2021 by Arm Limited. All rights reserved. */ +/* Copyright (C) 2010-2022 by Arm Limited. All rights reserved. */ #include "SessionData.h" @@ -139,13 +139,3 @@ void SessionData::parseSessionXML(char * xmlString) handleException(); } } - -uint64_t getTime() -{ - struct timespec ts; - if (clock_gettime(CLOCK_MONOTONIC_RAW, &ts) != 0) { - LOG_ERROR("Failed to get uptime"); - handleException(); - } - return (NS_PER_S * ts.tv_sec + ts.tv_nsec); -} diff --git a/daemon/SessionData.h b/daemon/SessionData.h index 16daf5eb..0593d330 100644 --- a/daemon/SessionData.h +++ b/daemon/SessionData.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2021 by Arm Limited. All rights reserved. */ +/* Copyright (C) 2010-2022 by Arm Limited. All rights reserved. 
*/ #ifndef SESSION_DATA_H #define SESSION_DATA_H @@ -9,6 +9,7 @@ #include "Counter.h" #include "GatorCLIFlags.h" #include "ProtocolVersion.h" +#include "Time.h" #include "lib/SharedMemory.h" #include "mxml/mxml.h" @@ -22,10 +23,6 @@ #include -#define NS_PER_S 1000000000LL -#define NS_PER_MS 1000000LL -#define NS_PER_US 1000LL - extern const char MALI_GRAPHICS[]; extern const size_t MALI_GRAPHICS_SIZE; @@ -106,8 +103,6 @@ class SessionData { extern SessionData gSessionData; extern const char * const gSrcMd5; -uint64_t getTime(); - void logCpuNotFound(); #endif // SESSION_DATA_H diff --git a/daemon/SessionXML.cpp b/daemon/SessionXML.cpp index 14271467..c63f5c48 100644 --- a/daemon/SessionXML.cpp +++ b/daemon/SessionXML.cpp @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2021 by Arm Limited. All rights reserved. */ +/* Copyright (C) 2010-2022 by Arm Limited. All rights reserved. */ #include "SessionXML.h" @@ -67,13 +67,15 @@ void SessionXML::sessionTag(mxml_node_t * tree, mxml_node_t * node) } // copy to pre-allocated strings if (mxmlElementGetAttr(node, ATTR_BUFFER_MODE) != nullptr) { - strncpy(parameters.buffer_mode, mxmlElementGetAttr(node, ATTR_BUFFER_MODE), sizeof(parameters.buffer_mode)); + strncpy(parameters.buffer_mode, mxmlElementGetAttr(node, ATTR_BUFFER_MODE), sizeof(parameters.buffer_mode) - 1); parameters.buffer_mode[sizeof(parameters.buffer_mode) - 1] = 0; // strncpy does not guarantee a null-terminated string } if (((gSessionData.parameterSetFlag & USE_CMDLINE_ARG_SAMPLE_RATE) == 0)) { if (mxmlElementGetAttr(node, ATTR_SAMPLE_RATE) != nullptr) { - strncpy(parameters.sample_rate, mxmlElementGetAttr(node, ATTR_SAMPLE_RATE), sizeof(parameters.sample_rate)); + strncpy(parameters.sample_rate, + mxmlElementGetAttr(node, ATTR_SAMPLE_RATE), + sizeof(parameters.sample_rate) - 1); parameters.sample_rate[sizeof(parameters.sample_rate) - 1] = 0; // strncpy does not guarantee a null-terminated string } diff --git a/daemon/Time.h b/daemon/Time.h index d385141e..73b25cb8 100644 --- a/daemon/Time.h +++ b/daemon/Time.h @@ -2,7 +2,37 @@ #pragma once +#include "Logging.h" + #include +#include + +static constexpr std::uint64_t NS_PER_S = 1000000000ULL; +static constexpr std::uint64_t NS_PER_MS = 1000000ULL; +static constexpr std::uint64_t NS_PER_US = 1000ULL; /** Opaque numeric timestamp type, representing the time in nanoseconds since the capture start */ enum class monotonic_delta_t : std::uint64_t; + +#if defined(GATOR_UNIT_TESTS) && (GATOR_UNIT_TESTS != 0) +std::uint64_t getTime(); +#else + +/** The getTime function reads the current value of CLOCK_MONOTONIC_RAW as a u64 in nanoseconds */ +inline std::uint64_t getTime() +{ + struct timespec ts; + if (clock_gettime(CLOCK_MONOTONIC_RAW, &ts) != 0) { + LOG_ERROR("Failed to get uptime"); + handleException(); + } + return (NS_PER_S * ts.tv_sec + ts.tv_nsec); +} + +#endif + +/** Convert the current CLOCK_MONOTONIC_RAW to some delta from the start of the capture */ +inline monotonic_delta_t monotonic_delta_now(std::uint64_t monotonic_start) +{ + return monotonic_delta_t(getTime() - monotonic_start); +} diff --git a/daemon/agents/agent_environment.cpp b/daemon/agents/agent_environment.cpp new file mode 100644 index 00000000..db4162c8 --- /dev/null +++ b/daemon/agents/agent_environment.cpp @@ -0,0 +1,167 @@ +/* Copyright (C) 2022 by Arm Limited. All rights reserved. 
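
(A brief usage sketch of the relocated time helpers; the variable names are illustrative and not part of this patch.)

// Take a CLOCK_MONOTONIC_RAW reference point once, at capture start...
std::uint64_t const capture_start = getTime();

// ...then express later timestamps as nanosecond deltas from that point.
monotonic_delta_t const delta = monotonic_delta_now(capture_start);
// 'delta' wraps (getTime() - capture_start), i.e. nanoseconds since the capture started.
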
*/ +#include "agents/agent_environment.h" + +#include "Logging.h" +#include "agents/ext_source/ext_source_agent.h" +#include "ipc/raw_ipc_channel_sink.h" +#include "ipc/raw_ipc_channel_source.h" +#include "lib/AutoClosingFd.h" +#include "lib/String.h" +#include "logging/agent_log.h" + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +namespace agents { + namespace { + constexpr std::size_t n_threads = 2; + + lib::AutoClosingFd dup_and_close(int fd) + { + lib::AutoClosingFd dup_fd {fcntl(fd, F_DUPFD_CLOEXEC)}; + + if (!dup_fd) { + // NOLINTNEXTLINE(concurrency-mt-unsafe) + LOG_DEBUG("fcntl failed with error %d (%s)", errno, strerror(errno)); + + // not ideal, but just use the FD directly + return lib::AutoClosingFd {fd}; + } + + // now close it + close(fd); + return dup_fd; + } + + void do_wait_signal(boost::asio::signal_set & signals, + std::shared_ptr env, + async::proc::process_monitor_t & process_monitor) + { + signals.async_wait([env = std::move(env), &signals, &process_monitor](auto const & ec, auto signo) mutable { + if (ec) { + LOG_DEBUG("Signal handler received error %s", ec.message().c_str()); + return; + } + //NOLINTNEXTLINE(concurrency-mt-unsafe) + LOG_DEBUG("Received signal %d %s", signo, strsignal(signo)); + if ((signo == SIGHUP) || (signo == SIGTERM) || (signo == SIGINT)) { + env->shutdown(); + } + else if (signo == SIGCHLD) { + process_monitor.on_sigchild(); + } + else { + do_wait_signal(signals, std::move(env), process_monitor); + } + }); + } + } + + int start_agent(lib::Span args, const environment_factory_t & factory) + { + // set process name + prctl(PR_SET_NAME, reinterpret_cast(&"gatord-agent-bootstrap"), 0, 0, 0); + + // Set up global thread-safe logging + auto agent_logging = + std::make_shared(STDERR_FILENO, logging::agent_log_sink_t::get_log_file_fd()); + + logging::set_log_sink(agent_logging); + logging::set_log_enable_trace(args); + + try { + LOG_DEBUG("Bootstrapping agent process."); + + // disable buffering on in/out/err + ::setvbuf(stdin, nullptr, _IONBF, 0); + ::setvbuf(stdout, nullptr, _IONBF, 0); + ::setvbuf(stderr, nullptr, _IONBF, 0); + + // get sighup if parent exits + ::prctl(PR_SET_PDEATHSIG, SIGKILL); + + // duplicate stdin/stdout, then close them so that some spurious read/write doesn't corrupt the IPC channel + auto ipc_in = dup_and_close(STDIN_FILENO); + auto ipc_out = dup_and_close(STDOUT_FILENO); + + // setup asio context + boost::asio::io_context io_context {}; + + // process monitor + async::proc::process_monitor_t process_monitor {io_context}; + + // handle the usual signals (and SIGHUP) so we can shutdown properly + boost::asio::signal_set signals {io_context}; + signals.add(SIGCHLD); + signals.add(SIGHUP); + signals.add(SIGTERM); + signals.add(SIGINT); + + // create our IPC channels + auto ipc_sink = ipc::raw_ipc_channel_sink_t::create(io_context, std::move(ipc_out)); + auto ipc_source = ipc::raw_ipc_channel_source_t::create(io_context, std::move(ipc_in)); + + // create our agent + auto env = factory(args, io_context, process_monitor, ipc_sink, ipc_source); + // set process name + prctl(PR_SET_NAME, reinterpret_cast(env->name()), 0, 0, 0); + LOG_DEBUG("Starting agent [%s]", env->name()); + + // handle signals + do_wait_signal(signals, env, process_monitor); + + async_await_agent_shutdown(env, [&io_context]() { + // fully shut down + LOG_DEBUG("Agent is shutdown. 
Stopping io_context."); + io_context.stop(); + }); + + // start the agent + env->start(); + + // provide extra threads by way of pool + boost::asio::thread_pool threads {n_threads}; + + // start the io context on the thread pool (as the caller expects this function to return immediately) + for (std::size_t i = 0; i < n_threads; ++i) { + boost::asio::post(threads, [thread_no = i, &io_context]() { + constexpr std::size_t comm_len = 16; + + LOG_DEBUG("Launched worker thread %zu", thread_no); + + lib::printf_str_t comm_str {"gatord-iocx-%zu", thread_no}; + + prctl(PR_SET_NAME, reinterpret_cast(comm_str.c_str()), 0, 0, 0); + + // spin the io_context + io_context.run(); + }); + } + + // run the main work loop + io_context.run(); + + LOG_DEBUG("Terminating [%s] agent successfully.", env->name()); + } + catch (std::exception const & ex) { + LOG_FATAL("Unexpected exception received: what=%s", ex.what()); + return EXIT_FAILURE; + } + catch (...) { + LOG_FATAL("Unexpected exception received."); + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; + } +} diff --git a/daemon/agents/agent_environment.h b/daemon/agents/agent_environment.h new file mode 100644 index 00000000..ce57c48e --- /dev/null +++ b/daemon/agents/agent_environment.h @@ -0,0 +1,377 @@ +/* Copyright (C) 2022 by Arm Limited. All rights reserved. */ +#pragma once + +#include "Logging.h" +#include "Protocol.h" +#include "async/continuations/async_initiate.h" +#include "async/continuations/continuation.h" +#include "async/continuations/operations.h" +#include "async/continuations/stored_continuation.h" +#include "async/continuations/use_continuation.h" +#include "async/proc/process_monitor.hpp" +#include "ipc/messages.h" +#include "ipc/raw_ipc_channel_sink.h" +#include "ipc/raw_ipc_channel_source.h" +#include "logging/agent_log.h" + +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +namespace agents { + + /* + * Agent classes are expected to look like: + * + * class foo_agent_t { + * public: + * // public typedef that tells the environment what types of IPC messages the agent + * // is interested in + * using accepted_message_types = std::tuple; + * + * // for each type declared in 'accepted_message_types' a public member function to handle + * // that message and return some type of continuation. + * auto co_receive_message(message_type m); + * + * // a member function that performs any cleanup tasks when the environment is + * // shutting down. must return some type of continuation + * auto co_shutdown(); + * }; + * + * Note: These methods are not threadsafe, so it is up to the caller to synchronise them (e.g. via a start_on(..) + * continuation). + */ + + /** + * A helper template that allows the agent_environment_t to dispatch messages to strongly-typed + * handlers on the agent instance. 
+ */ + template + class message_binder_t + : public std::enable_shared_from_this> { + public: + message_binder_t(LifecycleReceiver & lifecycle_receiver, Receiver & receiver) + : lifecycle_receiver(lifecycle_receiver), receiver(receiver) + { + } + + async::continuations::polymorphic_continuation_t<> async_receive_next_message( + std::shared_ptr source) + { + using namespace async::continuations; + + return ipc::async_receive_one_of(std::move(source), + use_continuation) + | map_error() // + | unpack_variant([this](auto && msg) mutable { + return this->co_receive_message(std::forward(msg)); + }); + } + + private: + LifecycleReceiver & lifecycle_receiver; + Receiver & receiver; + + void co_receive_message(std::monostate /*msg*/) { LOG_DEBUG("Unexpected monostate IPC message received."); } + + auto co_receive_message(ipc::msg_shutdown_t msg) { return lifecycle_receiver.co_receive_message(msg); } + + template + auto co_receive_message(MessageType && msg) + { + return receiver.co_receive_message(std::forward(msg)); + } + }; + + /** + * A type-erased interface that allows an agent_environment_t to be manipulated in a generic way. + */ + class agent_environment_base_t { + public: + /** + * Callback type used by agents to trigger a clean shutdown in the event of a fatal error. Typically it will + * just call shutdown(). + */ + using terminator = std::function; + + virtual ~agent_environment_base_t() = default; + + /** + * Returns an identifier for this agent. Can be used to set the agent's process name. + */ + [[nodiscard]] virtual const char * name() const = 0; + + virtual void start() = 0; + + virtual void shutdown() = 0; + + /** + * Register a callback function to be invoked when the agent tranasitions into a + * shutdown state. + */ + virtual void add_shutdown_handler(async::continuations::stored_continuation_t<> && handler) = 0; + }; + + /** + * An agent environment manages the lifecycle of an agent instance. It's responsible for creating the agent + * instance and notifying the shell once it has started, and when it eventually shuts down. + */ + template + class agent_environment_t : public agent_environment_base_t, + public std::enable_shared_from_this> { + public: + using agent_factory = std::function(boost::asio::io_context &, + async::proc::process_monitor_t & process_monitor, + std::shared_ptr, + terminator)>; + + static auto create(std::string instance_name, + boost::asio::io_context & io, + async::proc::process_monitor_t & process_monitor, + agent_factory factory, + std::shared_ptr sink, + std::shared_ptr source) + { + return std::make_shared>(std::move(instance_name), + io, + process_monitor, + factory, + std::move(sink), + std::move(source)); + } + + /** + * Construct an environment that will use the supplied factory function to create the agent instance. 
+ */ + explicit agent_environment_t(std::string instance_name, + boost::asio::io_context & io, + async::proc::process_monitor_t & process_monitor, + agent_factory factory, + std::shared_ptr sink, + std::shared_ptr source) + : instance_name(std::move(instance_name)), + io(io), + process_monitor(process_monitor), + factory(factory), + sink(std::move(sink)), + source(std::move(source)), + strand(io), + is_shutdown(false) + { + } + + const char * name() const override { return instance_name.c_str(); } + + void start() override + { + boost::asio::post(strand, [self = this->shared_from_this()]() mutable { self->on_strand_start(); }); + } + + void shutdown() override + { + using namespace async::continuations; + + spawn("Agent shutdown", + start_on(strand) // + | then([self = this->shared_from_this()]() mutable -> polymorphic_continuation_t<> { + if (std::exchange(self->is_shutdown, true)) { + LOG_DEBUG("Shutdown requested by agent, but shutdown already in progress"); + return {}; + } + return self->co_init_shutdown(); + })); + } + + void add_shutdown_handler(async::continuations::stored_continuation_t<> && handler) override + { + boost::asio::post(strand, [self = this->shared_from_this(), handler = std::move(handler)]() mutable { + self->on_strand_add_shutdown_hander(std::move(handler)); + }); + } + + auto co_receive_message(ipc::msg_shutdown_t /*msg*/) { return on_shutdown_received(); } + + private: + std::string instance_name; + boost::asio::io_context & io; + async::proc::process_monitor_t & process_monitor; + agent_factory factory; + std::shared_ptr sink; + std::shared_ptr source; + + boost::asio::io_context::strand strand; + + std::vector> shutdown_handlers {}; + bool is_shutdown; + + std::shared_ptr agent; + + template + struct message_binder_factory_t; + template + struct message_binder_factory_t> { + static auto create_message_binder(agent_environment_t & host, AgentType & agent) + { + return std::make_shared>(host, agent); + } + }; + + void on_strand_start() + { + using namespace async::continuations; + + if (agent) { + LOG_ERROR("Start message received but agent is already running"); + return; + } + + if (is_shutdown) { + LOG_ERROR("Start called after environment has shut down"); + return; + } + + // create the agent + auto self = this->shared_from_this(); + agent = factory( + io, + process_monitor, + sink, + // The terminator must use a weak_pointer otherwise the agent and env will contain references to each + // other, preventing destruction + [self_w = this->weak_from_this()]() { + auto self = self_w.lock(); + if (self) { + self->shutdown(); + } + }); + + // send msg_ready_t to the shell + spawn("agent message loop", + start_on(strand) // + | sink->async_send_message(ipc::msg_ready_t {}, use_continuation) // + | then([self](const auto & ec, auto /*msg*/) mutable -> polymorphic_continuation_t<> { + if (ec) { + LOG_ERROR("Error sending IPC ready message: %s", ec.message().c_str()); + return start_with(); + } + + return self->co_init_receive_loop(); + }), + [self](bool) { self->shutdown(); }); + } + + auto co_init_receive_loop() + { + using namespace async::continuations; + + using message_binder_factory_type = message_binder_factory_t; + auto binder = message_binder_factory_type::create_message_binder(*this, *agent); + + auto self = this->shared_from_this(); + return repeatedly( + [self]() { return start_on(self->strand) | then([self]() { return !self->is_shutdown; }); }, + [self, binder]() mutable { + return start_on(self->strand) | binder->async_receive_next_message(self->source); + 
}); + } + + async::continuations::polymorphic_continuation_t<> on_shutdown_received() + { + using namespace async::continuations; + + if (std::exchange(is_shutdown, true)) { + LOG_DEBUG("Shutdown message received, but shutdown already in progress"); + return {}; + } + + // ask the agent to shutdown first, then clean up the environment + return start_on(strand) | co_init_shutdown(); + } + + /** + * Post the shutdown message up to the shell and run the shutdown handlers. + */ + async::continuations::polymorphic_continuation_t<> co_init_shutdown() + { + using namespace async::continuations; + + auto self = this->shared_from_this(); + return start_on(strand) // + | then([self]() mutable -> polymorphic_continuation_t<> { // + // if the agent has been started make sure we shut it down + if (self->agent) { + return self->agent->co_shutdown(); + } + return {}; + }) + | then([self]() mutable { // + return self->sink->async_send_message(ipc::msg_shutdown_t {}, use_continuation); + }) // + | then([self](const auto & ec, const auto & /*msg*/) mutable { // + if (ec) { + LOG_DEBUG("Failed to send shutdown IPC to host due to %s", ec.message().c_str()); + } + else { + LOG_TRACE("Shutdown message sent"); + } + self->call_shutdown_handlers(); + }); + } + + void on_strand_add_shutdown_hander(async::continuations::stored_continuation_t<> && handler) + { + // call the handler directly if we've already shut down + if (is_shutdown) { + resume_continuation(io, std::move(handler)); + } + else { + shutdown_handlers.emplace_back(std::move(handler)); + } + } + + void call_shutdown_handlers() + { + for (auto & handler : shutdown_handlers) { + resume_continuation(io, std::move(handler)); + } + } + }; + + /** + * Schedules a completion handler to be invoked when the agent shuts down. + */ + template + auto async_await_agent_shutdown(std::shared_ptr agent, CompletionToken && token) + { + using namespace async::continuations; + + return async_initiate_explicit( + [agent = std::move(agent)](auto && sc) { agent->add_shutdown_handler(std::forward(sc)); }, + std::forward(token)); + } + + /** + * A factory function that start_agent will use to create the actual agent instance. + * This allows the common environment setup to be shared across all agents. + */ + using environment_factory_t = + std::function(lib::Span, + boost::asio::io_context &, + async::proc::process_monitor_t &, + std::shared_ptr, + std::shared_ptr)>; + + /** + * The main agent entrypoint. Sets up IPC pipes, logging, signal handlers, etc. that are + * the same for all agent processes. 
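
(Putting the environment pieces above together: an agent binary's entry point is expected to hand start_agent a factory that wraps the concrete agent type in an agent_environment_t. The sketch below is illustrative only; my_agent_t is hypothetical, and template arguments elided elsewhere in this patch are guessed here.)

int my_agent_main(lib::Span<char const * const> args)
{
    return agents::start_agent(args, [](auto args,
                                         boost::asio::io_context & io,
                                         async::proc::process_monitor_t & pm,
                                         std::shared_ptr<ipc::raw_ipc_channel_sink_t> sink,
                                         std::shared_ptr<ipc::raw_ipc_channel_source_t> source) {
        return agents::agent_environment_t<my_agent_t>::create(
            "gatord-my-agent",
            io,
            pm,
            // agent_factory: builds the agent itself once the environment is up
            [](boost::asio::io_context & io,
               async::proc::process_monitor_t & pm,
               std::shared_ptr<ipc::raw_ipc_channel_sink_t> sink,
               agents::agent_environment_base_t::terminator terminator) {
                return std::make_shared<my_agent_t>(io, pm, std::move(sink), std::move(terminator));
            },
            std::move(sink),
            std::move(source));
    });
}
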
+ */ + int start_agent(lib::Span args, const environment_factory_t & factory); +} diff --git a/daemon/agents/agent_worker.h b/daemon/agents/agent_worker.h index b6e9e78a..2c55061d 100644 --- a/daemon/agents/agent_worker.h +++ b/daemon/agents/agent_worker.h @@ -20,6 +20,7 @@ namespace agents { ready, shutdown_requested, shutdown_received, + terminated_pending_message_loop, terminated, }; diff --git a/daemon/agents/agent_worker_base.h b/daemon/agents/agent_worker_base.h index 0bc9e1bd..b0e5ac4e 100644 --- a/daemon/agents/agent_worker_base.h +++ b/daemon/agents/agent_worker_base.h @@ -5,6 +5,8 @@ #include "Logging.h" #include "agents/agent_worker.h" #include "agents/spawn_agent.h" +#include "async/continuations/async_initiate.h" +#include "async/continuations/stored_continuation.h" #include "ipc/raw_ipc_channel_sink.h" #include "lib/Assert.h" @@ -18,6 +20,38 @@ namespace agents { * Common base class for agent worker classes, implementing the agent worker interface and providing basic functionality such as state tracking and access to the IPC mechanism */ class agent_worker_base_t : public i_agent_worker_t { + public: + template + auto async_wait_launched(CompletionToken && token) + { + using namespace async::continuations; + + return async_initiate_explicit( + [this](auto && sc) { + auto & strand = work_strand(); + + submit(start_on(strand) // + | then([this, sc = sc.move()]() mutable { + runtime_assert((!launched_notification) && (!notified_launched), + "cannot queue multiple launch notifications"); + + // if the state has already changed, just call directly + if (state != state_t::launched) { + notified_launched = true; + resume_continuation(work_strand().context(), + std::move(sc), + state == state_t::ready); + } + // store it + else { + launched_notification = std::move(sc); + } + }), + sc.get_exceptionally()); + }, + std::forward(token)); + } + protected: agent_worker_base_t(agent_process_t agent_process, state_change_observer_t && state_change_observer) : agent_process(std::move(agent_process)), state_change_observer(std::move(state_change_observer)) @@ -27,7 +61,9 @@ namespace agents { } /** @return True if the state transition from 'old_state' to 'new_state' is valid */ - static constexpr bool is_valid_state_transition(state_t old_state, state_t new_state) + static constexpr bool is_valid_state_transition(state_t old_state, + state_t new_state, + bool message_loop_terminated) { if (old_state == new_state) { return false; @@ -51,8 +87,12 @@ namespace agents { || (old_state == state_t::shutdown_requested)); } + case state_t::terminated_pending_message_loop: { + return !message_loop_terminated; + } + case state_t::terminated: { - return true; + return message_loop_terminated; } default: { @@ -66,7 +106,15 @@ namespace agents { { const auto old_state = state; - if (!is_valid_state_transition(old_state, new_state)) { + // fix up terminated state transition which is dependent on message_loop_terminated value + if ((new_state == state_t::terminated_pending_message_loop) && message_loop_terminated) { + new_state = state_t::terminated; + } + else if ((new_state == state_t::terminated) && !message_loop_terminated) { + new_state = state_t::terminated_pending_message_loop; + } + + if (!is_valid_state_transition(old_state, new_state, message_loop_terminated)) { LOG_DEBUG("Invalid transition from state # %d -> %d", int(old_state), int(new_state)); return false; } @@ -74,18 +122,49 @@ namespace agents { LOG_DEBUG("Transitioning from state # %d -> %d", int(old_state), int(new_state)); state = 
new_state; - state_change_observer(agent_process.pid, old_state, new_state); + state_change_observer(agent_process.forked_process.get_pid(), old_state, new_state); + + // notify the listener + launched_notification_t launched_notification {std::move(this->launched_notification)}; + if (launched_notification) { + notified_launched = true; + resume_continuation(work_strand().context(), + std::move(launched_notification), + new_state == state_t::ready); + } return true; } + // subclasses must provide + [[nodiscard]] virtual boost::asio::io_context::strand & work_strand() = 0; + [[nodiscard]] state_t get_state() const { return state; } [[nodiscard]] ipc::raw_ipc_channel_sink_t & sink() const { return *agent_process.ipc_sink; } [[nodiscard]] ipc::raw_ipc_channel_source_t & source() const { return *agent_process.ipc_source; } + [[nodiscard]] std::shared_ptr source_shared() const + { + return agent_process.ipc_source; + } + + [[nodiscard]] bool exec_agent() { return agent_process.forked_process.exec(); } + + void set_message_loop_terminated() + { + message_loop_terminated = true; + if (state == state_t::terminated_pending_message_loop) { + transition_state(state_t::terminated); + } + } private: + using launched_notification_t = async::continuations::stored_continuation_t; + agent_process_t agent_process; state_change_observer_t state_change_observer; + launched_notification_t launched_notification; state_t state = state_t::launched; + bool notified_launched {false}; + bool message_loop_terminated {false}; }; } diff --git a/daemon/agents/agent_workers_process.h b/daemon/agents/agent_workers_process.h index 08dde6d6..afd51fe2 100644 --- a/daemon/agents/agent_workers_process.h +++ b/daemon/agents/agent_workers_process.h @@ -1,13 +1,16 @@ /* Copyright (C) 2021-2022 by Arm Limited. All rights reserved. 
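
(To make the interaction between the new pending state and set_message_loop_terminated() concrete, the ordering sketched below follows from the transition fix-up above; step 1 relies on the worker's on_sigchild() handling, which is not shown in this hunk.)

// 1. The agent process exits; the pid observer calls worker->on_sigchild(), which
//    presumably ends up requesting transition_state(state_t::terminated).
// 2. message_loop_terminated is still false at that point, so the request is remapped
//    to state_t::terminated_pending_message_loop and the state_change_observer is not
//    yet told that the worker has terminated.
// 3. Once the worker's IPC receive loop drains, set_message_loop_terminated() runs;
//    the state is terminated_pending_message_loop, so it now transitions to
//    state_t::terminated and the state_change_observer (see agent_workers_process.h
//    below) can finally remove the worker.
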
*/ #pragma once + #include "agents/agent_worker.h" #include "agents/ext_source/ext_source_agent_worker.h" +#include "agents/perf/perf_agent_worker.h" #include "async/completion_handler.h" #include "async/continuations/async_initiate.h" #include "async/continuations/continuation.h" #include "async/continuations/operations.h" #include "async/continuations/use_continuation.h" +#include "async/proc/process_monitor.hpp" #include "lib/String.h" #include "lib/Syscall.h" @@ -92,12 +95,24 @@ namespace agents { auto async_add_external_source(ExternalSource & external_souce, CompletionToken && token) { return worker_manager.template async_add_agent>( + process_monitor, std::forward(token), std::ref(external_souce)); } + template + auto async_add_perf_source(EventHandler & event_handler, ConfigMsg && msg, CompletionToken && token) + { + return worker_manager.template async_add_agent>( + process_monitor, + std::forward(token), + std::ref(event_handler), + std::forward(msg)); + } + private: boost::asio::io_context io_context {}; + async::proc::process_monitor_t process_monitor {io_context}; boost::asio::signal_set signal_set {io_context}; boost::asio::thread_pool threads {n_threads}; WorkerManager worker_manager; @@ -107,13 +122,20 @@ namespace agents { { using namespace async::continuations; - repeatedly([this]() { return !worker_manager.is_terminated(); }, // - [this]() { - return signal_set.async_wait(use_continuation) // - | map_error() // - | then([this](int signo) { worker_manager.on_signal(signo); }); - }) // - | DETACH_LOG_ERROR("Signal handler loop"); + spawn("Signal handler loop", + repeatedly([this]() { return !worker_manager.is_terminated(); }, // + [this]() { + return signal_set.async_wait(use_continuation) // + | map_error() // + | then([this](int signo) { + if (signo == SIGCHLD) { + process_monitor.on_sigchild(); + } + else { + worker_manager.on_signal(signo); + } + }); + })); } /** The worker thread body */ @@ -170,10 +192,6 @@ namespace agents { LOG_DEBUG("Received signal %d", signo); parent.on_terminal_signal(signo); } - else if (signo == SIGCHLD) { - LOG_DEBUG("Received sigchld"); - do_waitpid_children(); - } else { LOG_DEBUG("Unexpected signal # %d", signo); } @@ -185,19 +203,19 @@ namespace agents { using namespace async::continuations; // spawn an async operation on the strand that performs the shutdown - start_on(strand) // - | then([this]() { - if (agent_workers.empty()) { - terminate(); - } - else { - LOG_DEBUG("Requesting all agents to shut down"); - for (auto & agent : agent_workers) { - agent.second->shutdown(); - } - } - }) // - | DETACH_LOG_ERROR("Join operation"); + spawn("Join operation", + start_on(strand) // + | then([this]() { + if (agent_workers.empty()) { + terminate(); + } + else { + LOG_DEBUG("Requesting all agents to shut down"); + for (auto & agent : agent_workers) { + agent.second->shutdown(); + } + } + })); } /** @@ -208,20 +226,106 @@ namespace agents { * @param args Any additional arguments that may be passed to the constructor of the worker type (any references must be wrapped in a std::reference_wrapper or similar) * @return Depends on the completion token type */ - template - auto async_add_agent(CompletionToken && token, Args &&... args) + template + auto async_add_agent(ProcessMonitor & process_monitor, CompletionToken && token, Args &&... args) { - return async::continuations::async_initiate_explicit( - [this](auto && receiver, auto && exceptionally, auto &&... 
args) mutable { - this->do_async_add_agent(std::forward(receiver), - std::forward(exceptionally), - std::forward(args)...); + using namespace async::continuations; + + return async_initiate( + [this, &process_monitor](auto &&... args) mutable { + LOG_DEBUG("Creating ext_source agent process"); + + return start_with(std::move(args)...) // + | post_on(strand) // + | then([this, &process_monitor](auto &&... args) -> polymorphic_continuation_t { + // do nothing if already terminated + if (terminated) { + return start_with(false); + } + + // start the process, returning the wrapper instance + return async_spawn_agent_worker(io_context, + spawner, + make_state_observer(), + use_continuation, + std::forward(args)...) // + | then([this, &process_monitor](auto worker) -> polymorphic_continuation_t { + // spawn failed, just let the handler know directly + if (!worker.second) { + return start_with(false); + } + + // great, store it + created_any = true; + agent_workers.emplace(worker); + + // monitor pid + observe_agent_pid(process_monitor, worker.first, worker.second); + + // now wait for it to be ready + return worker.second->async_wait_launched(use_continuation); + }); + }); }, token, std::forward(args)...); } private: + /** Monitor the agent process for termination */ + template + static void observe_agent_pid(ProcessMonitor & process_monitor, + pid_t pid, + std::shared_ptr const & worker) + { + using namespace async::continuations; + + spawn("observe_agent_pid", + process_monitor.async_monitor_forked_pid(pid, use_continuation) // + | then([&process_monitor, pid, worker](auto uid) { + auto repeat_flag = std::make_shared(true); + return repeatedly( + [repeat_flag]() { return *repeat_flag; }, + [&process_monitor, pid, uid, worker, repeat_flag]() { + LOG_DEBUG("Waiting for event %d", pid); + + return process_monitor.async_wait_event(uid, use_continuation) // + | then([pid, worker, repeat_flag](auto ec, auto event) { + if (ec) { + LOG_DEBUG("unexpected error reported for process %d (%s)", + pid, + ec.message().c_str()); + } + + switch (event.state) { + case async::proc::ptrace_process_state_t::no_such_process: + case async::proc::ptrace_process_state_t::terminated_exit: + case async::proc::ptrace_process_state_t::terminated_signal: { + // notify of the signal + LOG_DEBUG( + "Notifying worker that agent process %d (%p) terminated.", + pid, + worker.get()); + worker->on_sigchild(); + + *repeat_flag = false; + return; + } + + case async::proc::ptrace_process_state_t::attached: + case async::proc::ptrace_process_state_t::attaching: + default: { + LOG_TRACE("ignoring unexpected event state %s::%s", + to_cstring(event.type), + to_cstring(event.state)); + return; + } + } + }); + }); + })); + } + Parent & parent; i_agent_spawner_t & spawner; boost::asio::io_context & io_context; @@ -248,163 +352,28 @@ namespace agents { } } - /** Process the async request to start the external source worker */ - template - void do_async_add_agent(Receiver receiver, Exceptionally && exceptionally, Args &&... args) - { - using namespace async::continuations; - - LOG_DEBUG("Creating ext_source agent process"); - - submit(start_on(strand) // - | then([this, - r = std::forward(receiver), - args_tuple = - std::make_tuple...>(std::forward(args)...)]() mutable { - std::apply( - [this, r = std::move(r)](auto &&... 
args) mutable { - if (!on_strand_create_worker(std::move(r), - spawner, - std::forward(args)...)) { - LOG_ERROR("Could not start external source worker"); - } - }, - std::move(args_tuple)); - }), - std::forward(exceptionally)); - } - - /** Create one worker and add it to the map */ - template - bool on_strand_create_worker(Handler handler, i_agent_spawner_t & spawner, Args &&... args) - { - // do nothing if already terminated - if (terminated) { - boost::asio::post(io_context, [handler = std::move(handler)]() { handler(false); }); - return false; - } - - // start the process, returning the wrapper instance - auto worker = - spawn_agent_worker(io_context, spawner, make_state_observer(handler), std::forward(args)...); - - // spawn failed, just let the handler know directly - if (!worker.second) { - boost::asio::post(io_context, [handler = std::move(handler)]() { handler(false); }); - return false; - } - - // great, the handler will be called once the agent is ready - created_any = true; - agent_workers.emplace(worker); - return true; - } - /** Construct the state observer object for some agent process. This function will * process state changes, and update this class's state as appropriate. It will * also notify the agent-process-started handler at the correct time. */ - template - auto make_state_observer(Handler handler) + i_agent_worker_t::state_change_observer_t make_state_observer() { using namespace async::continuations; - return [this, handler = std::move(handler), notified_handler = false](auto pid, - auto old_state, - auto new_state) mutable { - // transition from launched (the initial state) to any other state... - if ((old_state == i_agent_worker_t::state_t::launched) && !notified_handler) { - // handler should only be called once - notified_handler = true; - // handler receives 'true' for ready and 'false' for all other states as it indicates an error on startup - start_with(new_state == i_agent_worker_t::state_t::ready) // - | post_on(io_context) // - | then(std::move(handler)) // - | DETACH_LOG_ERROR("Notify launch handler operation"); - } - + return [this](auto pid, auto /*old_state*/, auto new_state) mutable { // transition to terminated if (new_state == i_agent_worker_t::state_t::terminated) { - start_on(strand) // - | then([this, pid]() { - LOG_DEBUG("Received agent terminated notification for agent process %d", pid); - // remove it - agent_workers.erase(pid); - // stop if no more agents - on_strand_check_terminated(); - }) // - | DETACH_LOG_ERROR("Handle agent terminated notification operation"); + spawn("Handle agent terminated notification operation", + start_on(strand) // + | then([this, pid]() { + LOG_DEBUG("Received agent terminated notification for agent process %d", pid); + // remove it + agent_workers.erase(pid); + // stop if no more agents + on_strand_check_terminated(); + })); } }; } - - /** Handle the sigchld event */ - void do_waitpid_children() - { - using namespace async::continuations; - - // iterate each child agent and check if it terminated. - // if so, notify its worker and remove it from the map. - // - // We don't use waitpid(0 or -1, ...) 
since there are other waitpid calls that block on a single pid and we dont - // want to swallow the process event from them - start_on(strand) // - | then([this]() { - // check all the child processes - for (auto it = agent_workers.begin(); it != agent_workers.end();) { - if (do_waitpid_for(it->first, it->second)) { - it = agent_workers.erase(it); - } - else { - ++it; - } - } - - // stop if no more items - on_strand_check_terminated(); - }) - | DETACH_LOG_ERROR("SIGCHLD handler"); - } - - /** Check the exit status for some worker process */ - bool do_waitpid_for(pid_t agent_pid, std::shared_ptr worker) - { - int wstatus = 0; - pid_t result = lib::waitpid(agent_pid, &wstatus, WNOHANG); - int error = errno; - - LOG_TRACE("Got waitpid(result=%d, wstatus=%d, pid=%d, worker=%p)", - result, - wstatus, - agent_pid, - worker.get()); - - // call waitpid to reap the child pid, but nothing to do - if (worker == nullptr) { - LOG_DEBUG("Unexpected state, received SIGCHLD for pid=%d, but no worker found", agent_pid); - return true; - } - - auto const process_exited = ((result == agent_pid) && (WIFEXITED(wstatus) || WIFSIGNALED(wstatus))); - auto const no_such_child = ((result == pid_t(-1)) && (error == ECHILD)); - - // the process terminated, or no such child exists, notify the worker class to update its state machine - if (process_exited || no_such_child) { - // notify of the signal - LOG_DEBUG("Notifying worker that agent process %d (%p) terminated.", agent_pid, worker.get()); - worker->on_sigchild(); - - // dont erase it yet, wait for the state machine to update - return false; - } - - // some other error occured, log it - if (result == pid_t(-1)) { - // NOLINTNEXTLINE(concurrency-mt-unsafe) - LOG_DEBUG("waitpid received error %d %s", error, strerror(error)); - } - - return false; - } }; /** Convenience alias for the context and manager */ diff --git a/daemon/agents/apc/intermediate_buffer_consumer.h b/daemon/agents/apc/intermediate_buffer_consumer.h deleted file mode 100644 index 4afb2abc..00000000 --- a/daemon/agents/apc/intermediate_buffer_consumer.h +++ /dev/null @@ -1,127 +0,0 @@ -/* Copyright (C) 2022 by Arm Limited. All rights reserved. */ - -#pragma once - -#include -#include - -#include -#include -#include -#include - -namespace apc { - - /* - * Consumer for the intermediate buffer. - * - * Consumes the buffer and creates APC IPC messages. - * The ipc message will be send to ipc sink. - * - * Consumer will register to the asyn_buffer after each ipc message is send successfully. - * - */ - class intermediate_buffer_consumer_t : public std::enable_shared_from_this { - - public: - intermediate_buffer_consumer_t(std::shared_ptr async_buffer, - std::shared_ptr sender) - : async_buffer(async_buffer), sender(sender) {}; - - /** - * Start consuming, has to be called only once. - */ - template - auto async_start_consuming(CompletionToken && token) - { - return boost::asio::async_initiate( - [st = shared_from_this()](auto && handler) { - st->do_async_start_consuming(std::forward(handler)); - }, - token); - } - - /** - * Call to terminate the apc consumer . - * This will make the consumer to stops sending ipc message, and buffer - * does not get consumed. 
- */ - void terminate() { terminated.store(true, std::memory_order_release); } - - private: - std::atomic terminated {false}; - std::shared_ptr async_buffer; - std::shared_ptr sender; - - /** - * Registers for async_consume - * And upon notification calls on_has_buffer_data() - */ - template - void do_async_start_consuming(Handler && handler) - { - if (terminated.load(std::memory_order_acquire)) { - LOG_DEBUG("Intermitted buffer consumer is terminated."); - handler(boost::system::error_code {}); - } - else { - async_buffer->async_consume( - [st = weak_from_this(), - handler = std::forward(handler)](auto success, auto && buffer, auto && action) mutable { - auto this_obj = st.lock(); - if (this_obj) { - this_obj->on_has_buffer_data(success, - std::forward(buffer), - std::forward(action), - std::move(handler)); - } - else { - LOG_DEBUG("Could not consume buffer data, as this pointer is removed"); - } - }); - } - } - - /** - * Consumes the buffer if is ready (success ie true) to create apc ipc message, - * Ipc message is send to sink, if it successful (error code = {}) , marks the buffer as consumed - * and re registers for next set of bytes from the buffer. - * - * If the consumer is terminated the handler is called with error_code {}. - * - */ - template - void on_has_buffer_data(bool success, Buffer && buffer, Action && action, Handler && handler) - { - if (terminated.load(std::memory_order_acquire)) { - LOG_DEBUG("Intermitted buffer consumer is terminated."); - handler(boost::system::error_code {}); - } - else if (success) { - //intermediate buffer calls commit for its commit action (endFrame) after each frame. - sender->async_send_message(ipc::msg_apc_frame_data_from_span_t {std::move(buffer)}, - [st = shared_from_this(), - handler = std::forward(handler), - action = std::forward(action)](auto ec, auto /*msg*/) mutable { - if (!ec) { - // success, mark action complete, - action.consume(); - //re-regsiter for new bytes from async_buffer - st->do_async_start_consuming(std::move(handler)); - } - else { - // error, terminate - LOG_DEBUG("Failed to send apc ipc message due to %s", - ec.message().c_str()); - handler(ec); - } - }); - } - else { - // log and call the handler as now terminating - LOG_DEBUG("Failed to read from the intermitted buffer."); - handler(boost::asio::error::make_error_code(boost::asio::error::basic_errors::no_buffer_space)); - } - } - }; -} diff --git a/daemon/agents/common/coalescing_cpu_monitor.h b/daemon/agents/common/coalescing_cpu_monitor.h index aeffe988..b5e55229 100644 --- a/daemon/agents/common/coalescing_cpu_monitor.h +++ b/daemon/agents/common/coalescing_cpu_monitor.h @@ -6,8 +6,10 @@ #include "async/completion_handler.h" #include "async/continuations/async_initiate.h" #include "async/continuations/operations.h" +#include "async/continuations/stored_continuation.h" #include "async/continuations/use_continuation.h" #include "lib/Assert.h" +#include "lib/EnumUtils.h" #include #include @@ -59,12 +61,12 @@ namespace agents { using namespace async::continuations; return async_initiate_explicit( - [st = this->shared_from_this()](auto && receiver, auto && exceptionally) { + [st = this->shared_from_this()](auto && stored_continuation) { submit(start_on(st->strand) // - | then([st, r = std::forward(receiver)]() mutable { + | then([st, r = stored_continuation.move()]() mutable { st->on_strand_do_receive_one(std::move(r)); }), - std::forward(exceptionally)); + stored_continuation.get_exceptionally()); }, std::forward(token)); } @@ -74,17 +76,17 @@ namespace agents { { 
using namespace async::continuations; - start_on(strand) // - | then([st = shared_from_this()]() { - // mark as terminated - st->terminated = true; - // cancel the pending request if there is one - st->cancel_pending(); - // clear any state - st->per_core_state.clear(); - st->pending_cpu_nos.clear(); - }) // - | DETACH_LOG_ERROR("terminate cpu monitor"); + spawn("terminate cpu monitor", + start_on(strand) // + | then([st = shared_from_this()]() { + // mark as terminated + st->terminated = true; + // cancel the pending request if there is one + st->cancel_pending(); + // clear any state + st->per_core_state.clear(); + st->pending_cpu_nos.clear(); + })); } private: @@ -100,7 +102,7 @@ namespace agents { pending_online_offline, }; - using completion_handler_t = async::completion_handler_ref_t; + using completion_handler_t = async::continuations::stored_continuation_t; /** * Transition current->new state value based on received raw on-off event @@ -193,7 +195,7 @@ namespace agents { template void post_handler(Handler && handler, event_t event) { - boost::asio::post(strand.context(), [event, h = std::forward(handler)]() mutable { h(event); }); + resume_continuation(strand.context(), std::forward(handler), event); } /** Cancel and clear any pending request */ @@ -239,7 +241,7 @@ namespace agents { // find the next pending core auto cpu_no = pending_cpu_nos.front(); - runtime_assert((cpu_no >= 0) && (cpu_no < per_core_state.size()), "Invalid cpu_no value"); + runtime_assert((cpu_no >= 0) && (std::size_t(cpu_no) < per_core_state.size()), "Invalid cpu_no value"); // transform its state auto current_state = per_core_state[cpu_no]; @@ -251,8 +253,8 @@ namespace agents { per_core_state[cpu_no] = new_state; LOG_TRACE("Consuming coalesced CPU state from %u->%u, %u / %u", - current_state, - new_state, + lib::toEnumValue(current_state), + lib::toEnumValue(new_state), online, is_pending(new_state)); @@ -288,8 +290,8 @@ namespace agents { auto now_pending = is_pending(new_state); LOG_TRACE("Transitioning coalesced CPU state from %u->%u (%u/%u)", - current_state, - new_state, + lib::toEnumValue(current_state), + lib::toEnumValue(new_state), was_pending, now_pending); diff --git a/daemon/agents/common/nl_cpu_monitor.h b/daemon/agents/common/nl_cpu_monitor.h index fcb42daa..3f6a2438 100644 --- a/daemon/agents/common/nl_cpu_monitor.h +++ b/daemon/agents/common/nl_cpu_monitor.h @@ -2,6 +2,8 @@ #pragma once +#include "async/continuations/async_initiate.h" +#include "async/continuations/stored_continuation.h" #include "async/netlink/uevents.h" #include "lib/String.h" @@ -31,10 +33,13 @@ namespace agents { }; /** Constructor, using the provided context */ - explicit nl_kobject_uevent_cpu_monitor_t(boost::asio::io_context & context) : monitor(context) {} + explicit nl_kobject_uevent_cpu_monitor_t(boost::asio::io_context & context) : context(context), monitor(context) + { + } /** Constructor, using the provided monitor (for testing) */ - explicit nl_kobject_uevent_cpu_monitor_t(monitor_type && monitor) : monitor(std::forward(monitor)) + explicit nl_kobject_uevent_cpu_monitor_t(boost::asio::io_context & context, monitor_type && monitor) + : context(context), monitor(std::forward(monitor)) { } @@ -50,11 +55,11 @@ namespace agents { template auto async_receive_one(CompletionToken && token) { - return boost::asio::async_initiate( - [st = this->shared_from_this()](auto && handler) { - st->do_receive_event(std::forward(handler)); - }, - token); + using namespace async::continuations; + + return async_initiate_explicit( + 
[st = this->shared_from_this()](auto && sc) { st->do_receive_event(std::forward(sc)); }, + std::forward(token)); } private: @@ -63,52 +68,53 @@ namespace agents { static constexpr std::string_view devpath_cpu_prefix {"/devices/system/cpu/cpu"}; static constexpr std::string_view subsystem_cpu {"cpu"}; + boost::asio::io_context & context; monitor_type monitor; /** Async wait for one uevent to be received and parsed */ - template - void do_receive_event(Handler && handler) + template + void do_receive_event(async::continuations::raw_stored_continuation_t && sc) { monitor.async_receive_one( - [st = this->shared_from_this(), - h = std::forward(handler)](auto const & ec, auto const & event) mutable { + [st = this->shared_from_this(), sc = std::move(sc)](auto const & ec, auto const & event) mutable { if (!ec) { - st->process_event(std::move(h), event); + st->process_event(std::move(sc), event); } else { // convert it into stop code LOG_DEBUG("Received '%s', stopping Netlink CPU monitor", ec.message().c_str()); - h(event_t {-1, false}); + resume_continuation(st->context, std::move(sc), event_t {-1, false}); } }); } /** Parse the received event; will recurse for another event if the event is not a cpu online/offline event, otherwise passes to the handler */ - template - void process_event(Handler && handler, Event const & event) + template + void process_event(async::continuations::raw_stored_continuation_t && sc, Event const & event) { if (event.subsystem != subsystem_cpu) { - return do_receive_event(std::forward(handler)); + return do_receive_event(std::move(sc)); } if (!lib::starts_with(event.devpath, devpath_cpu_prefix)) { - return do_receive_event(std::forward(handler)); + return do_receive_event(std::move(sc)); } auto online = (event.action == action_online); auto offline = (event.action == action_offline); if ((!online) && (!offline)) { - return do_receive_event(std::forward(handler)); + return do_receive_event(std::move(sc)); } auto cpu_no_sv = event.devpath.substr(devpath_cpu_prefix.size()); auto cpu_no = lib::to_int(cpu_no_sv, -1); if (cpu_no < 0) { - return do_receive_event(std::forward(handler)); + return do_receive_event(std::move(sc)); } - handler(event_t {cpu_no, online}); + // dispatch the handler with the event + resume_continuation(context, std::move(sc), event_t {cpu_no, online}); } }; } diff --git a/daemon/agents/common/polling_cpu_monitor.h b/daemon/agents/common/polling_cpu_monitor.h index 7c4257d7..3275eae1 100644 --- a/daemon/agents/common/polling_cpu_monitor.h +++ b/daemon/agents/common/polling_cpu_monitor.h @@ -5,6 +5,7 @@ #include "async/completion_handler.h" #include "async/continuations/async_initiate.h" #include "async/continuations/operations.h" +#include "async/continuations/stored_continuation.h" #include "async/continuations/use_continuation.h" #include "lib/Assert.h" #include "lib/FsEntry.h" @@ -87,36 +88,43 @@ namespace agents { auto st = shared_from_this(); - repeatedly( - [st]() { - return start_on(st->strand) // - | then([st]() { return (!st->terminated) || (!st->monitor_paths.empty()); }); - }, - [st]() { - return start_on(st->strand) // - | then([st]() { return st->on_strand_do_poll(); }) // - | then([st](bool any_offline) { - st->timer.expires_from_now(any_offline ? 
short_poll_interval : long_poll_interval); - }) // - | st->timer.async_wait(use_continuation) // - | dispatch_on(st->strand) // - | then([st](auto ec) { - // swallow cancel event, mark as terminated instead - if (ec == boost::asio::error::make_error_code(boost::asio::error::operation_aborted)) { - LOG_DEBUG("Polling CPU monitor is now terminated"); - if (!std::exchange(st->terminated, true)) { - st->enqueue_event(-1, false); + spawn( + "raw cpu event monitor", + repeatedly( + [st]() { + return start_on(st->strand) // + | then([st]() { return (!st->terminated) || (!st->monitor_paths.empty()); }); + }, + [st]() { + return start_on(st->strand) // + | then([st]() { return st->on_strand_do_poll(); }) // + | then([st](bool any_offline) { + st->timer.expires_from_now(any_offline ? short_poll_interval : long_poll_interval); + }) // + | st->timer.async_wait(use_continuation) // + | post_on(st->strand) // + | then([st](auto ec) { + // swallow cancel event, mark as terminated instead + if (ec + == boost::asio::error::make_error_code(boost::asio::error::operation_aborted)) { + LOG_DEBUG("Polling CPU monitor is now terminated"); + if (!std::exchange(st->terminated, true)) { + st->enqueue_event(-1, false); + } + return boost::system::error_code {}; + } + if (ec) { + LOG_ERROR("??? %s", ec.message().c_str()); } - return boost::system::error_code {}; - } - if (ec) { - LOG_ERROR("??? %s", ec.message().c_str()); - } - return ec; - }) // - | map_error(); - }) // - | DETACH_LOG_ERROR("raw cpu event monitor"); + return ec; + }) // + | map_error(); + }), + [st](bool failed) { + if (failed) { + st->stop(); + } + }); } /** Stop observing for changes */ @@ -126,14 +134,14 @@ namespace agents { auto st = shared_from_this(); - start_on(strand) // - | then([st]() { - if (!std::exchange(st->terminated, true)) { - st->timer.cancel(); - st->enqueue_event(-1, false); - } - }) // - | DETACH_LOG_ERROR("stop raw cpu event monitor"); + spawn("stop raw cpu event monitor", + start_on(strand) // + | then([st]() { + if (!std::exchange(st->terminated, true)) { + st->timer.cancel(); + st->enqueue_event(-1, false); + } + })); } template @@ -142,18 +150,18 @@ namespace agents { using namespace async::continuations; return async_initiate_explicit( - [st = shared_from_this()](auto && receiver, auto && exceptionally) { + [st = shared_from_this()](auto && stored_continuation) { submit(start_on(st->strand) // - | then([st, r = std::forward(receiver)]() mutable { + | then([st, r = stored_continuation.move()]() mutable { st->on_strand_do_receive_one(std::move(r)); }), - std::forward(exceptionally)); + stored_continuation.get_exceptionally()); }, std::forward(token)); } private: - using completion_handler_t = async::completion_handler_ref_t; + using completion_handler_t = async::continuations::stored_continuation_t; boost::asio::steady_timer timer; boost::asio::io_context::strand strand; @@ -168,7 +176,7 @@ namespace agents { template void post_handler(Handler && handler, event_t event) { - boost::asio::post(strand.context(), [event, h = std::forward(handler)]() mutable { h(event); }); + resume_continuation(strand.context(), std::forward(handler), event); } /** Handle the request to consume one pending event */ @@ -246,6 +254,7 @@ namespace agents { if (inserted || first_pass) { enqueue_event(cpu, true); } + (void) it; // gcc7 } else { auto count = online_cpu_nos.erase(cpu); diff --git a/daemon/agents/common/socket_worker.h b/daemon/agents/common/socket_worker.h index 58f73f2a..de2695d1 100644 --- a/daemon/agents/common/socket_worker.h +++ 
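
(Both CPU monitors above now complete through the stored-continuation machinery rather than plain callbacks, so a caller can consume events directly with the continuation framework. A minimal sketch; 'monitor' is assumed to be a shared_ptr to one of the monitor classes above, and the exact completion signature is inferred from the surrounding code.)

using namespace async::continuations;

spawn("cpu monitor consumer",
      monitor->async_receive_one(use_continuation) //
          | then([](auto event) {
                // the event carries the cpu number and its new online/offline state;
                // both monitors report a negative cpu number once they have stopped
                LOG_DEBUG("received a cpu state change event");
                (void) event;
            }));
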
b/daemon/agents/common/socket_worker.h @@ -4,6 +4,8 @@ #include "Logging.h" #include "agents/common/socket_reference.h" +#include "async/continuations/async_initiate.h" +#include "async/continuations/stored_continuation.h" #include "lib/Assert.h" #include @@ -32,11 +34,12 @@ namespace agents { using ipc_sink_type = IpcSinkType; /** Factory method */ - static std::shared_ptr create(ipc_sink_type && ipc_sink, + static std::shared_ptr create(boost::asio::io_context & context, + ipc_sink_type && ipc_sink, std::shared_ptr socket_ref) { return std::make_shared( - socket_read_worker_t {std::move(ipc_sink), std::move(socket_ref)}); + socket_read_worker_t {context, std::move(ipc_sink), std::move(socket_ref)}); } /** @return True if the socket is still open */ @@ -49,7 +52,7 @@ namespace agents { ipc_sink.async_send_new_connection([st = this->shared_from_this()](auto const & ec, auto /*msg*/) { if (ec) { // log it and close the connection - LOG_ERROR_IF_NOT_EOF( + LOG_ERROR_IF_NOT_EOF_OR_CANCELLED( ec, "(%p) Error occured while notifying IPC of new external connection %d, dropping due to %s", st.get(), @@ -66,69 +69,72 @@ namespace agents { template auto async_send_bytes(std::vector && bytes, CompletionToken && token) { + using namespace async::continuations; + LOG_TRACE("(%p) Received request to send %zu bytes", this, bytes.size()); - return boost::asio::async_initiate( - [st = this->shared_from_this(), bytes = std::move(bytes)](auto && handler) mutable { - using Handler = decltype(handler); - return st->do_async_send_bytes(std::move(bytes), std::forward(handler)); + return async_initiate_explicit( + [st = this->shared_from_this(), bytes = std::move(bytes)](auto && sc) mutable { + return st->do_async_send_bytes(std::move(bytes), std::forward(sc)); }, - token); + std::forward(token)); } /** Close the connection */ template auto async_close(CompletionToken && token) { - return boost::asio::async_initiate( - [st = this->shared_from_this()](auto && handler) mutable { - using Handler = decltype(handler); - st->do_async_close(std::forward(handler)); + using namespace async::continuations; + + return async_initiate_explicit( + [st = this->shared_from_this()](auto && sc) mutable { + st->do_async_close(std::forward(sc)); }, - token); + std::forward(token)); } private: + boost::asio::io_context & context; ipc_sink_type ipc_sink; std::shared_ptr socket_ref; std::vector receive_message_buffer {}; - socket_read_worker_t(ipc_sink_type && ipc_sink, std::shared_ptr socket_ref) - : ipc_sink(std::move(ipc_sink)), socket_ref(std::move(socket_ref)) + socket_read_worker_t(boost::asio::io_context & context, + ipc_sink_type && ipc_sink, + std::shared_ptr socket_ref) + : context(context), ipc_sink(std::move(ipc_sink)), socket_ref(std::move(socket_ref)) { } /** Perform the async close operation */ - template - void do_async_close(Handler && handler) + template + void do_async_close(async::continuations::raw_stored_continuation_t && sc) { - using handler_type = std::decay_t; - // tell the IPC mechanism, but only once if (is_open()) { return ipc_sink.async_send_close_connection( - [st = this->shared_from_this(), handler = std::forward(handler)](auto const & /*ec*/, - auto /*msg*/) { + [st = this->shared_from_this(), sc = std::move(sc)](auto const & /*ec*/, auto /*msg*/) mutable { // close the socket st->socket_ref->close(); // notify the handler - return handler(); + return resume_continuation(st->context, std::move(sc)); }); } // otherwise just call the handler directly - return handler(); + return 
resume_continuation(context, std::move(sc)); } /** Perform the async send operation */ - template - void do_async_send_bytes(std::vector && bytes, Handler && handler) + template + void do_async_send_bytes(std::vector && bytes, + async::continuations::raw_stored_continuation_t && sc) { - using handler_type = std::decay_t; + using namespace async::continuations; socket_ref->with_socket([st = this->shared_from_this(), bytes_ptr = std::make_unique>(std::move(bytes)), - handler = std::forward(handler)](auto & socket) mutable { + sc = std::move(sc)](auto & socket) mutable { // make the buffer before the call to move(bytes_ptr) otherwise the move will happen before the deref auto buffer = boost::asio::buffer(*bytes_ptr); @@ -138,21 +144,21 @@ namespace agents { socket, buffer, // write result handler - [st, bytes_ptr = std::move(bytes_ptr), handler = std::forward(handler), &socket] // + [st, bytes_ptr = std::move(bytes_ptr), sc = std::move(sc), &socket] // (auto const & ec, auto n_written) mutable { // handle send error? if (ec) { // log it and close the connection - LOG_ERROR_IF_NOT_EOF(ec, - "(%p) Error occured forwarding bytes to external " - "connection %d, dropping due to %s", - st.get(), - socket.native_handle(), - ec.message().c_str()); - return st->async_close([ec, handler = std::forward(handler)]() { - // pass the error code to the handler - handler(ec); - }); + LOG_ERROR_IF_NOT_EOF_OR_CANCELLED(ec, + "(%p) Error occured forwarding bytes to external " + "connection %d, dropping due to %s", + st.get(), + socket.native_handle(), + ec.message().c_str()); + + return submit(st->context, + st->async_close(use_continuation) | then([ec]() { return ec; }), + std::move(sc)); } // send length error @@ -163,16 +169,20 @@ namespace agents { "short write", st.get(), socket.native_handle()); - return st->async_close([handler = std::forward(handler)]() { - // pass EOF error code to handler - handler(boost::asio::error::make_error_code(boost::asio::error::misc_errors::eof)); - }); + + // pass EOF error code to handler + return submit(st->context, + st->async_close(use_continuation) | then([]() { + return boost::asio::error::make_error_code( + boost::asio::error::misc_errors::eof); + }), + std::move(sc)); } LOG_TRACE("(%p) Sent %zu bytes", st.get(), n_written); // wait for the next message - return handler(boost::system::error_code {}); + return resume_continuation(st->context, std::move(sc), boost::system::error_code {}); }); }); } @@ -192,7 +202,7 @@ namespace agents { [st](auto ec, auto n_read) mutable { if (ec) { // log it and close the connection - LOG_ERROR_IF_NOT_EOF( + LOG_ERROR_IF_NOT_EOF_OR_CANCELLED( ec, "(%p) Error occured reading bytes for external connection %d, dropping due to %s", st.get(), @@ -226,7 +236,7 @@ namespace agents { // handle send error? 
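/* Illustrative aside (not part of the patch): in the rewritten send path above, a short write is
 * mapped onto an EOF error code before the stored continuation is resumed. A minimal,
 * self-contained Boost.Asio sketch of that mapping, using plain callbacks instead of gatord's
 * continuation library; send_all() and its parameters are hypothetical names. */
#include <boost/asio.hpp>

#include <cstddef>
#include <functional>
#include <memory>
#include <utility>
#include <vector>

// Write all of `bytes` to `stream`, reporting either the transport error or EOF when fewer
// bytes than requested were written (mirrors the defensive short-write check in the worker).
template <typename AsyncWriteStream>
void send_all(AsyncWriteStream & stream,
              std::vector<char> bytes,
              std::function<void(boost::system::error_code)> on_done)
{
    // keep the storage alive until the write completes
    auto owned = std::make_shared<std::vector<char>>(std::move(bytes));
    auto buffer = boost::asio::buffer(*owned);

    boost::asio::async_write(
        stream,
        buffer,
        [owned, on_done = std::move(on_done)](boost::system::error_code ec, std::size_t n_written) {
            if (!ec && n_written != owned->size()) {
                // treat a short write as end-of-file, as the send handler above does
                ec = boost::asio::error::make_error_code(boost::asio::error::misc_errors::eof);
            }
            on_done(ec);
        });
}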
if (ec) { // log it and close the connection - LOG_ERROR_IF_NOT_EOF( + LOG_ERROR_IF_NOT_EOF_OR_CANCELLED( ec, "(%p) Error occured forwarding bytes for external connection %d, dropping due to %s", st.get(), diff --git a/daemon/agents/ext_source/ext_source_agent.h b/daemon/agents/ext_source/ext_source_agent.h index 01ef77cc..e5e24ecd 100644 --- a/daemon/agents/ext_source/ext_source_agent.h +++ b/daemon/agents/ext_source/ext_source_agent.h @@ -2,11 +2,13 @@ #pragma once #include "Logging.h" +#include "agents/agent_environment.h" #include "agents/common/socket_listener.h" #include "agents/common/socket_reference.h" #include "agents/common/socket_worker.h" #include "agents/ext_source/ipc_sink_wrapper.h" #include "async/completion_handler.h" +#include "async/continuations/continuation.h" #include "ipc/messages.h" #include "ipc/raw_ipc_channel_sink.h" #include "ipc/raw_ipc_channel_source.h" @@ -32,6 +34,8 @@ namespace agents { */ class ext_source_agent_t : public std::enable_shared_from_this { public: + using accepted_message_types = std::tuple; + using socket_read_worker_type = socket_read_worker_t; static constexpr std::string_view annotation_uds_parent_socket_name {"\0streamline-annotate-parent", 27}; @@ -41,20 +45,18 @@ namespace agents { static std::shared_ptr create(boost::asio::io_context & io_context, std::shared_ptr ipc_sink, - std::shared_ptr ipc_source) + agent_environment_base_t::terminator terminator) { - return std::make_shared(io_context, std::move(ipc_sink), std::move(ipc_source)); + return std::make_shared(io_context, std::move(ipc_sink), std::move(terminator)); } // use create... or make shared your self... ext_source_agent_t(boost::asio::io_context & io_context, std::shared_ptr ipc_sink, - std::shared_ptr ipc_source) - : io_context(io_context), - strand(io_context), - ipc_sink(std::move(ipc_sink)), - ipc_source(std::move(ipc_source)) + [[maybe_unused]] agent_environment_base_t::terminator terminator) + : io_context(io_context), strand(io_context), ipc_sink(std::move(ipc_sink)) { + // terminator isn't used as failed connections are closed individually, they won't kill the whole capture } /** Add a UDS annotation socket listener */ @@ -124,32 +126,27 @@ namespace agents { }); } - /** Start the agent main worker loop */ - void start() + async::continuations::polymorphic_continuation_t<> co_shutdown() { - // strand is used for synchronizing access to internal structures - return boost::asio::post(strand, [st = shared_from_this()]() { st->on_strand_do_started(); }); + using namespace async::continuations; + + auto self = this->shared_from_this(); + return start_on(strand) | then([self]() mutable -> polymorphic_continuation_t<> { + if (std::exchange(self->is_shutdown, true)) { + return {}; + } + return self->co_shutdown_workers(); + }); } - /** Shutdown the agent (closes all listeners and workers and then stops receiving new IPC messages) */ - void shutdown() + async::continuations::polymorphic_continuation_t<> co_receive_message(ipc::msg_annotation_send_bytes_t msg) { - LOG_DEBUG("Shutdown received"); - - // strand is used for synchronizing access to internal structures - return boost::asio::post(strand, [st = shared_from_this()]() { st->on_strand_do_shutdown(); }); + return co_send_annotation_bytes(std::move(msg)); } - /** Wait for the agent to fully shut down */ - template - auto async_wait_shutdown(CompletionToken && token) + async::continuations::polymorphic_continuation_t<> co_receive_message(ipc::msg_annotation_close_conn_t msg) { - return boost::asio::async_initiate( - [st = 
shared_from_this()](auto && handler) mutable { - using Handler = decltype(handler); - st->do_async_wait_shutdown(std::forward(handler)); - }, - token); + return co_close_worker_by_id(msg.header); } private: @@ -158,37 +155,101 @@ namespace agents { boost::asio::io_context & io_context; boost::asio::io_context::strand strand; std::shared_ptr ipc_sink; - std::shared_ptr ipc_source; std::vector> socket_listeners {}; std::vector> parent_connections {}; - std::vector> shutdown_handlers {}; std::map> socket_workers {}; ipc::annotation_uid_t uid_counter {0}; bool is_shutdown {false}; - /** The agent is started */ - void on_strand_do_started() + /** Handle the 'send bytes' IPC message variant. Transmit the bytes to the appropriate worker. */ + async::continuations::polymorphic_continuation_t<> co_send_annotation_bytes( + ipc::msg_annotation_send_bytes_t message) { - // skip 'ready' if already shut down - if (is_shutdown) { - // now wait to receive messages - return on_strand_do_receive_message(); - } + using namespace async::continuations; + + auto self = this->shared_from_this(); + + return start_on(strand) + | then([self, message = std::move(message)]() mutable -> polymorphic_continuation_t<> { + LOG_TRACE("Received %zu bytes for transmission to worker %d", + message.suffix.size(), + message.header); + + auto worker_it = self->socket_workers.find(message.header); + if (worker_it == self->socket_workers.end()) { + LOG_DEBUG("Received bytes for non-existent client %d", message.header); + return {}; + } + + auto worker = worker_it->second; + if (!worker) { + LOG_DEBUG("Received bytes for non-existent client %d", message.header); + return {}; + } + + return worker->async_send_bytes(std::move(message.suffix), use_continuation) + | then( + [id = message.header, self](const auto & ec) mutable -> polymorphic_continuation_t<> { + if (ec) { + LOG_DEBUG("Failed to send bytes to worker %d due to %s", + id, + ec.message().c_str()); + return self->co_close_worker_by_id(id); + } + return {}; + }); + }); + } - // send the 'ready' IPC message - return ipc_sink->async_send_message( - ipc::msg_ready_t {}, - [st = shared_from_this()](auto const & ec, auto const & /*msg*/) { - if (ec) { - LOG_DEBUG("Failed to send ready IPC to host due to %s", ec.message().c_str()); - } - else { - LOG_TRACE("Ready message sent"); - } - - // now wait to receive messages - return boost::asio::post(st->strand, [st]() { st->on_strand_do_receive_message(); }); - }); + /** Stop listening and close all workers */ + async::continuations::polymorphic_continuation_t<> co_shutdown_workers() + { + using namespace async::continuations; + + auto self = shared_from_this(); + return start_on(strand) + // first stop listening + | then([self]() mutable { + self->is_shutdown = true; + + LOG_TRACE("Closing all listeners"); + + // close all listeners so their can be no new inbound connections + for (auto & socket_listener : self->socket_listeners) { + socket_listener->close(); + } + + self->socket_listeners.clear(); + + LOG_TRACE("Closing all workers"); + }) + // then close all of the workers + | iterate(socket_workers, + [self](auto it) mutable { + auto worker = it->second; + + LOG_TRACE("Closing worker %d (%p)", it->first, worker.get()); + + // remove from the map + self->socket_workers.erase(it); + + static_assert(!std::is_const_v); + return worker->async_close(use_continuation); + }) + | then([self]() mutable { self->socket_workers.clear(); }) + // then close the parent connections + | iterate(parent_connections, + [self](auto it) mutable { + auto 
parent = *it; + // close the parent connections after writing a single 0-byte to each + parent->with_socket([parent](auto & socket) mutable { + boost::asio::async_write( + socket, + boost::asio::buffer(close_parent_bytes), + [parent](auto const & /*ec*/, auto /*n*/) mutable { parent->close(); }); + }); + }) + | then([self]() mutable { self->parent_connections.clear(); }); } /** Handle an annotations 'parent' connection */ @@ -224,7 +285,8 @@ namespace agents { // create it auto id = ++st->uid_counter; auto socket_read_worker = - socket_read_worker_type::create(ipc_annotations_sink_adapter_t(st->ipc_sink, id), + socket_read_worker_type::create(st->io_context, + ipc_annotations_sink_adapter_t(st->ipc_sink, id), make_socket_ref(std::move(socket))); // store it @@ -263,274 +325,32 @@ namespace agents { worker->start(); } - /** Receive the next IPC message from the IPC source */ - void on_strand_do_receive_message() - { - // check shutdown state - if (is_shutdown && socket_workers.empty() && socket_listeners.empty()) { - LOG_DEBUG("Shutdown complete. Notifying handlers."); - - // notify any handlers - for (auto & handler : shutdown_handlers) { - // post each handler - boost::asio::post(io_context, std::move(handler)); - } - - // clear the list of handlers as we are done with them now - shutdown_handlers.clear(); - return; - } - - // receive next message - ipc_source->async_recv_message( - [st = shared_from_this()](boost::system::error_code const & ec, - ipc::all_message_types_variant_t && msg_variant) mutable { - if (ec) { - LOG_DEBUG("Failed to receive IPC message due to %s", ec.message().c_str()); - return st->shutdown(); - } - // strand is used for synchronizing access to internal structures - return boost::asio::post(st->strand, [st, msg_variant = std::move(msg_variant)]() mutable { - std::visit( - [st](auto && message) { - // NOLINTNEXTLINE(bugprone-move-forwarding-reference) - return st->on_strand_do_handle_message(std::move(message)); - }, - std::move(msg_variant)); - }); - }); - } - - /** Handle one of the IPC variant values */ - void on_strand_do_handle_message(std::monostate const & /*message*/) - { - LOG_DEBUG("Unexpected message std::monostate; ignoring"); - return on_strand_do_receive_message(); - } - - /** Handle one of the IPC variant values */ - void on_strand_do_handle_message(ipc::msg_annotation_new_conn_t const & /*message*/) - { - LOG_DEBUG("Unexpected message ipc::msg_annotation_new_conn_t; ignoring"); - return on_strand_do_receive_message(); - } - - /** Handle one of the IPC variant values */ - void on_strand_do_handle_message(ipc::msg_annotation_recv_bytes_t const & /*message*/) - { - LOG_DEBUG("Unexpected message ipc::msg_annotation_recv_bytes_t; ignoring"); - return on_strand_do_receive_message(); - } - - /** Handle one of the IPC variant values */ - void on_strand_do_handle_message(ipc::msg_capture_configuration_t const & /*message*/) - { - LOG_DEBUG("Unexpected message ipc::msg_capture_ready_t; ignoring"); - return on_strand_do_receive_message(); - } - - /** Handle one of the IPC variant values */ - void on_strand_do_handle_message(ipc::msg_capture_ready_t const & /*message*/) - { - LOG_DEBUG("Unexpected message ipc::msg_capture_ready_t; ignoring"); - return on_strand_do_receive_message(); - } - - /** Handle one of the IPC variant values */ - void on_strand_do_handle_message(ipc::msg_apc_frame_data_t const & /*message*/) - { - LOG_DEBUG("Unexpected message ipc::msg_apc_frame_data_t; ignoring"); - return on_strand_do_receive_message(); - } - - /** Handle one of the IPC 
variant values */ - void on_strand_do_handle_message(ipc::msg_ready_t const & /*message*/) - { - LOG_DEBUG("Received ready message."); - return on_strand_do_receive_message(); - } - - /** Handle one of the IPC variant values */ - void on_strand_do_handle_message(ipc::msg_start_t const & /*message*/) - { - LOG_DEBUG("Unexpected message ipc::msg_start_t; ignoring"); - return on_strand_do_receive_message(); - } - - /** Handle one of the IPC variant values */ - void on_strand_do_handle_message(ipc::msg_exec_target_app_t const & /*message*/) - { - LOG_DEBUG("Unexpected message ipc::msg_exec_target_app_t; ignoring"); - return on_strand_do_receive_message(); - } - - /** Handle one of the IPC variant values */ - void on_strand_do_handle_message(ipc::msg_cpu_state_change_t const & /*message*/) - { - LOG_DEBUG("Unexpected message ipc::msg_cpu_state_change_t; ignoring"); - return on_strand_do_receive_message(); - } - - /** Handle the 'send bytes' IPC message variant. Transmit the bytes to the appropriate worker. */ - void on_strand_do_handle_message(ipc::msg_annotation_send_bytes_t message) - { - LOG_TRACE("Received %zu bytes for transmission to worker %d", message.suffix.size(), message.header); - - auto worker_it = socket_workers.find(message.header); - if (worker_it == socket_workers.end()) { - LOG_DEBUG("Received bytes for non-existant client %d", message.header); - return on_strand_do_receive_message(); - } - - auto worker = worker_it->second; - if (!worker) { - LOG_DEBUG("Received bytes for non-existant client %d", message.header); - return on_strand_do_receive_message(); - } - - return worker->async_send_bytes( - std::move(message.suffix), - [id = message.header, st = shared_from_this()](auto const & ec) { - if (ec) { - LOG_DEBUG("Failed to send bytes to worker %d due to %s", id, ec.message().c_str()); - // strand is used for synchronizing access to internal structures - return boost::asio::post(st->strand, [id, st]() { - // close the failed connection - st->on_strand_do_close_worker_by_id(id); - }); - } - - // strand is used for synchronizing access to internal structures - return boost::asio::post(st->strand, [st]() { st->on_strand_do_receive_message(); }); - }); - } - - /** Handle the 'close connection' IPC message variant. Close the appropriate worker. */ - void on_strand_do_handle_message(ipc::msg_annotation_close_conn_t const & message) + /** Close a worker given its unique ID */ + async::continuations::polymorphic_continuation_t<> co_close_worker_by_id(ipc::annotation_uid_t id) { - on_strand_do_close_worker_by_id(message.header); - } - - /** Handle the 'shutdown' IPC message variant. Shutdown the agent. 
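/* Illustrative aside (not part of the patch): the receive loop being removed here dispatched each
 * incoming IPC message by std::visit over a variant of message types, with one
 * on_strand_do_handle_message overload per alternative. A standalone sketch of that dispatch
 * style; the message structs below are simplified stand-ins for the real ipc::msg_* types. */
#include <cstdio>
#include <variant>

struct msg_ready_t {};
struct msg_shutdown_t {};
using message_variant_t = std::variant<std::monostate, msg_ready_t, msg_shutdown_t>;

// one operator() per variant alternative, selected by std::visit
struct message_handler_t {
    void operator()(std::monostate const & /*msg*/) const { std::puts("unexpected empty message; ignoring"); }
    void operator()(msg_ready_t const & /*msg*/) const { std::puts("received ready message"); }
    void operator()(msg_shutdown_t const & /*msg*/) const { std::puts("received shutdown message"); }
};

inline void handle_message(message_variant_t const & msg)
{
    std::visit(message_handler_t {}, msg);
}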
*/ - void on_strand_do_handle_message(ipc::msg_shutdown_t const & /*message*/) - { - LOG_DEBUG("Received shutdown message"); - return on_strand_do_shutdown(); - } - - /** Close one worker by its ID */ - void on_strand_do_close_worker_by_id(ipc::annotation_uid_t id) - { - auto worker_it = socket_workers.find(id); - if (worker_it == socket_workers.end()) { - LOG_DEBUG("Received close request for non-existant client %d", id); - return on_strand_do_receive_message(); - } + using namespace async::continuations; - auto worker = worker_it->second; - if (!worker) { - LOG_DEBUG("Received close request for non-existant client %d", id); - return on_strand_do_receive_message(); - } + auto self = this->shared_from_this(); - // remove from the map - socket_workers.erase(worker_it); + return start_on(strand) | then([self, id]() -> async::continuations::polymorphic_continuation_t<> { + auto worker_it = self->socket_workers.find(id); + if (worker_it == self->socket_workers.end()) { + LOG_DEBUG("Received close request for non-existent client %d", id); + return {}; + } - // close it - return worker->async_close([st = shared_from_this()]() { - // receive the next message - return boost::asio::post(st->strand, [st]() { st->on_strand_do_receive_message(); }); - }); - } + auto worker = worker_it->second; + if (!worker) { + LOG_DEBUG("Received close request for non-existent client %d", id); + return {}; + } - /** Do the work to shutdown the agent */ - void on_strand_do_shutdown() - { - // mark shutdown - if (std::exchange(is_shutdown, true)) { - LOG_DEBUG("Ignoring duplicate shutdown call"); - // was already shutdown. nothing to do - return on_strand_do_receive_message(); - } + // remove from the map + self->socket_workers.erase(worker_it); - LOG_TRACE("Closing all listeners"); - - // close all listeners so their can be no new inbound connections - for (auto & socket_listener : socket_listeners) { - socket_listener->close(); - } - - socket_listeners.clear(); - - LOG_TRACE("Closing all workers"); - - // close each worker - return on_strand_close_next_worker(); - } - - /** Close one worker (called iteratively) until such time as there are no workers left, then notify the IPC mechanism that the agent is shutdown */ - void on_strand_close_next_worker() - { - if (socket_workers.empty()) { - LOG_TRACE("All workers closed. 
Sending shutdown message"); - - // close the parent connections after writing a single 0-byte to each - for (auto & parent : parent_connections) { - parent->with_socket([parent](auto & socket) { - boost::asio::async_write(socket, - boost::asio::buffer(close_parent_bytes), - [parent](auto const & /*ec*/, auto /*n*/) { parent->close(); }); - }); - } - parent_connections.clear(); - - // send the 'shutdown' IPC message - return ipc_sink->async_send_message( - ipc::msg_shutdown_t {}, - [st = shared_from_this()](auto const & ec, auto const & /*msg*/) { - if (ec) { - LOG_DEBUG("Failed to send shutdown IPC to host due to %s", ec.message().c_str()); - } - else { - LOG_TRACE("Shutdown message sent"); - } - - // receive the next message (will terminate if the state is fully shutdown) - return boost::asio::post(st->strand, [st]() { st->on_strand_do_receive_message(); }); - }); - } - - // get the next item - auto it = socket_workers.begin(); - auto worker = it->second; - - LOG_TRACE("Closing worker %d (%p)", it->first, worker.get()); - - // remove from the map - socket_workers.erase(it); - - // close it - return worker->async_close([st = shared_from_this()]() { - LOG_TRACE("Closed worker"); - // close the next worker - return boost::asio::post(st->strand, [st]() { st->on_strand_close_next_worker(); }); - }); - } - - /** Handle the initiated async_wait_shutdown request */ - template - auto do_async_wait_shutdown(Handler handler) - { - // strand is used for synchronizing access to internal structures - return boost::asio::post(strand, [st = shared_from_this(), handler = std::move(handler)]() mutable { - // call directly if already shut down - if (st->is_shutdown) { - return handler(); - } - - // store it for later - st->shutdown_handlers.emplace_back(async::make_handler_ref(std::move(handler))); - }); + // close it + return worker->async_close(async::continuations::use_continuation); + }); } }; } diff --git a/daemon/agents/ext_source/ext_source_agent_main.cpp b/daemon/agents/ext_source/ext_source_agent_main.cpp index 3c7b7dd1..7e7a26e1 100644 --- a/daemon/agents/ext_source/ext_source_agent_main.cpp +++ b/daemon/agents/ext_source/ext_source_agent_main.cpp @@ -2,129 +2,29 @@ #include "agents/ext_source/ext_source_agent_main.h" #include "Logging.h" +#include "agents/agent_environment.h" #include "agents/ext_source/ext_source_agent.h" -#include "ipc/raw_ipc_channel_sink.h" #include "ipc/raw_ipc_channel_source.h" -#include "lib/AutoClosingFd.h" -#include "logging/agent_log.h" - -#include -#include -#include -#include -#include - -#include -#include - -#include -#include namespace agents { - namespace { - lib::AutoClosingFd dup_and_close(int fd) - { - lib::AutoClosingFd dup_fd {fcntl(fd, F_DUPFD_CLOEXEC)}; - - if (!dup_fd) { - // NOLINTNEXTLINE(concurrency-mt-unsafe) - LOG_DEBUG("fcntl failed with error %d (%s)", errno, strerror(errno)); - - // not ideal, but just use the FD directly - return lib::AutoClosingFd {fd}; - } - - // now close it - close(fd); - return dup_fd; - } - - void do_wait_signal(boost::asio::signal_set & signals, std::shared_ptr agent) - { - signals.async_wait([agent = std::move(agent), &signals](auto const & ec, auto signo) mutable { - if (ec) { - LOG_DEBUG("Signal handler received error %s", ec.message().c_str()); - return; - } - //NOLINTNEXTLINE(concurrency-mt-unsafe) - LOG_DEBUG("Received signal %d %s", signo, strsignal(signo)); - if ((signo == SIGHUP) || (signo == SIGTERM) || (signo == SIGINT)) { - agent->shutdown(); - } - else { - do_wait_signal(signals, std::move(agent)); - } - }); - 
} - } int ext_agent_main(char const * /*argv0*/, lib::Span args) { - // set process name - prctl(PR_SET_NAME, reinterpret_cast(&"gatord-agent-xs"), 0, 0, 0); - - // Set up global thread-safe logging - auto agent_logging = - std::make_shared(STDERR_FILENO, logging::agent_log_sink_t::get_log_file_fd()); - - logging::set_log_sink(agent_logging); - logging::set_log_enable_trace(args); - - try { - LOG_DEBUG("Started ext_agent_main"); - - // disable buffering on in/out/err - ::setvbuf(stdin, nullptr, _IONBF, 0); - ::setvbuf(stdout, nullptr, _IONBF, 0); - ::setvbuf(stderr, nullptr, _IONBF, 0); - - // get sighup if parent exits - ::prctl(PR_SET_PDEATHSIG, SIGHUP); - - // duplicate stdin/stdout, then close them so that some spurious read/write doesn't corrupt the IPC channel - auto ipc_in = dup_and_close(STDIN_FILENO); - auto ipc_out = dup_and_close(STDOUT_FILENO); - - // setup asio context - boost::asio::io_context io_context {}; - - // handle the usual signals (and SIGHUP) so we can shutdown properly - boost::asio::signal_set signals {io_context, SIGHUP, SIGTERM, SIGINT}; - - // create our IPC channels - auto ipc_sink = ipc::raw_ipc_channel_sink_t::create(io_context, std::move(ipc_out)); - auto ipc_source = ipc::raw_ipc_channel_source_t::create(io_context, std::move(ipc_in)); - - // create our agent - auto agent = ext_source_agent_t::create(io_context, ipc_sink, ipc_source); - agent->add_all_defaults(); - - // handle signals - do_wait_signal(signals, agent); - // and shutdown - agent->async_wait_shutdown([&io_context]() { - // fully shut down - LOG_DEBUG("Agent is shutdown. Stopping io_context."); - io_context.stop(); - }); - - // start the agent - agent->start(); - - // run the main work loop - io_context.run(); - } - catch (std::exception const & ex) { - LOG_FATAL("Unexpected exception received: what=%s", ex.what()); - return EXIT_FAILURE; - } - catch (...) 
{ - LOG_FATAL("Unexpected exception received."); - return EXIT_FAILURE; - } - - LOG_DEBUG("Terminating ext_source agent successfully."); - - return EXIT_SUCCESS; + return start_agent(args, [](auto /*args*/, auto & io, auto & pm, auto ipc_sink, auto ipc_source) { + // Wrap the create function so we can setup the default UDS and TCP listeners + auto factory = [](auto & io, auto & /*pm*/, auto sink, auto terminator) { + auto agent = ext_source_agent_t::create(io, std::move(sink), std::move(terminator)); + agent->add_all_defaults(); + + return agent; + }; + + return agent_environment_t::create("gator-agent-xs", + io, + pm, + std::move(factory), + std::move(ipc_sink), + std::move(ipc_source)); + }); } } diff --git a/daemon/agents/ext_source/ext_source_agent_worker.h b/daemon/agents/ext_source/ext_source_agent_worker.h index d5d577b7..718292cc 100644 --- a/daemon/agents/ext_source/ext_source_agent_worker.h +++ b/daemon/agents/ext_source/ext_source_agent_worker.h @@ -73,7 +73,7 @@ namespace agents { auto st = this->shared_from_this(); - return start_on(strand) // + return start_on(strand) // | then([st, uid]() -> polymorphic_continuation_t<> { // close this end first auto it = st->external_source_pipes.find(uid); @@ -147,6 +147,18 @@ namespace agents { LOG_DEBUG("Unexpected message ipc::msg_cpu_state_change_t; ignoring"); } + /** Handle one of the IPC variant values */ + static void cont_on_recv_message(ipc::msg_capture_failed_t const & /*message*/) + { + LOG_DEBUG("Unexpected message ipc::msg_capture_failed_t; ignoring"); + } + + /** Handle one of the IPC variant values */ + static void cont_on_recv_message(ipc::msg_capture_started_t const & /*message*/) + { + LOG_DEBUG("Unexpected message ipc::msg_capture_started_t; ignoring"); + } + /** Handle the 'ready' IPC message variant. The agent is ready. */ void cont_on_recv_message(ipc::msg_ready_t const & /*message*/) { @@ -266,8 +278,10 @@ namespace agents { return repeatedly( [st]() { - return start_on(st->strand) // - | then([st]() { return (st->get_state() != state_t::terminated); }); + // don't stop until the agent terminates and closes the connection from its end + LOG_DEBUG("Receive loop would have terminated? %d", + (st->get_state() >= state_t::terminated_pending_message_loop)); + return true; }, [st]() { return st->source().async_recv_message(use_continuation) // @@ -284,27 +298,33 @@ namespace agents { static constexpr char const * get_agent_process_id() { return agent_id_ext_source.data(); } ext_source_agent_worker_t(boost::asio::io_context & io_context, - agent_process_t const & agent_process, + agent_process_t && agent_process, state_change_observer_t && state_change_observer, ExternalSource & external_source) - : agent_worker_base_t(agent_process, std::move(state_change_observer)), + : agent_worker_base_t(std::move(agent_process), std::move(state_change_observer)), strand(io_context), external_source(external_source) { } /** Start the worker. 
Spawns the receive-message loop on the io_context */ - void start() + [[nodiscard]] bool start() { using namespace async::continuations; - cont_recv_message_loop() // - | finally([st = this->shared_from_this()](auto err) { - // log the failure - if (error_swallower_t::consume("IPC message loop", err)) { + spawn("IPC message loop", + cont_recv_message_loop(), // + [st = this->shared_from_this()](bool error) { + LOG_DEBUG("Receive loop ended"); + + boost::asio::post(st->strand, [st]() { st->set_message_loop_terminated(); }); + + if (error) { st->shutdown(); } }); + + return this->exec_agent(); } /** Called when SIGCHLD is received for the remote process */ @@ -312,13 +332,13 @@ namespace agents { { using namespace async::continuations; - start_on(strand) // - | then([st = this->shared_from_this()]() { - if (st->transition_state(state_t::terminated)) { - LOG_DEBUG("ext_source agent is now terminated"); - } - }) // - | DETACH_LOG_ERROR("SIGCHLD handler operation"); + spawn("SIGCHLD handler operation", + start_on(strand) // + | then([st = this->shared_from_this()]() { + if (st->transition_state(state_t::terminated)) { + LOG_DEBUG("ext_source agent is now terminated"); + } + })); } /** Called to shutdown the remote process and worker */ @@ -326,8 +346,10 @@ namespace agents { { using namespace async::continuations; - cont_shutdown() // - | DETACH_LOG_ERROR("Shutdown request"); + spawn("Shutdown request", cont_shutdown()); } + + protected: + [[nodiscard]] boost::asio::io_context::strand & work_strand() override { return strand; } }; } diff --git a/daemon/agents/perf/apc_encoders.cpp b/daemon/agents/perf/apc_encoders.cpp deleted file mode 100644 index 77acfa11..00000000 --- a/daemon/agents/perf/apc_encoders.cpp +++ /dev/null @@ -1,104 +0,0 @@ -/* Copyright (C) 2022 by Arm Limited. All rights reserved. 
*/ - -#include "agents/perf/apc_encoders.h" - -#include "Protocol.h" -#include "agents/perf/async_buffer_builder.h" -#include "agents/perf/record_types.h" - -#include - -namespace agents::perf::encoders { - - std::size_t data_record_apc_encoder_t::get_bytes_required(const data_record_chunk_tuple_t & record, - std::size_t offset_in_record) - { - return (record.number_of_elements() - offset_in_record) * buffer_utils::MAXSIZE_PACK64; - } - - std::size_t data_record_apc_encoder_t::encode_into(async::async_buffer_t::mutable_buffer_type buffer, - async::async_buffer_t::commit_action_t action, - const data_record_chunk_tuple_t & record, - int cpu, - uint64_t /*tail_pointer*/, - std::size_t offset_in_record) - { - auto builder = async_buffer_builder_t(buffer, std::move(action)); - - builder.beginFrame(FrameType::PERF_DATA); - builder.packInt(cpu); - // skip the length field for now - const auto length_index = builder.getWriteIndex(); - builder.advanceWrite(4); - - std::size_t bytes_written = 0; - - auto offset = offset_in_record; - - // copy as much of the first record as we can - const auto & first = record.first_chunk; - for (; offset < first.word_count && builder.bytesAvailable() >= buffer_utils::MAXSIZE_PACK64; ++offset) { - auto value = static_cast(first.chunk_pointer[offset]); - bytes_written += builder.packInt64(value); - } - - // if there's a second chunk, and we have space, start copying it - auto second = record.optional_second_chunk; - if (offset >= first.word_count && second.chunk_pointer != nullptr) { - auto second_offset = offset - first.word_count; - - for (; second_offset < second.word_count && builder.bytesAvailable() >= buffer_utils::MAXSIZE_PACK64; - ++second_offset) { - auto value = static_cast(second.chunk_pointer[second_offset]); - bytes_written += builder.packInt64(value); - } - - offset = second_offset + first.word_count; - } - - // now fill in the length field - const std::array length_buffer {char(bytes_written), - char(bytes_written >> 8), - char(bytes_written >> 16), - char(bytes_written >> 24)}; - builder.writeDirect(length_index, length_buffer.data(), length_buffer.size()); - - // commit the frame - builder.endFrame(); - - // return the offset of the first data word that we didn't manage to consume. - // the next iteration will pick up from here - return offset; - } - - std::size_t aux_record_apc_encoder_t::get_bytes_required(const aux_record_chunk_t & record, - std::size_t offset_in_record) - { - return (record.number_of_elements() - offset_in_record) * buffer_utils::MAXSIZE_PACK64; - } - - std::size_t aux_record_apc_encoder_t::encode_into(async::async_buffer_t::mutable_buffer_type buffer, - async::async_buffer_t::commit_action_t action, - const aux_record_chunk_t & record, - int cpu, - uint64_t tail_pointer, - std::size_t offset_in_record) - { - auto builder = async_buffer_builder_t(buffer, std::move(action)); - - // after the header, how many bytes of the record can we fit into the buffer? 
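/* Illustrative aside (not part of the patch): the data-record encoder being deleted here reserves
 * a 4-byte slot after the PERF_DATA header and back-patches the payload length once it is known,
 * one little-endian byte at a time; the new writeLeUint32At() in async_buffer_builder.h keeps the
 * same trick. A standalone sketch of that reserve/back-patch pattern (function names are
 * hypothetical). */
#include <array>
#include <cstdint>
#include <cstring>
#include <vector>

// Reserve a zeroed 4-byte length slot at the current end of the frame; returns its index.
inline std::size_t reserve_length_slot(std::vector<char> & frame)
{
    auto const index = frame.size();
    frame.insert(frame.end(), 4, '\0');
    return index;
}

// Back-patch the slot with `value`, stored little-endian (assumes the slot was reserved above).
inline void patch_le_uint32(std::vector<char> & frame, std::size_t index, std::uint32_t value)
{
    std::array<char, 4> const bytes {char(value), char(value >> 8U), char(value >> 16U), char(value >> 24U)};
    std::memcpy(frame.data() + index, bytes.data(), bytes.size());
}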
- const auto bytes_left_in_record = record.byte_count - offset_in_record; - const auto num_bytes_to_copy = std::min(bytes_left_in_record, buffer.size() - max_header_size); - - builder.beginFrame(FrameType::PERF_AUX); - builder.packInt(cpu); - builder.packInt64(static_cast(tail_pointer)); - builder.packInt(static_cast(num_bytes_to_copy)); - builder.writeBytes(record.chunk_pointer + offset_in_record, num_bytes_to_copy); - builder.endFrame(); - - // return the new offset so we can pick up from this point on the next iteration - return offset_in_record + num_bytes_to_copy; - } - -} diff --git a/daemon/agents/perf/apc_encoders.h b/daemon/agents/perf/apc_encoders.h deleted file mode 100644 index 214c1312..00000000 --- a/daemon/agents/perf/apc_encoders.h +++ /dev/null @@ -1,85 +0,0 @@ -/* Copyright (C) 2022 by Arm Limited. All rights reserved. */ - -#pragma once - -#include "BufferUtils.h" -#include "agents/perf/record_types.h" -#include "async/async_buffer.hpp" - -#include - -namespace agents::perf::encoders { - - /** - * Instances of this class are capable of encoding a record from the Perf ring buffer - * in the APC format and writing it into a preallocated buffer. - */ - class data_record_apc_encoder_t { - public: - // TODO: the old PerfToMemoryBuffer code doesn't impose a limit on the - // size of a PERF_DATA payload. Might need to revisit this as it won't - // work with the async_buffer_t allocation limits - static constexpr int max_payload_size = std::numeric_limits::max(); - static constexpr int max_header_size = buffer_utils::MAXSIZE_PACK32 // frame type - + buffer_utils::MAXSIZE_PACK32 // cpu - + sizeof(std::uint32_t); // blob length - - /** - * Calculates the number of bytes of buffer space required to fully encode the - * remainder of the data record. - * - * @param record The record that is to be processed. - * @param offset_in_record The offset of the first unconsumed element in the record. - * @return The number of bytes required to encode the record data. - */ - static std::size_t get_bytes_required(const data_record_chunk_tuple_t & record, std::size_t offset_in_record); - - /** - * Encode the contents of the record into the specified buffer. The buffer will be at most - * max_header_size + max_payload_size bytes in length. If the encoded record won't fit into - * that space a second call will be made to this method with a buffer for the remainder. - * - * @param buffer The preallocated buffer to copy into. - * @param action Once the buffer has been populated either action.commit() or action.discard() - * should be called to indicate that the buffer has been used and should be passed to a - * consumer. - * @param record The record to encode. - * @param cpu The record was taken from this CPU's ring buffer. - * @param tail_pointer The tail pointer of the CPU's ring buffer at the point this - * record was taken. - * @param offset_in_record The index of the first unconsumed element in the record. - * Elements before this will have already been encoded by prior calls to encode_into. - * @return An updated offset into the record of the next element to be encoded. In other - * words: offset_in_record + number of elements encoded by this call. 
- */ - static std::size_t encode_into(async::async_buffer_t::mutable_buffer_type buffer, - async::async_buffer_t::commit_action_t action, - const data_record_chunk_tuple_t & record, - int cpu, - uint64_t tail_pointer, - std::size_t offset_in_record); - }; - - /** - * Instances of this class are capable of encoding a record from the Perf aux ring buffer - * in the APC format and writing it into a preallocated buffer. - */ - class aux_record_apc_encoder_t { - public: - static constexpr int max_header_size = buffer_utils::MAXSIZE_PACK32 // frame type - + buffer_utils::MAXSIZE_PACK32 // cpu - + buffer_utils::MAXSIZE_PACK64 // tail - + buffer_utils::MAXSIZE_PACK32; // size - static constexpr int max_payload_size = ISender::MAX_RESPONSE_LENGTH - max_header_size; - - static std::size_t get_bytes_required(const aux_record_chunk_t & record, std::size_t offset_in_record); - - static std::size_t encode_into(async::async_buffer_t::mutable_buffer_type buffer, - async::async_buffer_t::commit_action_t action, - const aux_record_chunk_t & record, - int cpu, - uint64_t tail_pointer, - std::size_t offset_in_record); - }; - -} diff --git a/daemon/agents/perf/async_buffer_builder.h b/daemon/agents/perf/async_buffer_builder.h index 0bc8a8c8..68b7cfea 100644 --- a/daemon/agents/perf/async_buffer_builder.h +++ b/daemon/agents/perf/async_buffer_builder.h @@ -5,7 +5,6 @@ #include "BufferUtils.h" #include "IRawFrameBuilder.h" #include "Logging.h" -#include "async/async_buffer.hpp" #include "lib/Assert.h" #include "lib/Span.h" @@ -36,22 +35,18 @@ namespace agents::perf { * @endcode */ template - class apc_buffer_builder_t : public IRawFrameBuilderWithDirectAccess { + class apc_buffer_builder_t { public: explicit apc_buffer_builder_t(BufferType & buffer) : buffer(buffer), write_index(0), start_of_current_frame(0) { } - ~apc_buffer_builder_t() override = default; - - // - // IRawFrameBuilder methods - // /** - * Buffer starts with FrameType. - * The Response Type header (eg: for APC, ResponseType::APC_DATA)), is not added to the buffer. - */ - void beginFrame(FrameType frameType) override + * Begins a new frame + * + * There must be no current frame + */ + void beginFrame(FrameType frameType) { start_of_current_frame = write_index; @@ -59,18 +54,25 @@ namespace agents::perf { } /** - * Discards any data written in the current frame and resets the write index. - */ - void abortFrame() override + * Aborts the current frame + * + * There must be a current frame + * There will be no current frame afterwards + */ + void abortFrame() { write_index = start_of_current_frame; buffer.resize(write_index); } /** - * Finishes the current frame. The buffer will no include the message length prefix. - */ - void endFrame() override + * Ends the current frame and commits it to the buffer + * + * There must be a current frame + * There will be no current frame afterwards + * Does not flush the buffer + */ + void endFrame() { auto payload_length = write_index - start_of_current_frame; if (payload_length <= frame_header_size) { @@ -83,40 +85,94 @@ namespace agents::perf { } /** - * Flush is not required for this implementation as the whole buffer is either committed - * or discarded when the builder instance is disposed. - * - * @return Always returns false. 
- */ - bool needsFlush() override { return false; } + * Gets the number of bytes available in the backing buffer + */ + [[nodiscard]] std::size_t bytesAvailable() const { return buffer.max_size() - write_index; } + + /** + * Packs a 32 bit number + * + * Must be required bytes available + */ + std::size_t packInt(std::int32_t x) + { + ensure_space_at(write_index, buffer_utils::MAXSIZE_PACK32); + int ignored = 0; // we don't use the wrapping feature + std::size_t != int + std::size_t n = buffer_utils::packInt(buffer.data() + write_index, ignored, x); + write_index += n; + return n; + } + + /** + * Packs a 32 bit number + * + * Must be required bytes available + */ + std::size_t packInt(std::uint32_t x) { return packInt(std::int32_t(x)); } + + /** + * Packs a 64 bit number + * + * Must be required bytes available + */ + std::size_t packInt64(std::int64_t x) + { + ensure_space_at(write_index, buffer_utils::MAXSIZE_PACK64); + int ignored = 0; // we don't use the wrapping feature + std::size_t != int + std::size_t n = buffer_utils::packInt64(buffer.data() + write_index, ignored, x); + write_index += n; + return n; + } /** - * Not needed for this implementation. This is a no-op. - */ - void flush() override {} + * Packs a 64 bit number + * + * Must be required bytes available + */ + std::size_t packInt64(std::uint64_t x) { return packInt64(std::int64_t(x)); } - [[nodiscard]] int bytesAvailable() const override + /** + * Packs a size_t number + * + * Must be required bytes available + */ + std::size_t packIntSize(std::size_t x) { - std::size_t available = buffer.max_size() - write_index; - if (available > std::numeric_limits::max()) { - return std::numeric_limits::max(); + if constexpr (sizeof(std::size_t) <= sizeof(std::uint32_t)) { + return packInt(std::uint32_t(x)); + } + else { + return packInt64(std::uint64_t(x)); } - return static_cast(available); } - int packInt(std::int32_t x) override + /** + * Packs a monotonic_delta_t + * + * Must be required bytes available + */ + std::size_t packMonotonicDelta(monotonic_delta_t x) { return packInt64(std::uint64_t(x)); } + + /** Write a 32-bit unsigned int in little endian form */ + void writeLeUint32(std::uint32_t n) { - ensure_space_at(write_index, buffer_utils::MAXSIZE_PACK32); - return buffer_utils::packInt(buffer.data(), write_index, x); + std::array const buffer {char(n), char(n >> 8U), char(n >> 16U), char(n >> 24U)}; + writeBytes(buffer.data(), buffer.size()); } - int packInt64(std::int64_t x) override + /** Write a 32-bit unsigned int in little endian form */ + void writeLeUint32At(std::size_t index, std::uint32_t n) { - ensure_space_at(write_index, buffer_utils::MAXSIZE_PACK64); - return buffer_utils::packInt64(buffer.data(), write_index, x); + std::array const buffer {char(n), char(n >> 8U), char(n >> 16U), char(n >> 24U)}; + writeDirect(index, buffer.data(), buffer.size()); } - void writeBytes(const void * data, std::size_t count) override + /** + * Writes some arbitrary bytes to the frame + * + * Must be required bytes available + */ + void writeBytes(const void * data, std::size_t count) { if (count == 0) { return; @@ -128,7 +184,12 @@ namespace agents::perf { write_index += count; } - void writeString(std::string_view str) override + /** + * Writes a string to the frame + * + * Must be required bytes available + */ + void writeString(std::string_view str) { auto len = str.size(); if (len > std::numeric_limits::max()) { @@ -138,34 +199,31 @@ namespace agents::perf { writeBytes(str.data(), len); } - void waitForSpace(int bytes) override - 
{ - if (!supportsWriteOfSize(bytes)) { - runtime_assert(false, "Attempted to overflow apc_buffer_builder_t size"); - } - } - - [[nodiscard]] bool supportsWriteOfSize(int bytes) const override + /** Checks if it is possible to write a block of the given size to this buffer + * + * @param bytes Number of bytes to check + * @return True if it is possible, or false if would always fail + */ + [[nodiscard]] bool supportsWriteOfSize(std::size_t bytes) const { if (bytes < 0) { return false; } - return static_cast(bytes) <= buffer.max_size() - static_cast(write_index); + return bytes <= (buffer.max_size() - getWriteIndex()); } - // - // IRawFrameBuilderWithDirectAccess methods - // + /** @return The raw write index */ + [[nodiscard]] std::size_t getWriteIndex() const { return write_index; } - [[nodiscard]] int getWriteIndex() const override { return write_index; } - - void advanceWrite(int bytes) override + /** Skip the write index forward by 'bytes' */ + void advanceWrite(std::size_t bytes) { ensure_space_at(write_index, bytes); write_index += bytes; } - void writeDirect(int index, const void * data, std::size_t count) override + /** Write directly into the buffer */ + void writeDirect(std::size_t index, const void * data, std::size_t count) { if (count == 0) { return; @@ -175,163 +233,31 @@ namespace agents::perf { ::memcpy(buffer.data() + index, data, count); } + void trimTo(std::size_t size) + { + runtime_assert(size <= write_index, "trimTo cannot extend the buffer"); + + buffer.resize(size); + write_index = size; + } + private: // number of bytes in a frame header. frames will need to be bigger than // this to be committed to the buffer - static constexpr int frame_header_size = 1; + static constexpr std::size_t frame_header_size = 1; BufferType & buffer; - int write_index; - int start_of_current_frame; + std::size_t write_index; + std::size_t start_of_current_frame; - void ensure_space_at(int pos, int bytes) + void ensure_space_at(std::size_t pos, std::size_t bytes) { runtime_assert(pos + bytes > 0, "Size must not be negative"); - const auto request_size = static_cast(pos + bytes); + const auto request_size = (pos + bytes); runtime_assert(request_size <= buffer.max_size(), "Cannot grow apc_buffer_builder_t past its limit"); if (buffer.size() < request_size) { buffer.resize(request_size); } } }; - - /** - * An adapter that allows an async::async_buffer_t to be used as an APC frame builder. - */ - class async_buffer_builder_t : public IRawFrameBuilderWithDirectAccess { - public: - /** - * Constructs an async_buffer_builder_t that wraps the specified async_buffer_t. - * The commit_action_t is used to commit or discard the underlying buffer based on - * whether any frames were written out. 
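/* Illustrative aside (not part of the patch): packInt()/packInt64() in the builder above delegate
 * to buffer_utils::packInt/packInt64, which emit variable-length integers (which is why a
 * MAXSIZE_PACK32 or MAXSIZE_PACK64 worth of space is reserved before each pack). As a generic
 * illustration of that kind of encoding, here is a standalone signed-LEB128 packer; it is only a
 * sketch of the idea, not necessarily gatord's exact wire format. */
#include <cstddef>
#include <cstdint>
#include <vector>

// Append a signed-LEB128 encoding of `value` to `out`; returns the number of bytes written.
inline std::size_t pack_sleb128(std::vector<char> & out, std::int64_t value)
{
    std::size_t written = 0;
    while (true) {
        auto byte = static_cast<std::uint8_t>(value & 0x7f);
        value >>= 7; // arithmetic shift, so the sign is preserved
        bool const done = (value == 0 && (byte & 0x40) == 0) || (value == -1 && (byte & 0x40) != 0);
        if (!done) {
            byte |= 0x80; // set the continuation bit
        }
        out.push_back(static_cast<char>(byte));
        ++written;
        if (done) {
            return written;
        }
    }
}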
- */ - async_buffer_builder_t(async::async_buffer_t::mutable_buffer_type buffer, - async::async_buffer_t::commit_action_t commit_action) - : writer(buffer), builder(writer), commit_action(std::move(commit_action)) - { - } - - async_buffer_builder_t(const async_buffer_builder_t &) = delete; - async_buffer_builder_t & operator=(const async_buffer_builder_t &) = delete; - - ~async_buffer_builder_t() override - { - const auto size = builder.getWriteIndex(); - if (size > 0) { - boost::system::error_code ec {}; - if (!commit_action.commit(ec, size)) { - LOG_ERROR("Failed to commit %d bytes to async_buffer_t: %s", size, ec.message().c_str()); - } - } - else { - commit_action.discard(); - } - } - - // - // IRawFrameBuilder methods - // - /** - * @copydoc apc_buffer_builder_t::beginFrame(FrameType) - */ - void beginFrame(FrameType frameType) override { builder.beginFrame(frameType); } - - /** - * @copydoc apc_buffer_builder_t::abortFrame() - */ - void abortFrame() override { builder.abortFrame(); } - - /** - * @copydoc apc_buffer_builder_t::endFrame() - */ - void endFrame() override { builder.endFrame(); } - - /** - * @copydoc apc_buffer_builder_t::needsFlush() - */ - bool needsFlush() override { return builder.needsFlush(); } - - /** - * @copydoc apc_buffer_builder_t::flush() - */ - void flush() override { builder.flush(); } - - /** - * @copydoc apc_buffer_builder_t::bytesAvailable() - */ - [[nodiscard]] int bytesAvailable() const override { return builder.bytesAvailable(); } - - /** - * @copydoc apc_buffer_builder_t::packInt(std::int32_t) - */ - int packInt(std::int32_t x) override { return builder.packInt(x); } - - /** - * @copydoc apc_buffer_builder_t::packInt65(std::int64_t) - */ - int packInt64(std::int64_t x) override { return builder.packInt64(x); } - - /** - * @copydoc apc_buffer_builder_t::writeBytes(const void*, std::size_t) - */ - void writeBytes(const void * data, std::size_t count) override { return builder.writeBytes(data, count); } - - /** - * @copydoc apc_buffer_builder_t::writeString(std::string_view) - */ - void writeString(std::string_view str) override { return builder.writeString(str); } - /** - * @copydoc apc_buffer_builder_t::waitForSpace(int) - */ - void waitForSpace(int bytes) override { builder.waitForSpace(bytes); } - - /** - * @copydoc apc_buffer_builder_t::supportsWriteOfSize(int) - */ - [[nodiscard]] bool supportsWriteOfSize(int bytes) const override { return builder.supportsWriteOfSize(bytes); } - - // - // IRawFrameBuilderWithDirectAccess methods - // - - /** - * @copydoc apc_buffer_builder_t::getWriteIndex() - */ - [[nodiscard]] int getWriteIndex() const override { return builder.getWriteIndex(); } - - /** - * @copydoc apc_buffer_builder_t::advanceWrite(int) - */ - void advanceWrite(int bytes) override { builder.advanceWrite(bytes); } - - /** - * @copydoc apc_buffer_builder_t::writeDirect(int, const void *, std::size_t) - */ - void writeDirect(int index, const void * data, std::size_t count) override - { - builder.writeDirect(index, data, count); - } - - private: - class char_span_writer_t { - public: - explicit char_span_writer_t(lib::Span span) : span(span), write_pointer(0) {} - - [[nodiscard]] char * data() { return span.data(); } - - [[nodiscard]] std::size_t size() const { return span.size(); } - - [[nodiscard]] std::size_t max_size() const { return span.size(); } - - void resize(std::size_t size) { write_pointer = std::min(size, span.size()); } - - private: - lib::Span span; - std::size_t write_pointer; - }; - - char_span_writer_t writer; - 
apc_buffer_builder_t builder; - async::async_buffer_t::commit_action_t commit_action; - }; } diff --git a/daemon/agents/perf/async_perf_ringbuffer_monitor.hpp b/daemon/agents/perf/async_perf_ringbuffer_monitor.hpp new file mode 100644 index 00000000..5557f05c --- /dev/null +++ b/daemon/agents/perf/async_perf_ringbuffer_monitor.hpp @@ -0,0 +1,697 @@ +/* Copyright (C) 2022 by Arm Limited. All rights reserved. */ + +#pragma once + +#include "agents/perf/events/perf_ringbuffer_mmap.hpp" +#include "agents/perf/events/types.hpp" +#include "agents/perf/record_types.h" +#include "async/continuations/async_initiate.h" +#include "async/continuations/continuation.h" +#include "async/continuations/operations.h" +#include "async/continuations/stored_continuation.h" +#include "async/continuations/use_continuation.h" +#include "ipc/raw_ipc_channel_sink.h" +#include "lib/Assert.h" +#include "lib/EnumUtils.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace agents::perf { + + /** + * Monitors a set of file descriptors, and maintains a polling timer such that whenever an FD is readable, or whenever the timer fires, one or more of the associated data buffers will be flushed into the capture + * + * @tparam PerfActivator The perf_activator_t type, used to reenable aux fds + * @tparam PerfBufferConsumer The perf_buffer_consumer_t type + * @tparam StreamDescriptor The file descriptor type + */ + template + class async_perf_ringbuffer_monitor_t + : public std::enable_shared_from_this< + async_perf_ringbuffer_monitor_t> { + public: + using perf_activator_t = PerfActivator; + using perf_buffer_consumer_t = PerfBufferConsumer; + using stream_descriptor_t = StreamDescriptor; + using fd_aux_flag_pair_t = std::pair, bool>; + + static constexpr auto live_poll_interval = std::chrono::milliseconds(100); + static constexpr auto local_poll_interval = std::chrono::seconds(1); + + async_perf_ringbuffer_monitor_t(boost::asio::io_context & context, + std::shared_ptr const & ipc_sink, + std::shared_ptr const & perf_activator, + bool live_mode, + std::size_t one_shot_mode_limit) + : timer(context), + strand(context), + perf_activator(perf_activator), + perf_buffer_consumer(std::make_shared(context, ipc_sink, one_shot_mode_limit)), + live_mode(live_mode) + { + } + + async_perf_ringbuffer_monitor_t(boost::asio::io_context & context, + std::shared_ptr const & perf_activator, + std::shared_ptr perf_buffer_consumer, + bool live_mode) + : timer(context), + strand(context), + perf_activator(perf_activator), + perf_buffer_consumer(std::move(perf_buffer_consumer)), + live_mode(live_mode) + { + } + + /** Is the monitor was requested to terminate */ + [[nodiscard]] bool is_terminate_requested() const { return terminate_requested; } + /** Is the monitor terminated */ + [[nodiscard]] bool is_terminate_completed() const { return terminate_complete; } + + /** Start the polling timer */ + void start_timer() { do_start_timer(); } + + /** Terminate the monitor */ + void terminate() + { + using namespace async::continuations; + + LOG_TRACE("Terminating..."); + + spawn("stop perf event monitor", + start_on(strand) // + | then([st = this->shared_from_this()]() -> polymorphic_continuation_t<> { + st->terminate_requested = true; + st->timer.cancel(); + + for (auto & stream : st->primary_streams) { + boost::system::error_code ignored {}; + stream->close(ignored); + } + + for (auto & stream : st->supplimentary_streams) { + boost::system::error_code ignored 
{}; + stream->close(ignored); + } + + if (st->primary_streams.empty() && st->supplimentary_streams.empty()) { + // If there are no monitored streams then the termination_handler will never be called, so + // call the remove processing directly + return st->async_try_poll(); + } + + return {}; + })); + } + + /** + * Wait for notification that the required number of bytes is sent in one-shot mode + * NB: will never notify if one-shot mode is disabled + */ + template + auto async_wait_one_shot_full(CompletionToken && token) + { + return perf_buffer_consumer->async_wait_one_shot_full(std::forward(token)); + } + + /** + * Add a new ring buffer to the set of monitored ringbuffers + */ + template + auto async_add_ringbuffer(int cpu, + std::vector primary_fds, + std::vector supplimentary_fds, + std::shared_ptr mmap, + CompletionToken && token) + { + using namespace async::continuations; + + LOG_TRACE("async_add_ringbuffer(%d, %zu, %zu, %p)", + cpu, + primary_fds.size(), + supplimentary_fds.size(), + mmap->header()); + + return async_initiate( + [st = this->shared_from_this(), + primary_fds = std::move(primary_fds), + supplimentary_fds = std::move(supplimentary_fds), + mmap = std::move(mmap), + cpu]() mutable { + return start_on(st->strand) // + | then([st, cpu]() { + // should not already be tracked? + runtime_assert(st->cpu_fd_counter.find(cpu) == st->cpu_fd_counter.end(), + "a mmap is already tracked"); + }) // + | st->perf_buffer_consumer->async_add_ringbuffer(cpu, + std::move(mmap), + use_continuation) // + | map_error() // + | post_on(st->strand) // + | then([st, + primary_fds = std::move(primary_fds), + supplimentary_fds = std::move(supplimentary_fds), + cpu]() { + for (auto pair : primary_fds) { + st->do_observer_perf_fd(cpu, pair.first, true, pair.second); + } + for (auto pair : supplimentary_fds) { + st->do_observer_perf_fd(cpu, pair.first, false, pair.second); + } + }); + }, + std::forward(token)); + } + + /** + * Add a new ring buffer to the set of monitored ringbuffers + */ + template + auto async_add_additional_event_fds(std::vector> primary_fds, + std::vector> supplimentary_fds, + CompletionToken && token) + { + using namespace async::continuations; + + LOG_TRACE("async_add_additional_event_fds(%zu, %zu)", primary_fds.size(), supplimentary_fds.size()); + + return async_initiate( + [st = this->shared_from_this(), + primary_fds = std::move(primary_fds), + supplimentary_fds = std::move(supplimentary_fds)]() mutable { + return start_on(st->strand) // + | then([st, + primary_fds = std::move(primary_fds), + supplimentary_fds = std::move(supplimentary_fds)]() { + for (auto pair : primary_fds) { + st->do_observer_perf_fd(lib::toEnumValue(pair.first), + pair.second.first, + true, + pair.second.second); + } + for (auto pair : supplimentary_fds) { + st->do_observer_perf_fd(lib::toEnumValue(pair.first), + pair.second.first, + false, + pair.second.second); + } + }); + }, + std::forward(token)); + } + + /** + * Wait for a specific mmap to be removed + */ + template + auto await_mmap_removed(int cpu, CompletionToken && token) + { + using namespace async::continuations; + + LOG_TRACE("await_mmap_removed(%d)", cpu); + + return async_initiate_explicit( + [st = this->shared_from_this(), cpu](auto && sc) mutable { + submit(start_on(st->strand) // + | then([st, sc = sc.move(), cpu]() mutable { + // is it already not tracked, just let the continuation know + auto it = st->cpu_fd_counter.find(cpu); + if (it == st->cpu_fd_counter.end()) { + LOG_TRACE("mmap %d is already removed", cpu); + return 
resume_continuation(st->strand.context(), std::move(sc)); + } + + // store it for later + auto res = st->cpu_shutdown_monitors.try_emplace(cpu, std::move(sc)); + + // shouldn't be two handlers + runtime_assert(!res.second, "Can't register two mmap removal handlers"); + }), + sc.get_exceptionally()); + }, + std::forward(token)); + } + + /** + * Wait for the ringbuffer to be fully terminated (i.e terminate is requested, and all buffers are removed and fully drained) + */ + template + auto async_wait_terminated(CompletionToken && token) + { + using namespace async::continuations; + + LOG_TRACE("async_wait_terminated()"); + + return async_initiate_explicit( + [st = this->shared_from_this()](auto && sc) mutable { + submit(start_on(st->strand) // + | then([st, sc = sc.move()]() mutable { + // if already terminated, just let the continuation know + if (st->terminate_complete) { + LOG_TRACE("already terminated"); + return resume_continuation(st->strand.context(), std::move(sc)); + } + + // shouldn't be two + runtime_assert(!st->termination_handler, + "Can't register two termination handlers"); + + // store it for later + st->termination_handler = std::move(sc); + }), + sc.get_exceptionally()); + }, + std::forward(token)); + } + + private: + boost::asio::steady_timer timer; + boost::asio::io_context::strand strand; + std::shared_ptr perf_activator; + std::shared_ptr perf_buffer_consumer; + // pending_cpus is split into a read and write list so that the poll/remove loop + // doesn't get blocked from calling the remove part of the loop by + // cpu-nos being repeatedly added to the pending list. + std::array, 2> pending_cpus_rw {}; + std::deque * pending_cpus_read {&pending_cpus_rw[0]}; + std::deque * pending_cpus_write {&pending_cpus_rw[1]}; + std::array>>, 2> cpu_aux_streams_rw {}; + std::map>> * cpu_aux_streams_read {&cpu_aux_streams_rw[0]}; + std::map>> * cpu_aux_streams_write {&cpu_aux_streams_rw[1]}; + std::deque removed_cpus {}; + std::map cpu_fd_counter {}; + std::map> cpu_shutdown_monitors {}; + std::set> primary_streams {}; + std::set> supplimentary_streams {}; + async::continuations::stored_continuation_t<> termination_handler {}; + bool live_mode; + bool busy_polling {false}; + bool poll_all {false}; + bool terminate_complete {false}; + bool terminate_requested {false}; + bool any_added {false}; + + /** Asynchronously remove one item from the remove list */ + async::continuations::polymorphic_continuation_t async_remove() + { + using namespace async::continuations; + + LOG_TRACE("called async_remove, t=%u, a=%u, p=%zu, s=%zu, r=%zu", + terminate_requested, + any_added, + primary_streams.size(), + supplimentary_streams.size(), + removed_cpus.size()); + + if (!removed_cpus.empty()) { + auto const cpu_no = removed_cpus.front(); + removed_cpus.pop_front(); + + LOG_TRACE("Requesting to remove ringbuffer for cpu %d", cpu_no); + + return start_on(strand.context()) // + | perf_buffer_consumer->async_remove_ringbuffer(cpu_no, + use_continuation) // + | post_on(strand) // + | then([cpu_no, st = this->shared_from_this()](auto ec) { + LOG_TRACE("Removed cpu #%d, got ec=%s", cpu_no, ec.message().c_str()); + + // remove the counter + st->cpu_fd_counter.erase(cpu_no); + + // notify the handler + auto handler = std::move(st->cpu_shutdown_monitors[cpu_no]); + if (handler) { + LOG_TRACE("notifying that mmap %d is removed", cpu_no); + resume_continuation(st->strand.context(), std::move(handler)); + } + + // remove the next one; any previous error is logged and swallowed + return st->async_remove(); + }); + } 
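/* Illustrative aside (not part of the patch): await_mmap_removed() and async_wait_terminated()
 * above either complete immediately or park the stored continuation, and async_remove() resumes
 * whatever was parked once the corresponding state is reached. A minimal standalone sketch of
 * that "complete now or park the handler" pattern with plain callbacks, assuming single-threaded
 * use (the real code serializes everything on a strand); termination_latch_t is a hypothetical
 * name. */
#include <functional>
#include <utility>

class termination_latch_t {
public:
    // wait for termination: run immediately if it already happened, otherwise park the handler
    void async_wait(std::function<void()> handler)
    {
        if (terminated) {
            handler();
            return;
        }
        parked = std::move(handler);
    }

    // mark terminated and resume any parked waiter
    void notify_terminated()
    {
        terminated = true;
        if (auto handler = std::exchange(parked, nullptr)) {
            handler();
        }
    }

private:
    std::function<void()> parked {};
    bool terminated {false};
};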
+ + // have we terminated? + if (terminate_requested && primary_streams.empty() && supplimentary_streams.empty() + && removed_cpus.empty()) { + // yup + terminate_complete = true; + // notify the handler + if (termination_handler) { + LOG_TRACE("notifying terminated"); + resume_continuation(strand.context(), std::move(termination_handler)); + } + } + // did all the primary streams close? means the traced app exited + else if ((!terminate_requested) && any_added && primary_streams.empty() && removed_cpus.empty()) { + LOG_TRACE("notifying all-exited"); + perf_buffer_consumer->trigger_one_shot_mode(); + } + + return start_with(boost::system::error_code {}); + } + + /** Asynchronously poll either all cpus OR each item in the pending list */ + async::continuations::polymorphic_continuation_t async_poll(bool poll_all) + { + using namespace async::continuations; + + LOG_TRACE("called async_poll, poll_all=%u, t=%u, a=%u, p=%zu, s=%zu, r=%zu", + poll_all, + terminate_requested, + any_added, + primary_streams.size(), + supplimentary_streams.size(), + removed_cpus.size()); + + if (poll_all) { + // clear the per-cpu list as all the cores are about to be polled + pending_cpus_read->clear(); + + LOG_TRACE("Requesting to poll_all"); + + return start_on(strand.context()) // + | perf_buffer_consumer->async_poll_all(use_continuation) // + | then([](auto ec) { + LOG_TRACE("Polled all, got ec=%s", ec.message().c_str()); + return ec; + }) // + | map_error() // + | post_on(strand) // + | then([st = this->shared_from_this()]() { + // move the read list into a local as we want to clear the read list on completion of poll + std::map>> cpu_aux_streams { + std::move(*st->cpu_aux_streams_read)}; + + // re-enable any AUX items that might have got disabled due to mmap full + for (auto & entry : cpu_aux_streams) { + for (auto & fd : entry.second) { + st->perf_activator->re_enable(fd->native_handle()); + } + } + + // now remove any queued for remove + return st->async_remove(); + }); + } + + if (!pending_cpus_read->empty()) { + // poll one item from the pending list + auto cpu_no = pending_cpus_read->front(); + pending_cpus_read->pop_front(); + + LOG_TRACE("Requesting to poll ringbuffer for cpu %d", cpu_no); + + return start_on(strand.context()) // + | perf_buffer_consumer->async_poll(cpu_no, use_continuation) // + | then([cpu_no](auto ec) { + LOG_TRACE("Polled cpu #%d, got ec=%s", cpu_no, ec.message().c_str()); + return ec; + }) // + | map_error() // + | post_on(strand) // + | then([st = this->shared_from_this(), cpu_no]() { + // re-enable any AUX items that might have got disabled due to mmap full + auto it = st->cpu_aux_streams_read->find(cpu_no); + if (it != st->cpu_aux_streams_read->end()) { + // re-enable + for (auto & fd : it->second) { + st->perf_activator->re_enable(fd->native_handle()); + } + + // remove it + st->cpu_aux_streams_read->erase(it); + } + + // try again for the next item + return st->async_poll(false); + }); + } + + // check for any removed items + return async_remove(); + } + + /** Recursive loop for the body of async_try_poll */ + static async::continuations::polymorphic_continuation_t async_try_poll_body( + std::shared_ptr st) + { + using namespace async::continuations; + + // process the list contents + return st->async_poll(std::exchange(st->poll_all, false)) // + | post_on(st->strand) // + | then([st](auto ec) -> polymorphic_continuation_t { + // swap the read/write pointers again, repeat if there are more events pending... 
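+                          //
+                          // (illustration) the swap is the double-buffering step: the event monitors
+                          // only ever append to *pending_cpus_write / *cpu_aux_streams_write, while
+                          // the poll pass only ever consumes *pending_cpus_read / *cpu_aux_streams_read.
+                          // Swapping the pointers publishes, in one O(1) step, everything queued while
+                          // the previous pass was running, for example:
+                          //
+                          //     read: []          write: [2, 5]   <- fds signalled during the pass
+                          //     swap_read_write_poll_lists();
+                          //     read: [2, 5]      write: []       <- next pass polls cpus 2 and 5
+                          //
+                          // so entries added while a pass runs cannot stop that pass from finishing
+                          // its read list and reaching the remove phase.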
+ st->swap_read_write_poll_lists(); + + // finish if the new read list is empty + if (st->pending_cpus_read->empty()) { + LOG_TRACE("async_try_poll :: complete"); + st->busy_polling = false; + return start_with(ec); + } + + LOG_TRACE("async_try_poll :: iterating"); + + // otherwise poll again + return async_try_poll_body(st); + }); + } + + /** swap the read/write pointers so that the write list becomes the read list and vice-versa. This allows the poll loop to access the read list, whilst the event monitors access the write list */ + void swap_read_write_poll_lists() + { + std::swap(pending_cpus_read, pending_cpus_write); + std::swap(cpu_aux_streams_read, cpu_aux_streams_write); + runtime_assert(pending_cpus_write->empty(), "expected write list to be empty"); + runtime_assert(cpu_aux_streams_write->empty(), "expected write list to be empty"); + } + + /** Poll if some poll loop was not already active */ + auto async_try_poll() + { + using namespace async::continuations; + + LOG_TRACE("async_try_poll"); + + auto st = this->shared_from_this(); + + return start_on(st->strand) // + | do_if_else([st]() { return !std::exchange(st->busy_polling, true); }, // + [st]() { + LOG_TRACE("async_try_poll :: busy"); + + // swap the read and write list so that the poll loop consumes the old write list, and the monitors can write to the old read list + st->swap_read_write_poll_lists(); + + return async_try_poll_body(st); + }, + []() { + LOG_TRACE("async_try_poll :: skip"); + + return boost::system::error_code {}; + }) + | map_error(); + } + + /** Observe the file descriptor for read events */ + void do_observer_perf_fd(int cpu_no, + std::shared_ptr stream_descriptor, + bool primary, + bool is_aux) + { + using namespace async::continuations; + + auto st = this->shared_from_this(); + auto nh = stream_descriptor->native_handle(); + + LOG_TRACE("Observing new fd %d %d %u", cpu_no, stream_descriptor->native_handle(), primary); + + // and wait for data to be available + spawn("perf buffer monitor for event fd", + start_on(strand) // + | then([st, cpu_no, stream_descriptor, primary]() { + if (!st->is_terminate_requested()) { + if (primary) { + st->primary_streams.insert(stream_descriptor); + st->cpu_fd_counter[cpu_no] += 1; + st->any_added = true; + } + else { + st->supplimentary_streams.insert(stream_descriptor); + } + } + }) // + | repeatedly( + [st]() { + return start_on(st->strand) // + | then([st]() { return (!st->is_terminate_requested()); }); + }, + [st, cpu_no, stream_descriptor, is_aux]() { + LOG_TRACE("waiting for notification on %d / %d", + cpu_no, + stream_descriptor->native_handle()); + + return stream_descriptor->async_wait(boost::asio::posix::stream_descriptor::wait_read, + use_continuation) + | post_on(st->strand) + | then([st, cpu_no, stream_descriptor, is_aux]( + boost::system::error_code const & ec) -> polymorphic_continuation_t<> { + LOG_TRACE("Received file descriptor notification for cpu=%d, fd=%d, ec=%s", + cpu_no, + stream_descriptor->native_handle(), + ec.message().c_str()); + + auto const already_contained = + std::any_of(st->pending_cpus_write->begin(), + st->pending_cpus_write->end(), + [cpu_no](int n) { return n == cpu_no; }); + + // add it to the wait queue, regardless of the error code + if (!already_contained) { + st->pending_cpus_write->emplace_back(cpu_no); + } + + // and add the fd to the re-enable set (event if already_contained cpu_no) + if (is_aux) { + (*st->cpu_aux_streams_write)[cpu_no].insert(stream_descriptor); + } + + if (ec) { + return start_with(ec) | map_error(); + } + 
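+
+                              // (note) a fresh poll pass only needs to be kicked off for the first new
+                              // entry when no pass is active: an active pass will pick this cpu up from
+                              // the write list after its next swap, and an already_contained cpu was
+                              // queued by an earlier notification.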
+ if (st->busy_polling || already_contained) { + return {}; + } + + return st->async_try_poll(); + }); + }), + [st, cpu_no, stream_descriptor, primary, is_aux, nh](bool) { + // mark it as removed + spawn("perf buffer event monitor - final flush", + start_on(st->strand) // + | then([st, cpu_no, stream_descriptor, primary, is_aux, nh]() + -> polymorphic_continuation_t<> { + LOG_TRACE("Removing file descriptor notification for cpu=%d / %d", cpu_no, nh); + + // explicitly close the FD in case we get here for any other reason than EOF + stream_descriptor->close(); + + if (primary) { + // decrement the per-cpu count + auto const n = --(st->cpu_fd_counter[cpu_no]); + + LOG_TRACE("... remove %d -> %zu", nh, n); + + if (n == 0) { + // add it to the remove queue + st->removed_cpus.emplace_back(cpu_no); + } + + // remove from monitored list + st->primary_streams.erase(stream_descriptor); + } + else { + // remove from monitored list + st->supplimentary_streams.erase(stream_descriptor); + } + + if (is_aux) { + // remove it from both lists as it does not need to be re-enabled + auto it_r = st->cpu_aux_streams_read->find(cpu_no); + if (it_r != st->cpu_aux_streams_read->end()) { + it_r->second.erase(stream_descriptor); + } + auto it_w = st->cpu_aux_streams_write->find(cpu_no); + if (it_w != st->cpu_aux_streams_write->end()) { + it_w->second.erase(stream_descriptor); + } + } + + if (st->busy_polling) { + return {}; + } + + return st->async_try_poll(); + })); + }); + + // observe for errors; will be notified when the FD is closed by the kernel on process exit. + spawn("perf buffer monitor for event fd close handler", + stream_descriptor->async_wait(boost::asio::posix::stream_descriptor::wait_error, use_continuation), + [stream_descriptor, nh](bool f) { + LOG_TRACE("Received close notification for %d was %u", nh, f); + stream_descriptor->close(); + }); + } + + /** Start the timer */ + void do_start_timer() + { + using namespace async::continuations; + + auto st = this->shared_from_this(); + + spawn("perf buffer timer", + repeatedly( + [st]() { + return start_on(st->strand) // + | then([st]() { + return (!st->is_terminate_requested()) || (st->primary_streams.size() > 0); + }); + }, // + [st]() { + st->timer.expires_from_now(st->live_mode ? 
live_poll_interval : local_poll_interval); + + return st->timer.async_wait(use_continuation) // + | post_on(st->strand) // + | then([st](boost::system::error_code const & ec) -> polymorphic_continuation_t<> { + LOG_TRACE("Timer tick: %s", ec.message().c_str()); + + // swallow cancel errors as it's just the timer being woken early + if ((ec) + && (ec + != boost::asio::error::make_error_code( + boost::asio::error::operation_aborted))) { + return start_with(ec) | map_error(); + } + + // if no error, then timeout occured so trigger a poll_all + if (!ec) { + st->poll_all = true; + } + + if (st->busy_polling) { + return {}; + } + + return st->async_try_poll(); + }); + }), + [st](bool) { + // always perform a final flush of any remaining data + spawn("perf buffer event timer - final flush", + start_on(st->strand) // + | then([st]() { st->poll_all = true; }) // + | st->async_try_poll()); + }); + } + }; +} diff --git a/daemon/agents/perf/capture_configuration.cpp b/daemon/agents/perf/capture_configuration.cpp index 53635bf5..84b04ded 100644 --- a/daemon/agents/perf/capture_configuration.cpp +++ b/daemon/agents/perf/capture_configuration.cpp @@ -3,8 +3,11 @@ #include "agents/perf/capture_configuration.h" #include "SessionData.h" +#include "agents/perf/events/event_configuration.hpp" +#include "agents/perf/events/types.hpp" #include "k/perf_event.h" #include "lib/Assert.h" +#include "linux/perf/PerfEventGroup.h" #include "linux/perf/PerfEventGroupIdentifier.h" #include @@ -22,6 +25,7 @@ namespace agents::perf { msg.set_sample_rate(session_data.mSampleRate); msg.set_one_shot(session_data.mOneShot); msg.set_exclude_kernel_events(session_data.mExcludeKernelEvents); + msg.set_stop_on_exit(session_data.mStopOnExit); } void add_perf_config(ipc::proto::shell::perf::capture_configuration_t::perf_config_t & msg, @@ -42,6 +46,7 @@ namespace agents::perf { msg.set_has_armv7_pmu_driver(perf_config.has_armv7_pmu_driver); msg.set_has_64bit_uname(perf_config.has_64bit_uname); msg.set_use_64bit_register_set(perf_config.use_64bit_register_set); + msg.set_has_exclude_callchain_kernel(perf_config.has_exclude_callchain_kernel); } template @@ -94,7 +99,7 @@ namespace agents::perf { void add_cpus(google::protobuf::RepeatedPtrField< ipc::proto::shell::perf::capture_configuration_t::cpu_properties_t> & msg, ICpuInfo const & cpu_info, - std::map const & cpu_number_to_spe_type) + std::map const & cpu_number_to_spe_type) { auto clusterIds = cpu_info.getClusterIds(); auto cpuIds = cpu_info.getCpuIds(); @@ -104,7 +109,7 @@ namespace agents::perf { entry->set_cluster_index(clusterIds[index]); entry->set_cpu_id(cpuIds[index]); auto it = cpu_number_to_spe_type.find(int(index)); - if (it != cpu_number_to_spe_type.begin()) { + if (it != cpu_number_to_spe_type.end()) { entry->set_spe_type(it->second); } } @@ -157,38 +162,6 @@ namespace agents::perf { throw std::runtime_error("Matching pmu node not found"); } - void add_perf_event_group_identifier( - ipc::proto::shell::perf::capture_configuration_t::perf_event_group_identifier_t & msg, - PerfEventGroupIdentifier const & identifier, - ICpuInfo const & cpu_info, - lib::Span uncore_pmus) - { - switch (identifier.getType()) { - case PerfEventGroupIdentifier::Type::GLOBAL: { - msg.set_spe(false); - return; - } - case PerfEventGroupIdentifier::Type::SPE: { - msg.set_spe(true); - return; - } - case PerfEventGroupIdentifier::Type::PER_CLUSTER_CPU: { - msg.set_per_cluster_cpu(find_pmu_index(cpu_info.getClusters(), identifier.getCluster())); - return; - } - case 
PerfEventGroupIdentifier::Type::UNCORE_PMU: { - msg.set_uncore_pmu(find_pmu_index(uncore_pmus, identifier.getUncorePmu())); - return; - } - case PerfEventGroupIdentifier::Type::SPECIFIC_CPU: { - msg.set_specific_cpu(identifier.getCpuNumber()); - return; - } - default: - throw std::runtime_error("Unexpected type"); - } - } - void add_perf_event_attr(ipc::proto::shell::perf::capture_configuration_t::perf_event_attribute_t & msg, perf_event_attr const & attr) { @@ -233,7 +206,7 @@ namespace agents::perf { msg.set_aux_watermark(attr.aux_watermark); } - void add_perf_event(ipc::proto::shell::perf::capture_configuration_t::perf_event_t & msg, + void add_perf_event(ipc::proto::shell::perf::capture_configuration_t::perf_event_definition_t & msg, int key, perf_event_attr const & attr) { @@ -242,40 +215,81 @@ namespace agents::perf { add_perf_event_attr(*msg_attr, attr); } - void add_perf_group(ipc::proto::shell::perf::capture_configuration_t::perf_event_group_events_t & msg, + ipc::proto::shell::perf::capture_configuration_t_perf_event_definition_list_t & + get_perf_event_configuration_event_list( + ipc::proto::shell::perf::capture_configuration_t::perf_event_configuration_t & msg, + PerfEventGroupIdentifier const & identifier, + ICpuInfo const & cpu_info, + lib::Span uncore_pmus) + { + switch (identifier.getType()) { + case PerfEventGroupIdentifier::Type::GLOBAL: { + return *msg.mutable_global_events(); + } + case PerfEventGroupIdentifier::Type::SPE: { + return *msg.mutable_spe_events(); + } + case PerfEventGroupIdentifier::Type::PER_CLUSTER_CPU: { + auto index = find_pmu_index(cpu_info.getClusters(), identifier.getCluster()); + auto * map = msg.mutable_cluster_specific_events(); + return (*map)[index]; + } + case PerfEventGroupIdentifier::Type::UNCORE_PMU: { + auto index = find_pmu_index(uncore_pmus, identifier.getUncorePmu()); + auto * map = msg.mutable_uncore_specific_events(); + return (*map)[index]; + } + case PerfEventGroupIdentifier::Type::SPECIFIC_CPU: { + auto index = static_cast(identifier.getCpuNumber()); + auto * map = msg.mutable_cpu_specific_events(); + return (*map)[index]; + } + default: { + throw std::runtime_error("Unexpected type"); + } + } + } + + void add_perf_group(ipc::proto::shell::perf::capture_configuration_t::perf_event_configuration_t & msg, PerfEventGroupIdentifier const & identifier, - perf_event_group_common_state_t const & state, + perf_event_group_configurer_state_t const & state, ICpuInfo const & cpu_info, lib::Span uncore_pmus) { - add_perf_event_group_identifier(*msg.mutable_id(), identifier, cpu_info, uncore_pmus); - auto * msg_events = msg.mutable_events(); + auto & list = get_perf_event_configuration_event_list(msg, identifier, cpu_info, uncore_pmus); + auto * msg_events = list.mutable_events(); for (auto const & event : state.events) { auto * msg_entry = msg_events->Add(); add_perf_event(*msg_entry, event.key, event.attr); } } - void add_perf_groups(ipc::proto::shell::perf::capture_configuration_t::perf_groups_t & msg, - perf_groups_configurer_state_t const & perf_groups, - ICpuInfo const & cpu_info, - lib::Span uncore_pmus) + void add_event_configuration(ipc::proto::shell::perf::capture_configuration_t::perf_event_configuration_t & msg, + perf_groups_configurer_state_t const & perf_groups, + ICpuInfo const & cpu_info, + lib::Span uncore_pmus) { - msg.set_number_of_events_added(perf_groups.numberOfEventsAdded); + add_perf_event(*msg.mutable_header_event(), perf_groups.header.key, perf_groups.header.attr); - auto * msg_groups = msg.mutable_groups(); for (auto 
const & groups_entry : perf_groups.perfEventGroupMap) { - auto * msg_entry = msg_groups->Add(); - add_perf_group(*msg_entry, groups_entry.first, groups_entry.second.common, cpu_info, uncore_pmus); + add_perf_group(msg, groups_entry.first, groups_entry.second, cpu_info, uncore_pmus); } } void add_ringbuffer_config(ipc::proto::shell::perf::capture_configuration_t::perf_ringbuffer_config_t & msg, - perf_ringbuffer_config_t const & ringbuffer_config) + agents::perf::buffer_config_t const & ringbuffer_config) { - msg.set_page_size(ringbuffer_config.pageSize); - msg.set_data_size(ringbuffer_config.dataBufferSize); - msg.set_aux_size(ringbuffer_config.auxBufferSize); + msg.set_page_size(ringbuffer_config.page_size); + msg.set_data_size(ringbuffer_config.data_buffer_size); + msg.set_aux_size(ringbuffer_config.aux_buffer_size); + } + + void add_perf_pmu_type_to_name(google::protobuf::Map<::google::protobuf::uint32, std::string> & msg, + std::map const & perf_pmu_type_to_name) + { + for (auto const & entry : perf_pmu_type_to_name) { + msg[entry.first] = entry.second; + } } /// ------------------------------ deserializing @@ -288,6 +302,7 @@ namespace agents::perf { session_data.sample_rate = msg.sample_rate(); session_data.one_shot = msg.one_shot(); session_data.exclude_kernel_events = msg.exclude_kernel_events(); + session_data.stop_on_exit = msg.stop_on_exit(); } void extract_perf_config(ipc::proto::shell::perf::capture_configuration_t::perf_config_t const & msg, @@ -308,6 +323,7 @@ namespace agents::perf { perf_config.has_armv7_pmu_driver = msg.has_armv7_pmu_driver(); perf_config.has_64bit_uname = msg.has_64bit_uname(); perf_config.use_64bit_register_set = msg.use_64bit_register_set(); + perf_config.has_exclude_callchain_kernel = msg.has_exclude_callchain_kernel(); } void extract_clusters( @@ -333,15 +349,16 @@ namespace agents::perf { void extract_cpus(google::protobuf::RepeatedPtrField< ipc::proto::shell::perf::capture_configuration_t::cpu_properties_t> const & msg, - std::vector per_core_cluster_index, - std::vector per_core_cpuids, - std::map per_core_spe_type) + std::vector & per_core_cluster_index, + std::vector & per_core_cpuids, + std::map & per_core_spe_type) { std::int32_t index = 0; - for (auto cpu : msg) { + for (auto const & cpu : msg) { per_core_cluster_index.emplace_back(cpu.cluster_index()); per_core_cpuids.emplace_back(cpu.cpu_id()); - per_core_spe_type[index] = cpu.spe_type(); + per_core_spe_type[core_no_t(index)] = cpu.spe_type(); + ++index; } } @@ -367,35 +384,6 @@ namespace agents::perf { } } - PerfEventGroupIdentifier extract_perf_event_group_identifier( - ipc::proto::shell::perf::capture_configuration_t::perf_event_group_identifier_t const & msg, - std::vector const & clusters, - std::vector const & uncore_pmus, - std::map const & per_core_spe_type) - { - if (msg.has_per_cluster_cpu()) { - auto index = msg.per_cluster_cpu(); - runtime_assert(index < clusters.size(), "Invalid cluster index given"); - return {clusters[std::size_t(index)]}; - } - - if (msg.has_uncore_pmu()) { - auto index = msg.uncore_pmu(); - runtime_assert(index < uncore_pmus.size(), "Invalid uncore index given"); - return {uncore_pmus[std::size_t(index)]}; - } - - if (msg.has_specific_cpu()) { - return {msg.specific_cpu()}; - } - - if (msg.has_spe() && msg.spe()) { - return {per_core_spe_type}; - } - - return {}; - } - template void set_one_of(bool first, A & a, B & b, C c) { @@ -457,62 +445,62 @@ namespace agents::perf { return result; } - std::vector extract_perf_event_group_events( - 
google::protobuf::RepeatedPtrField const & - msg) + void extract_event_definition_list( + ipc::proto::shell::perf::capture_configuration_t::perf_event_definition_list_t const & msg, + std::vector & events) { - std::vector result {}; - - for (auto const & entry : msg) { - result.emplace_back(perf_event_t { + for (auto const & entry : msg.events()) { + events.emplace_back(event_definition_t { extract_perf_event_attr(entry.attr()), - entry.key(), + gator_key_t(entry.key()), }); } - - return result; } - void extract_perf_groups_group( - ipc::proto::shell::perf::capture_configuration_t::perf_event_group_events_t const & msg, - std::map & map, + void extract_event_configuration( + ipc::proto::shell::perf::capture_configuration_t::perf_event_configuration_t const & msg, + event_configuration_t & event_configuration, std::vector const & clusters, std::vector const & uncore_pmus, - std::map & core_no_to_spe_type) + std::size_t num_cores) { - auto result = map.try_emplace( - extract_perf_event_group_identifier(msg.id(), clusters, uncore_pmus, core_no_to_spe_type), - perf_event_group_common_state_t { - extract_perf_event_group_events(msg.events()), - }); + runtime_assert(msg.has_header_event(), "missing header_event"); - runtime_assert(result.second, "should have inserted perf event group definition"); - } + auto const & hm = msg.header_event(); + event_configuration.header_event = event_definition_t { + extract_perf_event_attr(hm.attr()), + gator_key_t(hm.key()), + }; - void extract_perf_groups(ipc::proto::shell::perf::capture_configuration_t::perf_groups_t const & msg, - perf_capture_configuration_t::perf_groups_t & perf_groups, - std::vector const & clusters, - std::vector const & uncore_pmus, - std::map & core_no_to_spe_type) - { - for (auto const & group : msg.groups()) { - extract_perf_groups_group(group, - perf_groups.perfEventGroupMap, - clusters, - uncore_pmus, - core_no_to_spe_type); + extract_event_definition_list(msg.global_events(), event_configuration.global_events); + extract_event_definition_list(msg.spe_events(), event_configuration.spe_events); + + for (auto const & entry : msg.cluster_specific_events()) { + runtime_assert(entry.first < clusters.size(), "Invalid cluster id received"); + auto id = cpu_cluster_id_t(entry.first); + extract_event_definition_list(entry.second, event_configuration.cluster_specific_events[id]); + } + + for (auto const & entry : msg.uncore_specific_events()) { + runtime_assert(entry.first < uncore_pmus.size(), "Invalid uncore id received"); + auto id = uncore_pmu_id_t(entry.first); + extract_event_definition_list(entry.second, event_configuration.uncore_specific_events[id]); } - perf_groups.numberOfEventsAdded = msg.number_of_events_added(); + for (auto const & entry : msg.cpu_specific_events()) { + runtime_assert(entry.first < num_cores, "Invalid core no received"); + auto id = core_no_t(entry.first); + extract_event_definition_list(entry.second, event_configuration.cpu_specific_events[id]); + } } void extract_ringbuffer_config( ipc::proto::shell::perf::capture_configuration_t::perf_ringbuffer_config_t const & msg, - perf_ringbuffer_config_t & ringbuffer_config) + buffer_config_t & ringbuffer_config) { - ringbuffer_config.pageSize = msg.page_size(); - ringbuffer_config.dataBufferSize = msg.data_size(); - ringbuffer_config.auxBufferSize = msg.aux_size(); + ringbuffer_config.page_size = msg.page_size(); + ringbuffer_config.data_buffer_size = msg.data_size(); + ringbuffer_config.aux_buffer_size = msg.aux_size(); } std::vector 
extract_args(google::protobuf::RepeatedPtrField && args) @@ -542,7 +530,7 @@ namespace agents::perf { }; } - void extract_wait_process(std::string & msg, std::optional & wait_process) + void extract_wait_process(std::string & msg, std::string & wait_process) { if (!msg.empty()) { wait_process = msg; @@ -556,6 +544,14 @@ namespace agents::perf { pids.insert(pid); } } + + void extract_perf_pmu_type_to_name(google::protobuf::Map<::google::protobuf::uint32, std::string> & msg, + std::map & perf_pmu_type_to_name) + { + for (auto & entry : msg) { + perf_pmu_type_to_name.emplace(entry.first, std::move(entry.second)); + } + } } /* create the message */ @@ -563,13 +559,15 @@ namespace agents::perf { SessionData const & session_data, PerfConfig const & perf_config, ICpuInfo const & cpu_info, - std::map const & cpu_number_to_spe_type, + std::map const & cpu_number_to_spe_type, lib::Span cluster_keys_for_cpu_frequency_counter, lib::Span uncore_pmus, lib::Span all_known_cpu_pmus, perf_groups_configurer_state_t const & perf_groups, - perf_ringbuffer_config_t const & ringbuffer_config, - bool enable_on_exec) + agents::perf::buffer_config_t const & ringbuffer_config, + std::map const & perf_pmu_type_to_name, + bool enable_on_exec, + bool stop_pids) { ipc::msg_capture_configuration_t result {}; @@ -579,11 +577,13 @@ namespace agents::perf { add_cpus(*result.suffix.mutable_cpus(), cpu_info, cpu_number_to_spe_type); add_uncore_pmus(*result.suffix.mutable_uncore_pmus(), uncore_pmus); add_cpuid_to_core_name(*result.suffix.mutable_cpuid_to_core_name(), all_known_cpu_pmus); - add_perf_groups(*result.suffix.mutable_perf_groups(), perf_groups, cpu_info, uncore_pmus); + add_event_configuration(*result.suffix.mutable_event_configuration(), perf_groups, cpu_info, uncore_pmus); add_ringbuffer_config(*result.suffix.mutable_ringbuffer_config(), ringbuffer_config); + add_perf_pmu_type_to_name(*result.suffix.mutable_perf_pmu_type_to_name(), perf_pmu_type_to_name); result.suffix.set_num_cpu_cores(cpu_info.getNumberOfCores()); result.suffix.set_enable_on_exec(enable_on_exec); + result.suffix.set_stop_pids(stop_pids); return result; } @@ -622,9 +622,9 @@ namespace agents::perf { } } - std::unique_ptr parse_capture_configuration_msg(ipc::msg_capture_configuration_t msg) + std::shared_ptr parse_capture_configuration_msg(ipc::msg_capture_configuration_t msg) { - auto result = std::make_unique(); + auto result = std::make_shared(); extract_session_data(msg.suffix.session_data(), result->session_data); extract_perf_config(msg.suffix.perf_config(), result->perf_config); @@ -637,18 +637,21 @@ namespace agents::perf { result->per_core_spe_type); extract_uncore_pmus(*msg.suffix.mutable_uncore_pmus(), result->uncore_pmus); extract_cpuid_to_core_name(*msg.suffix.mutable_cpuid_to_core_name(), result->cpuid_to_core_name); - extract_perf_groups(msg.suffix.perf_groups(), - result->perf_groups, - result->clusters, - result->uncore_pmus, - result->per_core_spe_type); + + result->num_cpu_cores = msg.suffix.num_cpu_cores(); + result->enable_on_exec = msg.suffix.enable_on_exec(); + result->stop_pids = msg.suffix.stop_pids(); + + extract_event_configuration(msg.suffix.event_configuration(), + result->event_configuration, + result->clusters, + result->uncore_pmus, + result->num_cpu_cores); extract_ringbuffer_config(msg.suffix.ringbuffer_config(), result->ringbuffer_config); extract_command(*msg.suffix.mutable_command(), result->command); extract_wait_process(*msg.suffix.mutable_wait_process(), result->wait_process); 
extract_pids(msg.suffix.pids(), result->pids); - - result->num_cpu_cores = msg.suffix.num_cpu_cores(); - result->enable_on_exec = msg.suffix.enable_on_exec(); + extract_perf_pmu_type_to_name(*msg.suffix.mutable_perf_pmu_type_to_name(), result->perf_pmu_type_to_name); return result; } diff --git a/daemon/agents/perf/capture_configuration.h b/daemon/agents/perf/capture_configuration.h index 2051ea93..90b2172d 100644 --- a/daemon/agents/perf/capture_configuration.h +++ b/daemon/agents/perf/capture_configuration.h @@ -4,10 +4,12 @@ #include "ICpuInfo.h" #include "SessionData.h" +#include "agents/perf/events/event_configuration.hpp" +#include "agents/perf/events/types.hpp" +#include "agents/perf/record_types.h" #include "ipc/messages.h" #include "k/perf_event.h" #include "lib/Assert.h" -#include "linux/perf/PerfBuffer.h" #include "linux/perf/PerfConfig.h" #include "linux/perf/PerfEventGroup.h" #include "linux/perf/PerfEventGroupIdentifier.h" @@ -21,7 +23,6 @@ namespace agents::perf { using perf_config_t = PerfConfig; using gator_cpu_t = GatorCpu; using uncore_pmu_t = UncorePmu; - using perf_groups_t = perf_groups_common_serialized_state_t; struct session_data_t { std::uint64_t live_rate; @@ -29,6 +30,7 @@ namespace agents::perf { std::int32_t sample_rate; bool one_shot; bool exclude_kernel_events; + bool stop_on_exit; }; struct command_t { @@ -50,16 +52,18 @@ namespace agents::perf { std::vector cluster_keys_for_cpu_frequency_counter {}; std::vector per_core_cluster_index {}; std::vector per_core_cpuids {}; - std::map per_core_spe_type {}; + std::map per_core_spe_type {}; std::vector uncore_pmus {}; std::map cpuid_to_core_name {}; - perf_groups_t perf_groups {}; - perf_ringbuffer_config_t ringbuffer_config {}; + std::map perf_pmu_type_to_name {}; + event_configuration_t event_configuration {}; + buffer_config_t ringbuffer_config {}; std::optional command {}; - std::optional wait_process {}; + std::string wait_process {}; std::set pids {}; std::uint32_t num_cpu_cores {}; bool enable_on_exec {}; + bool stop_pids {}; }; /** @@ -69,13 +73,15 @@ namespace agents::perf { SessionData const & session_data, PerfConfig const & perf_config, ICpuInfo const & cpu_info, - std::map const & cpu_number_to_spe_type, + std::map const & cpu_number_to_spe_type, lib::Span cluster_keys_for_cpu_frequency_counter, lib::Span uncore_pmus, lib::Span all_known_cpu_pmus, perf_groups_configurer_state_t const & perf_groups, - perf_ringbuffer_config_t const & ringbuffer_config, - bool enable_on_exec); + agents::perf::buffer_config_t const & ringbuffer_config, + std::map const & perf_pmu_type_to_name, + bool enable_on_exec, + bool stop_pids); /** Add a command to execute (for --app, --allow-cmd) */ void add_command(ipc::msg_capture_configuration_t & msg, @@ -91,6 +97,6 @@ namespace agents::perf { void add_pids(ipc::msg_capture_configuration_t & msg, std::set const & pids); /** Extract and validate the fields from the received msg. (Passed by value to allow moving out strings, rather than copying) */ - [[nodiscard]] std::unique_ptr parse_capture_configuration_msg( + [[nodiscard]] std::shared_ptr parse_capture_configuration_msg( ipc::msg_capture_configuration_t msg); }; diff --git a/daemon/agents/perf/cpu_info.h b/daemon/agents/perf/cpu_info.h new file mode 100644 index 00000000..21884df7 --- /dev/null +++ b/daemon/agents/perf/cpu_info.h @@ -0,0 +1,40 @@ +/* Copyright (C) 2022 by Arm Limited. All rights reserved. 
*/ + +#pragma once + +#include "CpuUtils.h" +#include "ICpuInfo.h" +#include "agents/perf/capture_configuration.h" + +namespace agents::perf { + /** Implements the ICpuInfo interface, providing a thin wrapper around the data received in the configuration message and allowing simple rescan of properties */ + class cpu_info_t : public ICpuInfo { + public: + explicit cpu_info_t(std::shared_ptr configuration) + : configuration(std::move(configuration)) + { + } + + [[nodiscard]] lib::Span getCpuIds() const override { return configuration->per_core_cpuids; } + + [[nodiscard]] lib::Span getClusters() const override { return configuration->clusters; } + + [[nodiscard]] lib::Span getClusterIds() const override + { + return configuration->per_core_cluster_index; + } + + [[nodiscard]] const char * getModelName() const override { return ""; } + + void updateIds(bool /*ignoreOffline*/) override + { + cpu_utils::readCpuInfo(true, false, configuration->per_core_cpuids); + ICpuInfo::updateClusterIds(configuration->per_core_cpuids, + configuration->clusters, + configuration->per_core_cluster_index); + } + + private: + std::shared_ptr configuration; + }; +} diff --git a/daemon/agents/perf/cpufreq_counter.cpp b/daemon/agents/perf/cpufreq_counter.cpp index 69b78c2a..2914fe12 100644 --- a/daemon/agents/perf/cpufreq_counter.cpp +++ b/daemon/agents/perf/cpufreq_counter.cpp @@ -11,7 +11,7 @@ namespace agents::perf { std::optional read_cpu_frequency( int cpu_no, ICpuInfo const & cpu_info, - lib::Span cluster_keys_for_cpu_frequency_counter) + lib::Span cluster_keys_for_cpu_frequency_counter) { static constexpr std::size_t buffer_size = 128; static constexpr std::int64_t freq_multiplier = 1000; diff --git a/daemon/agents/perf/cpufreq_counter.h b/daemon/agents/perf/cpufreq_counter.h index 3834a140..4b93090a 100644 --- a/daemon/agents/perf/cpufreq_counter.h +++ b/daemon/agents/perf/cpufreq_counter.h @@ -19,5 +19,5 @@ namespace agents::perf { std::optional read_cpu_frequency( int cpu_no, ICpuInfo const & cpu_info, - lib::Span cluster_keys_for_cpu_frequency_counter); + lib::Span cluster_keys_for_cpu_frequency_counter); } diff --git a/daemon/agents/perf/detail/perf_buffer_consumer_detail.cpp b/daemon/agents/perf/detail/perf_buffer_consumer_detail.cpp deleted file mode 100644 index c466f076..00000000 --- a/daemon/agents/perf/detail/perf_buffer_consumer_detail.cpp +++ /dev/null @@ -1,60 +0,0 @@ -/* Copyright (C) 2022 by Arm Limited. All rights reserved. 
*/ - -#include "agents/perf/detail/perf_buffer_consumer_detail.h" - -#include "lib/Utils.h" -#include "linux/perf/PerfUtils.h" - -#include - -namespace agents::perf::detail { - void * try_mmap_with_logging(int cpu, const buffer_config_t & config, std::size_t length, off_t offset, int fd) - { - auto * buf = lib::mmap(nullptr, length, PROT_READ | PROT_WRITE, MAP_SHARED, fd, offset); - - // NOLINTNEXTLINE(performance-no-int-to-ptr) - if (buf == MAP_FAILED) { - std::array strbuf {0}; - strerror_r(errno, strbuf.data(), strbuf.size()); - LOG_DEBUG("mmap failed for fd %i (errno=%d, %s, mmapLength=%zu, offset=%zu)", - fd, - errno, - strbuf.data(), - length, - static_cast(offset)); - if ((errno == ENOMEM) || ((errno == EPERM) && (getuid() != 0))) { - LOG_ERROR("Could not mmap perf buffer on cpu %d, '%s' (errno: %d) returned.\n" - "This may be caused by a limit in /proc/sys/kernel/perf_event_mlock_kb.\n" - "Try again with a smaller value of --mmap-pages.\n" - "Usually, a value of ((perf_event_mlock_kb * 1024 / page_size) - 1) or lower will work.\n" - "The current effective value for --mmap-pages is %zu", - cpu, - strbuf.data(), - errno, - config.data_buffer_size / config.page_size); - snprintf(strbuf.data(), strbuf.size(), "/sys/devices/system/cpu/cpu%u/online", cpu); - std::int64_t online_status = 0; - lib::readInt64FromFile(strbuf.data(), online_status); - LOG_DEBUG("Online status for cpu%d is %" PRId64, cpu, online_status); - - std::optional file_value = perf_utils::readPerfEventMlockKb(); - if (file_value.has_value()) { - LOG_DEBUG(" Perf MlockKb Value is %" PRId64, file_value.value()); - } - else { - LOG_DEBUG("reading Perf MlockKb returned null"); - } - } - else { - LOG_DEBUG("mmap failed for a different reason"); - } - } - else { - LOG_DEBUG("mmap passed for fd %i (mmapLength=%zu, offset=%zu)", - fd, - length, - static_cast(offset)); - } - return buf; - } -} diff --git a/daemon/agents/perf/detail/perf_buffer_consumer_detail.h b/daemon/agents/perf/detail/perf_buffer_consumer_detail.h deleted file mode 100644 index 617286e0..00000000 --- a/daemon/agents/perf/detail/perf_buffer_consumer_detail.h +++ /dev/null @@ -1,610 +0,0 @@ -/* Copyright (C) 2022 by Arm Limited. All rights reserved. */ - -#pragma once - -#include "Logging.h" -#include "agents/perf/record_types.h" -#include "async/async_buffer.hpp" -#include "async/continuations/async_initiate.h" -#include "async/continuations/continuation.h" -#include "async/continuations/operations.h" -#include "async/continuations/use_continuation.h" -#include "k/perf_event.h" -#include "lib/Span.h" -#include "lib/Syscall.h" - -#include -#include - -#include -#include -#include -#include - -#include - -namespace agents::perf::detail { - - constexpr auto error_buf_sz = 256; - - struct buffer_region_t { - std::uint64_t head {0}; - std::uint64_t tail {0}; - }; - - /** - * Holds the captured state of the data & aux buffer pointers so that the kernel can continue - * writing into the buffer while we're waiting for asynchronous processing to complete. - */ - struct buffer_snapshot_t { - perf_event_mmap_page * header_page; - buffer_region_t data; - buffer_region_t aux; - }; - - /** - * An async operation that will encode & write a sequence of data_record_chunk_tuple_t into - * the intermediate buffer. 
- */ - template - class data_encode_op_t { - public: - data_encode_op_t(std::shared_ptr encoder, - std::shared_ptr async_buffer) - : encoder {std::move(encoder)}, async_buffer(std::move(async_buffer)) - { - } - - template - auto async_exec(int cpu, lib::Span chunks, CompletionToken && token) - { - using namespace async::continuations; - return async_initiate>( - [cpu, chunks, e = encoder, b = async_buffer]() mutable { - return start_with() // - | then([cpu, chunks, e, b = std::move(b)]() mutable { - return e->async_encode(std::move(b), cpu, 0, chunks, use_continuation); - }) // - | then([](auto ec, auto /*n*/) { return start_with(ec); }); - }, - token); - } - - private: - std::shared_ptr encoder; - std::shared_ptr async_buffer; - }; - - /** - * An async operation that will encode & write a sequence of aux_record_chunk_t into the - * intermediate buffer. - */ - template - class aux_encode_op_t { - public: - aux_encode_op_t(std::shared_ptr encoder, std::shared_ptr async_buffer) - : encoder {std::move(encoder)}, async_buffer {std::move(async_buffer)} - { - } - - template - auto async_exec(int cpu, std::uint64_t tail, lib::Span chunks, CompletionToken && token) - { - using namespace async::continuations; - return async_initiate>( - [cpu, tail, chunks, e = encoder, b = async_buffer]() mutable { - return start_with() // - | then([cpu, tail, chunks, e, b = std::move(b)]() mutable { - return e->async_encode(std::move(b), cpu, tail, chunks, use_continuation); - }) // - | then([](auto ec, auto /*n*/) { return start_with(ec); }); - }, - token); - } - - private: - std::shared_ptr encoder; - std::shared_ptr async_buffer; - }; - - /** - * Encapsulates the logic to parse the perf aux buffer into an array of aux_record_chunk_t - * and pass that span to an async consumer. Once that consumer completes this operation's - * completion handler will update the ring buffer tail pointer to give the space back to - * the kernel. - * - * @tparam SnapshotFn Callable that returns a buffer snapshot. - * @tparam ComposedOp A callable that holds the async operation that will process - * the parsed records. 
- */ - template - class aux_consume_op_t : public std::enable_shared_from_this> { - public: - explicit aux_consume_op_t(int cpu, - perf_buffer_t * perf_buffer, - std::size_t aux_buffer_length, - SnapshotFn && snap, - ComposedOp && op) - : cpu(cpu), - perf_buffer(perf_buffer), - aux_buffer_length(aux_buffer_length), - snapper(std::forward(snap)), - op(std::forward(op)) - { - } - - template - auto async_exec(CompletionToken && token) - { - using namespace async::continuations; - - return async_initiate>( - [self = this->shared_from_this()]() mutable { - return start_with() // - | then([self]() -> polymorphic_continuation_t { - const char * buffer = static_cast(self->perf_buffer->aux_buffer); - - const auto snapshot = self->snapper(); - - const auto header_head = snapshot.aux.head; - const auto header_tail = snapshot.aux.tail; - - if (header_head <= header_tail) { - return start_with(boost::system::error_code {}, snapshot); - } - - const auto length = self->aux_buffer_length; - - const std::size_t buffer_mask = length - 1; - - // will be 'length' at most otherwise somehow wrapped many times - const std::size_t total_data_size = - std::min(header_head - header_tail, length); - const std::uint64_t head = header_head; - // will either be the same as 'tail' or will be > if somehow wrapped multiple times - const std::uint64_t tail = (header_head - total_data_size); - - const std::size_t tail_masked = (tail & buffer_mask); - const std::size_t head_masked = (head & buffer_mask); - - const bool have_wrapped = head_masked < tail_masked; - - const std::size_t first_size = (have_wrapped ? (length - tail_masked) : total_data_size); - const std::size_t second_size = (have_wrapped ? head_masked : 0); - - if (first_size <= 0) { - self->update_buffer_position(snapshot); - return start_with(boost::system::error_code {}, snapshot); - } - - self->chunks = {aux_record_chunk_t {buffer + tail_masked, first_size}, - aux_record_chunk_t {buffer, second_size}}; - - return self->op.async_exec(self->cpu, - tail, - self->chunks, - use_continuation) // - | then([self, snapshot](auto ec) { - self->update_buffer_position(snapshot); - return start_with(ec, snapshot); - }); - }); - }, - token); - } - - private: - int cpu; - perf_buffer_t * perf_buffer; - const std::size_t aux_buffer_length; - SnapshotFn snapper; - ComposedOp op; - std::array chunks; - - void update_buffer_position(buffer_snapshot_t snapshot) - { - // only update if we're actually consuming from the aux buffer - if (snapshot.aux.head != snapshot.aux.tail) { - // Update tail with the aux read and synchronize with the buffer writer - __atomic_store_n(&snapshot.header_page->aux_tail, snapshot.aux.head, __ATOMIC_RELEASE); - } - } - }; - - /** - * An async operation that parses arrays of data_record_chunk_tuple_t from the perf event - * ring buffer, and passes those arrays to an async consumer for further processing. - * Parsing is done in fixed sized blocks of chunks (CHUNK_BUFFER_SIZE) and will loop until - * the snapshotted ring buffer region has been consumed. Once that has happend the tail pointer - * is updated, to pass the buffer space back to the kernel, and the completion hander is called. - * - * @tparam Executor The Asio exectuor to use when dispatching intermediate operations. - * @tparam ComposedOp The async consumer that will processed the parsed data_record_chunk_tuple_t - * arrays. 
- */ - template - class data_consume_op_t : public std::enable_shared_from_this> { - - private: - // arbitrary, roughly 4k size stack allocation on 64-bit - static constexpr std::size_t CHUNK_BUFFER_SIZE = 256; - static constexpr std::size_t CHUNK_WORD_SIZE = sizeof(data_word_t); - - std::reference_wrapper executor; - int cpu; - char * ring_buffer; - const std::size_t buffer_length; - const std::size_t buffer_mask; - buffer_snapshot_t snap; - ComposedOp op; - - std::array chunk_buffer; - std::size_t head; - std::size_t tail; - - template - const T * ring_buffer_ptr(const char * base, std::size_t position_masked) - { - return reinterpret_cast(base + position_masked); - } - - template - const T * ring_buffer_ptr(const char * base, std::size_t position, std::size_t size_mask) - { - return ring_buffer_ptr(base, position & size_mask); - } - - std::size_t calculate_next_chunk_count() - { - // start by clearning any old junk out of the buffer - std::size_t num_chunks_in_buffer = 0; - - // is there any more work left to do? - while ((head > tail) && (num_chunks_in_buffer != CHUNK_BUFFER_SIZE)) { - const auto * record_header = ring_buffer_ptr(ring_buffer, tail, buffer_mask); - const auto record_size = (record_header->size + CHUNK_WORD_SIZE - 1) & ~(CHUNK_WORD_SIZE - 1); - const auto record_end = tail + record_size; - const std::size_t base_masked = (tail & buffer_mask); - const std::size_t end_masked = (record_end & buffer_mask); - - const bool have_wrapped = end_masked < base_masked; - - const std::size_t first_size = (have_wrapped ? (buffer_length - base_masked) : record_size); - const std::size_t second_size = (have_wrapped ? end_masked : 0); - - // set chunk - chunk_buffer[num_chunks_in_buffer].first_chunk.chunk_pointer = - ring_buffer_ptr(ring_buffer, base_masked); - chunk_buffer[num_chunks_in_buffer].first_chunk.word_count = first_size / CHUNK_WORD_SIZE; - chunk_buffer[num_chunks_in_buffer].optional_second_chunk.chunk_pointer = - ring_buffer_ptr(ring_buffer, 0); - chunk_buffer[num_chunks_in_buffer].optional_second_chunk.word_count = second_size / CHUNK_WORD_SIZE; - - num_chunks_in_buffer += 1; - tail = record_end; - } - - return num_chunks_in_buffer; - } - - async::continuations::polymorphic_continuation_t co_initiate() - { - using namespace async::continuations; - return start_on(executor.get()) // - | then([self = this->shared_from_this()]() mutable - -> polymorphic_continuation_t { - auto num_chunks_in_buffer = self->calculate_next_chunk_count(); - // if there's nothing left in the buffer then our work is done - call the completion handler - if (num_chunks_in_buffer == 0) { - return start_with(boost::system::error_code {}); - } - - return self->op.async_exec(self->cpu, - lib::Span {self->chunk_buffer.data(), - num_chunks_in_buffer}, - use_continuation) // - | then([self](auto ec) -> polymorphic_continuation_t { - // something went wrong so just drop this part of the - // buffer and let the handler know - if (ec) { - return start_with(ec); - } - // otherwise, loop back round and continue reading from the ring buffer - return self->co_initiate(); - }); - }); - } - - void update_buffer_position() - { - __atomic_store_n(&snap.header_page->data_tail, snap.data.head, __ATOMIC_RELEASE); - } - - public: - /** - * Constructs the reader "closure" over the specified state data. 
- */ - data_consume_op_t(Executor & executor, - int cpu, - char * ring_buffer, - std::size_t buffer_length, - const buffer_snapshot_t & snap, - ComposedOp && op) - : executor(executor), - cpu(cpu), - ring_buffer(ring_buffer), - buffer_length(buffer_length), - buffer_mask(buffer_length - 1), - snap(snap), - op(std::move(op)), - chunk_buffer {}, - head(snap.data.head), - tail(snap.data.tail) - { - } - - template - auto async_exec(CompletionToken && token) - { - using namespace async::continuations; - return async_initiate>( - [self = this->shared_from_this()]() mutable { - return self->co_initiate() // - | then([self](auto ec) { - self->update_buffer_position(); - return ec; - }); - }, - token); - } - }; - - /** - * Calculate the mmap region from @a config. - * - * @param config Buffer config. - * @return Size in bytes. - */ - [[nodiscard]] inline std::size_t get_data_mmap_length(const buffer_config_t & config) - { - return config.page_size + config.data_buffer_size; - } - - /** - * Attempts to create the ringbuffer mmap and provides detailed logging - * upon error. - * - * @param cpu CPU ID. - * @param config Buffer config. - * @param length mmap size in bytes. - * @param offset Offset into @fd to load, bytes in multiples of pages. - * @param fd File descriptor to load. - * @return MMapped region start pointer, or MAP_FAILED on error. - */ - [[nodiscard]] void * try_mmap_with_logging(int cpu, - const buffer_config_t & config, - std::size_t length, - off_t offset, - int fd); - - /** - * An encapsulation of the logic to asynchronously process the data + aux buffers for a single CPU. - */ - template - class perf_consume_op_t : public std::enable_shared_from_this> { - public: - /** - * Construct the async op for the specified CPU ring buffer. - * - * @param executor The Asio executor to use when dispatching async operations. - * @param cpu The cpu number that this buffer is attached to. - * @param config The config object that holds buffer length & page size information. - * @param pb An object that holds pointers to the data & aux buffers. - */ - perf_consume_op_t(Executor & executor, int cpu, const buffer_config_t & config, perf_buffer_t pb) - : executor(executor), - cpu(cpu), - config(config), - header_page(reinterpret_cast(static_cast(pb.data_buffer))), - perf_buffer(pb) - { - } - - /** Unmaps the mmaped regions. */ - ~perf_consume_op_t() - { - lib::munmap(perf_buffer.data_buffer, get_data_mmap_length(config)); - if (perf_buffer.aux_buffer != nullptr) { - lib::munmap(perf_buffer.aux_buffer, config.aux_buffer_size); - } - } - - /** - * Asynchronously calls the aux and then data consumers. - * - * @tparam DataConsumeOp Data consumer type - * @tparam AuxConsumeOp Aux consumer type - * @tparam CompletionToken Token type, expects an error_code. 
- * @param data_op Data consumer - * @param aux_op Aux consumer - * @param token Called once the operations have completed - */ - template - auto async_send(DataConsumeOp && data_op, AuxConsumeOp && aux_op, CompletionToken && token) - { - using namespace async::continuations; - - return async_initiate>( - [self = this->shared_from_this(), - data_op = std::forward(data_op), - aux_op = std::forward(aux_op)]() mutable { - return start_on(self->executor) // - | then([self, aux_op = std::move(aux_op)]() mutable { - auto snapshotter = [self]() { return self->snapshot(); }; - - auto consumer = std::make_shared>( - self->cpu, - &(self->perf_buffer), - self->config.aux_buffer_size, - std::move(snapshotter), - std::move(aux_op)); - - return consumer->async_exec(use_continuation); - }) - | then([self, data_op = std::move(data_op)](auto ec, auto snap) mutable - -> polymorphic_continuation_t { - if (ec) { - return start_with(ec); - } - - auto consumer = std::make_shared>( - self->executor, - self->cpu, - static_cast(self->perf_buffer.data_buffer) + self->config.page_size, - self->config.data_buffer_size, - snap, - std::move(data_op)); - - return consumer->async_exec(use_continuation); - }); - }, - token); - } - - /** - * Calls ioctl with PERF_EVENT_IOC_SET_OUTPUT on @fd using the buffer's - * FD. - * - * @param fd FD of event to assign the output of. - * @return True on success. - */ - boost::system::error_code set_output(int fd) - { - // NOLINTNEXTLINE(hicpp-signed-bitwise) - if (lib::ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, perf_buffer.fd) < 0) { - std::array strbuf {0}; - strerror_r(errno, strbuf.data(), strbuf.size()); - LOG_DEBUG("ioctl failed for fd %i (errno=%d, %s)", fd, errno, strbuf.data()); - return boost::system::errc::make_error_code(static_cast(errno)); - } - - return {}; - } - - /** - * Create an aux buffer mmap and associate it with this instance. - * - * No-op if an aux buffer is alerady attached. - * @param fd File descriptor to mmap. - * @return True on success. - */ - boost::system::error_code attach_aux_buffer(int fd) - { - if (perf_buffer.aux_buffer == nullptr) { - const auto offset = get_data_mmap_length(config); - const auto length = config.aux_buffer_size; - - perf_event_mmap_page & pemp = *static_cast(perf_buffer.data_buffer); - pemp.aux_offset = offset; - pemp.aux_size = length; - - if (offset > std::numeric_limits::max()) { - LOG_DEBUG("Offset for perf aux buffer is out of range: %zu", offset); - return boost::system::errc::make_error_code(boost::system::errc::result_out_of_range); - } - - auto * buf = try_mmap_with_logging(cpu, config, length, static_cast(offset), fd); - // NOLINTNEXTLINE(performance-no-int-to-ptr) - if (buf == MAP_FAILED) { - // Can't use errno here as other ops in try_mmap_with_logging overrride it - return boost::system::errc::make_error_code(boost::system::errc::invalid_argument); - } - - perf_buffer.aux_buffer = buf; - if (perf_buffer.aux_fd > 0) { - LOG_DEBUG("Multiple aux fds"); - return boost::system::errc::make_error_code(boost::system::errc::invalid_argument); - } - perf_buffer.aux_fd = fd; - } - - return {}; - } - - private: - Executor & executor; - int cpu; - const buffer_config_t & config; - perf_event_mmap_page * header_page; - perf_buffer_t perf_buffer; - - /** - * Creates a point-in-time snapshot of the state of the ring buffer head/tail pointers. - * This allows us to process the buffer asynchronously whilst the kernel continues write - * into it. We need to ensure that we don't publish aux buffer entries before the data - * records. 
- */ - [[nodiscard]] buffer_snapshot_t snapshot() const - { - buffer_snapshot_t snap; - - // We read the data buffer positions before we read the aux buffer positions - // so that we never send records more recent than the aux - snap.header_page = header_page; - snap.data.head = __atomic_load_n(&snap.header_page->data_head, __ATOMIC_ACQUIRE); - // Only we write this so no atomic load needed - snap.data.tail = snap.header_page->data_tail; - - // Now send the aux data before the records to ensure the consumer never receives - // a PERF_RECORD_AUX without already having received the aux data - void * const aux_buf = perf_buffer.aux_buffer; - if (aux_buf != nullptr) { - snap.aux.head = __atomic_load_n(&snap.header_page->aux_head, __ATOMIC_ACQUIRE); - // Only we write this so no atomic load needed - snap.aux.tail = snap.header_page->aux_tail; - } - - return snap; - } - }; - - /** - * Creates a perf_consume_op_t once it's primary ringbuffer has been - * successfully initialised. - * - * @tparam Executor Executor type. - * @param executor Excecutor instance. - * @param fd MMap FD. - * @param cpu CPU index. - * @param config Buffer configuration. - * @return perf_consume_op_t shared pointer, or nullptr if mmap-ing was - * unsuccessful. - */ - template - [[nodiscard]] std::shared_ptr> perf_consume_op_factory_t(Executor & executor, - int fd, - int cpu, - const buffer_config_t & config) - { - // Create the data buffer instance - auto * buf = detail::try_mmap_with_logging(cpu, config, get_data_mmap_length(config), 0, fd); - // NOLINTNEXTLINE(performance-no-int-to-ptr) - if (buf == MAP_FAILED) { - return {}; - } - - // Check the version - perf_event_mmap_page & pemp = *static_cast(buf); - const auto compat_version = pemp.compat_version; - if (compat_version != 0) { - LOG_DEBUG("Incompatible perf_event_mmap_page compat_version (%i) for fd %i", compat_version, fd); - lib::munmap(buf, get_data_mmap_length(config)); - return {}; - } - - return std::make_shared>(executor, - cpu, - config, - perf_buffer_t {buf, nullptr, fd, -1}); - } -} diff --git a/daemon/agents/perf/events/event_binding_manager.hpp b/daemon/agents/perf/events/event_binding_manager.hpp new file mode 100644 index 00000000..ac89a74c --- /dev/null +++ b/daemon/agents/perf/events/event_binding_manager.hpp @@ -0,0 +1,1083 @@ +/* Copyright (C) 2021-2022 by Arm Limited. All rights reserved. */ +#pragma once + +#include "Logging.h" +#include "agents/perf/capture_configuration.h" +#include "agents/perf/events/event_bindings.hpp" +#include "agents/perf/events/event_configuration.hpp" +#include "agents/perf/events/perf_activator.hpp" +#include "agents/perf/events/perf_ringbuffer_mmap.hpp" +#include "agents/perf/events/types.hpp" +#include "lib/Assert.h" +#include "lib/EnumUtils.h" +#include "linux/perf/PerfUtils.h" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace agents::perf { + + /** + * This class provides the means to manage per-core / per-thread counter groups for CPU (i.e PMU/software/tracepoint, not uncore) related events. + * + * The manager will respond to core online/offline events, along with pid track/untrack events and activate groups of events on a per (core+thread) + * basis (as appropriate for app vs system-wide mode). It will handle cases where the core is reported as offline during activation, or likewise + * where the thread terminates. 
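+     *
+     * For illustration only (simplified, exact signatures abbreviated, error handling omitted),
+     * the calling pattern for a core coming online is a prepare step followed by a start step:
+     *
+     *     auto prep = manager.core_online_prepare(no, cluster_id, tids);
+     *     if (prep.state == aggregate_state_t::usable) {
+     *         // serialise prep.mappings (perf event id -> gator key) into the capture and hand
+     *         // prep.event_fds_by_pid / prep.mmap_ptr to the ringbuffer monitor, then:
+     *         auto started = manager.core_online_start(no);
+     *     }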
+ * + * online and track events are split into two calls; a 'xxx_prepare' method which prepares the events with appropriate calls to perf_event_open. + * The set of opened items is returned as id->key mappings, allowing the caller to serialize them into the APC capture. This may then + * be followed by a call to 'xxx_start' method which will activate the perf event group. + */ + template + class event_binding_manager_t { + public: + using perf_activator_t = PerfActivator; + using stream_descriptor_t = typename perf_activator_t::stream_descriptor_t; + using event_binding_set_type = event_binding_set_t; + using id_to_key_mappings_t = std::vector>; + + /* The tuple of fd and is-aux flag */ + using fd_aux_flag_pair_t = std::pair, bool>; + /** The tuple of pid, fd and is_aux flag */ + using pid_fd_pair_t = std::pair; + /** The tuple of core no, fd and is_aux flag */ + using core_no_fd_pair_t = std::pair; + + /** Returned by core_online_prepare */ + struct core_online_prepare_result_t { + /** Indicates the state of the core, where: + * - aggregate_state_t::usable means the core was online and had events attached to it. + * - aggregate_state_t::failed means some unexpected fatal error occured. The core will be reverted to an offline state. + * - aggregate_state_t::terminated means there are no threads currently tracked. The core will be usable once a thread is tracked. + * - aggregate_state_t::offline means the core went offline again and will be left in that state with no events attached to it + */ + aggregate_state_t state; + /** The mapping from event id to key */ + id_to_key_mappings_t mappings {}; + /** The set of pids that were previously tracked, but were removed as were detected as terminated during the prepare call */ + std::set terminated_pids {}; + /** The stream descriptors to monitor */ + std::vector event_fds_by_pid {}; + /** The mmap */ + std::shared_ptr mmap_ptr {}; + }; + + /** Returned by core_online_start */ + struct core_online_start_result_t { + /** Indicates the state of the core, where: + * - aggregate_state_t::usable means the core was started correctly. + * - aggregate_state_t::failed means some unexpected fatal error occured. The core will be reverted to an offline state. + * - aggregate_state_t::terminated means there are no threads currently tracked. The core will be usable once a thread is tracked. + * - aggregate_state_t::offline means the core went offline again before the call, or no prior call to core_online_prepare was made. + */ + aggregate_state_t state; + /** The set of pids that were previously tracked, but were removed as were detected as terminated during the start call */ + std::set terminated_pids {}; + }; + + /** Returned by pid_track_prepare */ + struct pid_track_prepare_result_t { + /** Indicates the state of the pid, where: + * - aggregate_state_t::usable means the pid was tracked and attached to at least one online core. + * - aggregate_state_t::failed means some unexpected fatal error occured. The pid will be reverted to an untracked state. + * - aggregate_state_t::terminated means that the pid terminated during the prepare call and is reverted to an untracked state. + * - aggregate_state_t::offline means no cores are currently online, or all that previously were online went offline during the prepare call. The pid will be usable the next time a core comes online. 
+ */ + aggregate_state_t state; + /** The mapping from event id to key */ + id_to_key_mappings_t mappings {}; + /** The set of cores that were previously online, but were removed as were detected as offline during the prepare call */ + std::set offlined_cores {}; + /** The stream descriptors to monitor */ + std::vector event_fds_by_core_no {}; + }; + + /** Returned by pid_track_start */ + struct pid_track_start_result_t { + /** Indicates the state of the pid, where: + * - aggregate_state_t::usable means the pid was started correctly on at least one core. + * - aggregate_state_t::failed means some unexpected fatal error occured. The pid will be reverted to an untracked state. + * - aggregate_state_t::terminated means that the pid terminated during the prepare call and is reverted to an untracked state. + * - aggregate_state_t::offline means no cores are currently online, or all that previously were online went offline during the prepare call. The pid will be usable the next time a core comes online. + */ + aggregate_state_t state; + /** The set of cores that were previously online, but were removed as were detected as offline during the start call */ + std::set offlined_cores {}; + }; + + static constexpr pid_t self_pid {0}; + static constexpr pid_t system_wide_pid {-1}; + static constexpr pid_t header_pid {0}; // not the same as system-wide, and not a valid pid + + /** + * Construct a new active capture binding manager + * + * @param perf_activator The event *perf_activator object + * @param configuration The counter configuration + * @param uncore_pmus The uncore pmu list + * @param core_no_to_spe_type The mapping from core no to SPE pmu type value + * @param is_system_wide True for system-wide captures, false for app captures + * @param enable_on_exec True for enable-on-exec with app captures + */ + event_binding_manager_t(std::shared_ptr perf_activator, + event_configuration_t const & configuration, + std::vector const & uncore_pmus, + std::map const & core_no_to_spe_type, + bool is_system_wide, + bool enable_on_exec) + : perf_activator(std::move(perf_activator)), + configuration(configuration), + uncore_pmus(uncore_pmus), + core_no_to_spe_type(core_no_to_spe_type), + is_system_wide(is_system_wide), + enable_on_exec(enable_on_exec) + { + } + + /** @return True if capture started, false otherwise */ + [[nodiscard]] bool is_capture_started() const { return capture_started; } + + /** Mark the capture as having started */ + void set_capture_started() { capture_started = true; } + + /** @return true if there are any SPE counters active on any core */ + [[nodiscard]] bool has_spe() const + { + return !(core_no_to_spe_type.empty() || configuration.spe_events.empty()); + } + + /** @return true if the cpu requires an aux buffer */ + [[nodiscard]] bool requires_aux(core_no_t no) const + { + // does it have any spe events? + auto spe_it = core_no_to_spe_type.find(no); + auto spe_type = (spe_it != core_no_to_spe_type.end() ? 
spe_it->second : 0); + + return spe_type != 0; + } + + /** + * Called to notify that a cpu core was onlined + * + * @param no The identifier for the core that changed state + * @param cluster_id The cluster id for the core + * @param additional_tids The set of new additional pids to add to the known set and activate on this core (only) + */ + [[nodiscard]] core_online_prepare_result_t core_online_prepare(core_no_t no, + cpu_cluster_id_t cluster_id, + std::set const & additional_tids) + { + runtime_assert(additional_tids.empty() || !is_system_wide, + "additional_tids provided but system-wide capture"); + + LOG_DEBUG("Core online prepare %d 0x%x", lib::toEnumValue(no), lib::toEnumValue(cluster_id)); + + // update the set of tracked pids + tracked_pids.insert(additional_tids.begin(), additional_tids.end()); + + // update the core type map + auto [it, inserted] = core_properties.try_emplace(no, core_properties_t {no, cluster_id}); + + // if the core was already online, then fail + if (!inserted) { + LOG_DEBUG("Core already online"); + return core_online_prepare_result_t {aggregate_state_t::failed}; + } + + id_to_key_mappings_t id_to_key_mappings {}; + + // tracking fds for polling / mmap + std::shared_ptr mmap_ptr {}; + std::vector event_fds_by_pid {}; + + // create the per-mmap header event + auto header_result = core_online_prepare_header(no, cluster_id, it); + if (header_result.state != aggregate_state_t::usable) { + return core_online_prepare_result_t {header_result.state}; + } + + // save the header id tracking + id_to_key_mappings.emplace_back(header_result.id, configuration.header_event.key); + // store the fd + it->second.header_event_fd = header_result.fd; + // mmap the header event + mmap_ptr = std::make_shared( + perf_activator->mmap_data(no, header_result.fd->native_handle())); + if (!mmap_ptr->has_data()) { + LOG_DEBUG("Core online prepare %d 0x%x failed due to data mmap error", + lib::toEnumValue(no), + lib::toEnumValue(cluster_id)); + core_offline_it(it); + return core_online_prepare_result_t {aggregate_state_t::failed}; + } + // store the mmap + it->second.mmap = mmap_ptr; + // header_fd should be in event_fds + event_fds_by_pid.emplace_back(pid_fd_pair_t {header_pid, {header_result.fd, false}}); + + // create the real events + LOG_DEBUG("Creating core set %d 0x%x", lib::toEnumValue(no), lib::toEnumValue(cluster_id)); + auto [result, removed_pids] = create_binding_sets_for_core( + [&id_to_key_mappings](gator_key_t key, perf_event_id_t id) { + id_to_key_mappings.emplace_back(id, key); + }, + make_mmap_tracker( + perf_activator, + mmap_ptr, + header_result.fd, + no, + cluster_id, + [&event_fds_by_pid](pid_t pid, std::shared_ptr fd, bool requires_aux) { + event_fds_by_pid.emplace_back(pid_fd_pair_t {pid, {std::move(fd), requires_aux}}); + }), + it->second); + + switch (result) { + case aggregate_state_t::usable: { + LOG_DEBUG("Core online prepare %d 0x%x succeeded", + lib::toEnumValue(no), + lib::toEnumValue(cluster_id)); + + return core_online_prepare_result_t {result, + std::move(id_to_key_mappings), + std::move(removed_pids), + std::move(event_fds_by_pid), + mmap_ptr}; + } + case aggregate_state_t::terminated: { + LOG_DEBUG("Core online prepare %d 0x%x failed as all threads terminated / none tracked", + lib::toEnumValue(no), + lib::toEnumValue(cluster_id)); + // return usable, but only have the header id mapping + return core_online_prepare_result_t {aggregate_state_t::usable, + { + {header_result.id, configuration.header_event.key}, + }, + std::move(removed_pids), + 
{pid_fd_pair_t {header_pid, {header_result.fd, false}}}, + mmap_ptr}; + } + case aggregate_state_t::offline: { + LOG_DEBUG("Core online prepare %d 0x%x failed as core went offline", + lib::toEnumValue(no), + lib::toEnumValue(cluster_id)); + break; + } + case aggregate_state_t::failed: { + LOG_DEBUG("Core online prepare %d 0x%x failed due to error", + lib::toEnumValue(no), + lib::toEnumValue(cluster_id)); + break; + } + default: { + throw std::runtime_error("unexpected aggregate_state_t"); + } + } + + // for offline / failed - transition all the event sets into offline state + core_offline_it(it); + + return core_online_prepare_result_t {result, {}, std::move(removed_pids)}; + } + + /** + * Called to notify that a cpu core was onlined + * + * @param no The identifier for the core that changed state + */ + [[nodiscard]] core_online_start_result_t core_online_start(core_no_t no) + { + runtime_assert(capture_started, "core_online_start called before capture started"); + + // no operation required if the core is already offline + auto it = core_properties.find(no); + if (it == core_properties.end()) { + LOG_DEBUG("Core online start %d called, but core offline", lib::toEnumValue(no)); + return {aggregate_state_t::offline, {}}; + } + + // if the core is online but there are no pids yet + if (it->second.binding_sets.empty()) { + LOG_DEBUG("Core online start %d called, but no pids are tracked", lib::toEnumValue(no)); + return {aggregate_state_t::terminated, {}}; + } + + // now transition all the event sets into online state + bool all_terminated = true; + + auto & binding_sets = it->second.binding_sets; + std::set terminated_pids {}; + + for (auto & entry : binding_sets) { + LOG_DEBUG("Core online start %d called, starting pid %d", lib::toEnumValue(no), entry.first); + + auto result = entry.second.start(*perf_activator); + + switch (result) { + case aggregate_state_t::usable: { + all_terminated = false; + break; + } + + case aggregate_state_t::terminated: { + // the process is terminated, remove it (later) + LOG_DEBUG("Core online start %d called, pid %d was terminated", + lib::toEnumValue(no), + entry.first); + terminated_pids.insert(entry.first); + break; + } + + case aggregate_state_t::offline: + case aggregate_state_t::failed: { + LOG_DEBUG("Core online start %d called, pid %d %s, removing core", + lib::toEnumValue(no), + entry.first, + (result == aggregate_state_t::offline ? "was offline" : "failed with error")); + + core_offline_it(it); + return {result, {}}; + } + + default: { + throw std::runtime_error("unexpected aggregate_state_t"); + } + } + } + + // remove any terminated pids + for (auto pid : terminated_pids) { + pid_untrack(pid); + } + + return {(all_terminated ? aggregate_state_t::terminated // + : aggregate_state_t::usable), + std::move(terminated_pids)}; + } + + /** + * Called to notify that a cpu core was offlined + * + * @param no The identifier for the core that changed state + */ + void core_offline(core_no_t no) + { + LOG_DEBUG("Core offline %d", lib::toEnumValue(no)); + + // no opperation required if the core is already offline + auto it = core_properties.find(no); + if (it == core_properties.end()) { + return; + } + + // offline and erase it + core_offline_it(it); + } + + /** + * Add a new PID (a thread) to the set of threads that are currently being captured. + * + * If the capture is currently active, then they will be activated immediately, otherwise the PID is stored + * and activated when the capture is started. 
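The thread-tracking path follows the same two-phase pattern as core onlining. A hedged sketch of the intended call order, again with `write_keys_frame` as a hypothetical serialisation helper:

// Editorial sketch (app capture only; pid tracking is invalid in system-wide mode).
auto prep = manager.pid_track_prepare(tid);
if (prep.state == aggregate_state_t::usable) {
    write_keys_frame(prep.mappings);                   // id->key mappings first
    if (manager.is_capture_started()) {
        auto started = manager.pid_track_start(tid);   // enable now; otherwise deferred to capture start
        (void) started;
    }
}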
+ */ + [[nodiscard]] pid_track_prepare_result_t pid_track_prepare(pid_t pid) + { + LOG_DEBUG("Track %d", pid); + + runtime_assert(!is_system_wide, "pid_track_prepare is only valid when !system-wide"); + + auto [it, inserted] = tracked_pids.insert(pid); + (void) it; // gcc7 :-( + + // if the pid is newly tracked then create bindings for all active cores + if (!inserted) { + LOG_DEBUG("Duplicate pid tracked"); + return {aggregate_state_t::usable, {}, {}}; + } + + id_to_key_mappings_t id_to_key_mappings {}; + std::vector event_fds_by_core_no {}; + + auto [result, offlined_cores] = create_binding_sets_for_pid( + [&id_to_key_mappings](gator_key_t key, perf_event_id_t id) { + id_to_key_mappings.emplace_back(id, key); + }, + event_fds_by_core_no, + pid); + + switch (result) { + case aggregate_state_t::usable: { + LOG_DEBUG("Track %d was succesfully prepared", pid); + + return {aggregate_state_t::usable, + std::move(id_to_key_mappings), + std::move(offlined_cores), + std::move(event_fds_by_core_no)}; + } + + case aggregate_state_t::offline: { + LOG_DEBUG("Track %d was succesful, but all cores offline", pid); + return {result, {}, std::move(offlined_cores), {}}; + } + + case aggregate_state_t::terminated: + case aggregate_state_t::failed: { + LOG_DEBUG("Track %d failed %s", + pid, + (result == aggregate_state_t::terminated ? "as process terminated" + : "due to unexpected error")); + + // remove the tracking and the bindings + tracked_pids.erase(pid); + remove_binding_sets_for_pid(pid); + return {result, {}, {}, {}}; + } + + default: { + throw std::runtime_error("unexpected aggregate_state_t"); + } + } + } + + /** Start binding sets on all known cores for the specified pid */ + [[nodiscard]] pid_track_start_result_t pid_track_start(pid_t pid) + { + runtime_assert(!is_system_wide, "pid_track_start is only valid when !system-wide"); + runtime_assert(capture_started, "pid_track_start called before capture started"); + + // check pid is tracked + if (tracked_pids.count(pid) == 0) { + LOG_DEBUG("Start pid %d failed as pid terminated / not tracked", pid); + return {aggregate_state_t::terminated, {}}; + } + + // now transition all the event sets into online state + bool all_offline = true; + std::set offlined_cores {}; + + for (auto & entry : core_properties) { + auto it = entry.second.binding_sets.find(pid); + if (it != entry.second.binding_sets.end()) { + LOG_DEBUG("Start pid %d on core %d", pid, lib::toEnumValue(entry.first)); + + auto result = it->second.start(*perf_activator); + switch (result) { + case aggregate_state_t::usable: { + all_offline = false; + break; + } + + case aggregate_state_t::offline: { + LOG_DEBUG("Start pid %d on core %d failed as core offline", + pid, + lib::toEnumValue(entry.first)); + offlined_cores.insert(entry.first); + break; + } + + case aggregate_state_t::terminated: + case aggregate_state_t::failed: { + LOG_DEBUG("Start pid %d on core %d failed %s", + pid, + lib::toEnumValue(entry.first), + (result == aggregate_state_t::terminated ? "as process terminated" + : "due to unexpected error")); + // transition all the event sets into offline state and remove them + remove_binding_sets_for_pid(pid); + return {result, {}}; + } + + default: { + throw std::runtime_error("unexpected aggregate_state_t"); + } + } + } + else { + LOG_DEBUG("Start pid %d on core %d failed as pid not found", pid, lib::toEnumValue(entry.first)); + } + } + + // remove all offline cores + for (auto no : offlined_cores) { + core_offline(no); + } + + return {(all_offline ? 
aggregate_state_t::offline // + : aggregate_state_t::usable), + std::move(offlined_cores)}; + } + + /** + * Remove a PID (if for example, the process exists) from the set of tracked pids. + */ + void pid_untrack(pid_t pid) + { + LOG_DEBUG("Untrack %d", pid); + + if (tracked_pids.erase(pid) > 0) { + remove_binding_sets_for_pid(pid); + } + } + + private: + /** Used by the event_bindings to track fds and create the mmap */ + template + struct mmap_tracker_t { + std::shared_ptr perf_activator; + std::shared_ptr mmap; + std::shared_ptr header_event_fd; + core_no_t no; + cpu_cluster_id_t cluster_id; + Consumer consumer; + + [[nodiscard]] bool operator()(pid_t pid, std::shared_ptr fd, bool requires_aux) + { + // save the fd to the list for monitoring + consumer(pid, fd, requires_aux); + + if (!mmap->has_data()) { + LOG_DEBUG("Core online prepare %d 0x%x failed due to data mmap error", + lib::toEnumValue(no), + lib::toEnumValue(cluster_id)); + return false; + } + + // redirect output + if (!perf_activator->set_output(fd->native_handle(), header_event_fd->native_handle())) { + LOG_DEBUG("Core online prepare %d 0x%x failed due to set_output error", + lib::toEnumValue(no), + lib::toEnumValue(cluster_id)); + return false; + } + + // mmap aux + if (requires_aux) { + perf_activator->mmap_aux(*mmap, no, fd->native_handle()); + if (!mmap->has_aux()) { + LOG_DEBUG("Core online prepare %d 0x%x failed due to mmap_aux failure", + lib::toEnumValue(no), + lib::toEnumValue(cluster_id)); + return false; + } + } + + return true; + } + }; + + template + static mmap_tracker_t make_mmap_tracker(std::shared_ptr perf_activator, + std::shared_ptr mmap, + std::shared_ptr header_event_fd, + core_no_t no, + cpu_cluster_id_t cluster_id, + Consumer && consumer) + { + return mmap_tracker_t {std::move(perf_activator), + std::move(mmap), + std::move(header_event_fd), + no, + cluster_id, + std::forward(consumer)}; + } + + /** + * The set of core-specifc properties, including the core-type, and the binding sets for that core + */ + struct core_properties_t { + /** Store all the binding sets, by pid */ + std::map binding_sets {}; + /** The set of uncore PMUs active on this CPU */ + std::set active_uncore_pmu_ids {}; + /** The core number */ + core_no_t no; + /** The core cluster id */ + cpu_cluster_id_t cluster_id; + /** The mmap */ + std::shared_ptr mmap {}; + /** The header event fd */ + std::shared_ptr header_event_fd {}; + + constexpr core_properties_t(core_no_t no, cpu_cluster_id_t cluster_id) : no(no), cluster_id(cluster_id) {} + }; + + std::shared_ptr perf_activator; + event_configuration_t const & configuration; + std::vector const & uncore_pmus; + std::map const & core_no_to_spe_type; + std::map core_properties {}; + std::map> spe_event_definitions_retyped {}; + bool is_system_wide; + bool enable_on_exec; + bool capture_started {false}; + std::set tracked_pids {}; + std::set all_active_uncore_pmu_ids {}; + + /** + * Create the binding sets for some core. + * + * The subclass will override this and invoke create_binding_set as appropriate for each process id that it cares about (which may just be -1 in the case of system-wide). 
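For reference, the pid/cpu conventions this relies on come from perf_event_open(2); the `tracked_tid` name below is only illustrative.

// perf_event_open() combinations used here:
//   pid == -1, cpu >= 0 : count every thread on the given CPU (system-wide; needs privileges)
//   pid  >  0, cpu >= 0 : count the given thread only while it runs on that CPU
// so system-wide mode needs one binding set per core, app mode one per (core, tracked thread).
pid_t const perf_open_pid = is_system_wide ? system_wide_pid /* -1 */ : tracked_tid;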
+ * + * @param properties The core properties object from the core_properties map + */ + template + [[nodiscard]] std::pair> create_binding_sets_for_core( + IdToKeyMappingTracker && id_to_key_mapping_tracker, + MmapTracker && mmap_tracker, + core_properties_t & properties) + { + LOG_DEBUG("Create for core %d", lib::toEnumValue(properties.no)); + + // just forward on, with pid == -1 for system wide + if (is_system_wide) { + return {create_binding_set(id_to_key_mapping_tracker, mmap_tracker, properties, system_wide_pid), {}}; + } + + // if there are no pids yet + if (tracked_pids.empty()) { + return {aggregate_state_t::terminated, {}}; + } + + bool all_terminated = true; + std::set terminated_pids {}; + + // create a binding set for each known pid + for (auto pid : tracked_pids) { + auto result = create_binding_set(id_to_key_mapping_tracker, mmap_tracker, properties, pid); + + switch (result) { + case aggregate_state_t::usable: { + all_terminated = false; + break; + } + + case aggregate_state_t::terminated: { + LOG_DEBUG("Core online prepare %d 0x%x detected a terminated process: %d", + lib::toEnumValue(properties.no), + lib::toEnumValue(properties.cluster_id), + pid); + + terminated_pids.insert(pid); + break; + } + + case aggregate_state_t::offline: + case aggregate_state_t::failed: { + return {result, {}}; + } + + default: { + throw std::runtime_error("unexpected aggregate_state_t"); + } + } + } + + // remove any terminated pids + for (auto pid : terminated_pids) { + pid_untrack(pid); + } + + return {(all_terminated ? aggregate_state_t::terminated // + : aggregate_state_t::usable), + std::move(terminated_pids)}; + } + + /** + * Create a binding set object associated with some core and process id + * + * @param id_to_key_mapping_tracker Receives the id->key mappings for all valid events + * @param mmap_tracker A callable of `bool(std::shared_ptr, bool)` that creates/appends fds to the mmap, for later polling + * @param properties The core properties object + * @param pid The process id + * @return the aggregate state of the attempt to create the events + */ + template + [[nodiscard]] aggregate_state_t create_binding_set(IdToKeyMappingTracker && id_to_key_mapping_tracker, + MmapTracker && mmap_tracker, + core_properties_t & properties, + pid_t pid) + { + LOG_DEBUG("Create binding set %d :: %d", lib::toEnumValue(properties.no), pid); + + // check the header fd and mmap + runtime_assert(properties.mmap != nullptr, "Invalid mmap value"); + runtime_assert(properties.header_event_fd != nullptr, "Invalid header_event_fd value"); + + // find the set of cluster events + auto cluster_it = configuration.cluster_specific_events.find(properties.cluster_id); + auto const * cluster_events = + (cluster_it != configuration.cluster_specific_events.end() ? &(cluster_it->second) : nullptr); + + // and core specific events + auto core_it = configuration.cpu_specific_events.find(properties.no); + auto const * core_events = + (core_it != configuration.cpu_specific_events.end() ? &(core_it->second) : nullptr); + + // and spe events + auto spe_it = core_no_to_spe_type.find(properties.no); + const auto spe_type = (spe_it != core_no_to_spe_type.end() ? 
spe_it->second : 0); + + // and uncore events + auto [uncore_ids, uncore_event_count] = find_all_uncore_ids_for(properties.no, pid); + + // check there is any work to do + auto has_no_events = configuration.global_events.empty() + && ((cluster_events == nullptr) || cluster_events->empty()) + && ((core_events == nullptr) || core_events->empty()) + && ((spe_type == 0) || configuration.spe_events.empty()) && (uncore_event_count == 0); + + if (has_no_events) { + LOG_DEBUG("No events configured for cpu=%d, pid=%d", lib::toEnumValue(properties.no), pid); + return aggregate_state_t::terminated; + } + + // create the entry + auto [it, inserted] = properties.binding_sets.try_emplace(pid, properties.no, pid); + + if (!inserted) { + LOG_ERROR("A binding set already exists for cpu=%d, pid=%d", lib::toEnumValue(properties.no), pid); + throw std::logic_error("Cannot create binding set for a process that already exists"); + } + + auto & binding_set = it->second; + + // first add all the global events + if (!configuration.global_events.empty()) { + auto result = binding_set.add_mixed(configuration.global_events); + // this should be impossible since the group is new + runtime_assert(result, + "Failed to add a global event configuration, perhaps the binding set is not offline"); + } + + // then add the cluster events + if (cluster_events != nullptr) { + auto result = binding_set.add_mixed(*cluster_events); + // this should be impossible since the group is new + runtime_assert(result, + "Failed to add a cluster event configuration, perhaps the binding set is not offline"); + } + + if (core_events != nullptr) { + auto result = binding_set.add_mixed(*core_events); + // this should be impossible since the group is new + runtime_assert(result, + "Failed to add a cpu event configuration, perhaps the binding set is not offline"); + } + + if ((!configuration.spe_events.empty()) && (spe_type > 0)) { + auto result = binding_set.add_mixed(get_retyped_spe_definitions(spe_type)); + // this should be impossible since the group is new + runtime_assert(result, + "Failed to add an SPE event configuration, perhaps the binding set is not offline"); + } + + for (auto id : uncore_ids) { + auto result = binding_set.add_mixed(configuration.uncore_specific_events.at(id)); + // this should be impossible since the group is new + runtime_assert(result, + "Failed to add an uncore event configuration, perhaps the binding set is not offline"); + properties.active_uncore_pmu_ids.insert(id); + all_active_uncore_pmu_ids.insert(id); + } + + // now all the bindings are created, now create the events + auto result = binding_set.create_events( + enable_on_exec && !capture_started, + id_to_key_mapping_tracker, + [pid, &mmap_tracker](std::shared_ptr fd, bool requires_aux) { + return mmap_tracker(pid, std::move(fd), requires_aux); + }, + *perf_activator, + spe_type); + + switch (result) { + case aggregate_state_t::usable: { + LOG_DEBUG("Create binding set for core=%d, pid=%d was successful", + lib::toEnumValue(properties.no), + pid); + return result; + } + + case aggregate_state_t::offline: + case aggregate_state_t::failed: + case aggregate_state_t::terminated: { + LOG_DEBUG( + "Create binding set for core=%d, pid=%d failed due to %s", + lib::toEnumValue(properties.no), + pid, + (result == aggregate_state_t::offline + ? "core offline" + : (result == aggregate_state_t::terminated ? 
"process terminated" : "unexpected error"))); + // erase the entry and return the result + properties.binding_sets.erase(it); + return result; + } + + default: { + throw std::runtime_error("unexpected aggregate_state_t"); + } + } + } + + /** Find all uncore pmus associated with some core that need to be brought online */ + [[nodiscard]] std::pair, std::size_t> find_all_uncore_ids_for(core_no_t no, pid_t pid) + { + std::pair, std::size_t> result {{}, 0}; + + // do nothing if the pid is not system-wide + if (pid != system_wide_pid) { + return result; + } + + // iterate each uncore pmu and check for inactive uncores that are associated with the cpu + for (auto const & [id, events] : configuration.uncore_specific_events) { + // already active on another core? + if (all_active_uncore_pmu_ids.count(id) > 0) { + LOG_DEBUG("Ignoring uncore %d on %d as already active", lib::toEnumValue(id), lib::toEnumValue(no)); + continue; + } + + auto const cpu_no = lib::toEnumValue(no); + auto const index = lib::toEnumValue(id); + runtime_assert((index >= 0) && (std::size_t(index) < uncore_pmus.size()), "Invalid uncore pmu id"); + auto const & pmu = uncore_pmus.at(index); + auto const cpu_mask = perf_utils::readCpuMask(pmu.getId()); + auto const current_cpu_not_in_mask = ((!cpu_mask.empty()) && (cpu_mask.count(cpu_no) == 0)); + auto const mask_is_empty_and_cpu_not_default = (cpu_mask.empty() && (cpu_no != 0)); + + // skip pmus not associated with this core + if (current_cpu_not_in_mask || mask_is_empty_and_cpu_not_default) { + LOG_DEBUG("Ignoring uncore %d on %d as not selected (%zu / %u / %u)", + lib::toEnumValue(id), + lib::toEnumValue(no), + cpu_mask.size(), + current_cpu_not_in_mask, + mask_is_empty_and_cpu_not_default); + continue; + } + + // found one + LOG_DEBUG("Selecting uncore %d on %d", lib::toEnumValue(id), lib::toEnumValue(no)); + result.first.insert(id); + result.second += events.size(); + } + + return result; + } + + /** Create binding sets on all known cores for the specified pid */ + template + [[nodiscard]] std::pair> create_binding_sets_for_pid( + IdToKeyMappingTracker && id_to_key_mapping_tracker, + std::vector & event_fds_by_core_no, + pid_t pid) + { + LOG_DEBUG("Create for pid %d", pid); + + bool all_offline = true; + std::set offline_cores {}; + + for (auto & entry : core_properties) { + // create the events + auto result = create_binding_set( + id_to_key_mapping_tracker, + make_mmap_tracker( + perf_activator, + entry.second.mmap, + entry.second.header_event_fd, + entry.second.no, + entry.second.cluster_id, + [no = entry.second.no, + &event_fds_by_core_no](pid_t, std::shared_ptr fd, bool requires_aux) { + event_fds_by_core_no.emplace_back(core_no_fd_pair_t {no, {std::move(fd), requires_aux}}); + }), + entry.second, + pid); + + switch (result) { + case aggregate_state_t::usable: { + all_offline = false; + break; + } + + case aggregate_state_t::offline: { + LOG_DEBUG("Track %d detected offline core %d", pid, lib::toEnumValue(entry.first)); + + // other cores may be online, so continue trying + offline_cores.insert(entry.first); + break; + } + + case aggregate_state_t::terminated: + case aggregate_state_t::failed: { // erase the entry and return the result + remove_binding_sets_for_pid(pid); + return {result, {}}; + } + + default: { + throw std::runtime_error("unexpected aggregate_state_t"); + } + } + } + + // remove all failed cores + for (auto no : offline_cores) { + core_offline(no); + } + + return {(all_offline ? 
aggregate_state_t::offline // + : aggregate_state_t::usable), + std::move(offline_cores)}; + } + + /** Offline and remove binding sets on all known cores for the specified pid */ + void remove_binding_sets_for_pid(pid_t pid) + { + LOG_DEBUG("Remove all for pid %d", pid); + + for (auto & entry : core_properties) { + auto it = entry.second.binding_sets.find(pid); + if (it != entry.second.binding_sets.end()) { + // first, offline it + it->second.offline(*perf_activator); + // then erase the set + entry.second.binding_sets.erase(it); + } + } + } + + /** + * Get (first create) a copy of the event defintions in configuration.spe_events, but with the attr.type field changed to match + * the provided type parameter. + * + * @param type The type parameter to set for the event definitions + * @return The vector of modified event definitions + */ + std::vector const & get_retyped_spe_definitions(std::uint32_t type) + { + auto & result = spe_event_definitions_retyped[type]; + + if (result.empty()) { + for (auto const & event : configuration.spe_events) { + auto & inserted = result.emplace_back(event); + inserted.attr.type = type; + } + } + + return result; + } + + /** Common code for offlining and removing a core entry */ + void core_offline_it(typename std::map::iterator it) + { + auto & binding_sets = it->second.binding_sets; + + // transition all the event sets into offline state + for (auto & entry : binding_sets) { + entry.second.offline(*perf_activator); + } + + // make sure to mark any uncores as inactive + for (auto id : it->second.active_uncore_pmu_ids) { + all_active_uncore_pmu_ids.erase(id); + } + + // finally, close the header event explicitly (so that any thing waiting on it will be cancelled) + auto fd = it->second.header_event_fd; + if (fd != nullptr) { + fd->close(); + } + + // finally, erase it, freeing up the entry in the map + core_properties.erase(it); + } + + /** returned by core_online_prepare_header */ + struct core_online_prepare_header_result_t { + aggregate_state_t state; + perf_event_id_t id {perf_event_id_t::invalid}; + std::shared_ptr fd {}; + }; + + /** + * Prepare the header event that all the other events are expected to redirect their mmap events through + */ + core_online_prepare_header_result_t core_online_prepare_header( + core_no_t no, + cpu_cluster_id_t cluster_id, + typename std::map::iterator it) + { + using enable_state_t = typename perf_activator_t::enable_state_t; + using event_creation_status_t = typename perf_activator_t::event_creation_status_t; + using read_ids_status_t = typename perf_activator_t::read_ids_status_t; + + LOG_DEBUG("Creating core header %d 0x%x", lib::toEnumValue(no), lib::toEnumValue(cluster_id)); + auto header_result = perf_activator->create_event(configuration.header_event, + enable_state_t::enabled, + no, + (is_system_wide ? 
system_wide_pid : self_pid), + -1); + + switch (header_result.status) { + case event_creation_status_t::failed_fatal: + case event_creation_status_t::failed_invalid_pid: + case event_creation_status_t::failed_invalid_device: { + LOG_DEBUG("Creating core header %d 0x%x failed.", + lib::toEnumValue(no), + lib::toEnumValue(cluster_id)); + core_properties.erase(it); + return {aggregate_state_t::failed}; + } + case event_creation_status_t::failed_offline: { + LOG_DEBUG("Creating core header %d 0x%x was offline.", + lib::toEnumValue(no), + lib::toEnumValue(cluster_id)); + core_properties.erase(it); + return {aggregate_state_t::offline}; + } + case event_creation_status_t::success: { + if (perf_activator->is_legacy_kernel_requires_id_from_read()) { + // ok, read it first + auto [status, ids] = + perf_activator->read_legacy_ids(configuration.header_event.attr.read_format, + header_result.fd->native_handle(), + 1); + switch (status) { + case read_ids_status_t::failed_fatal: { + LOG_DEBUG("Creating core header %d 0x%x failed to read id.", + lib::toEnumValue(no), + lib::toEnumValue(cluster_id)); + core_properties.erase(it); + return {aggregate_state_t::failed}; + } + case read_ids_status_t::failed_offline: { + LOG_DEBUG("Creating core header %d 0x%x failed to read id as offline.", + lib::toEnumValue(no), + lib::toEnumValue(cluster_id)); + core_properties.erase(it); + return {aggregate_state_t::offline}; + } + case read_ids_status_t::success: { + header_result.perf_id = ids.at(0); + break; + } + }; + } + + return {aggregate_state_t::usable, header_result.perf_id, header_result.fd}; + } + default: { + throw std::runtime_error("unexpected aggregate_state_t"); + } + } + } + }; +} diff --git a/daemon/agents/perf/events/event_bindings.hpp b/daemon/agents/perf/events/event_bindings.hpp new file mode 100644 index 00000000..6469c63c --- /dev/null +++ b/daemon/agents/perf/events/event_bindings.hpp @@ -0,0 +1,634 @@ +/* Copyright (C) 2021-2022 by Arm Limited. All rights reserved. */ + +#pragma once + +#include "agents/perf/events/event_configuration.hpp" +#include "agents/perf/events/perf_activator.hpp" +#include "agents/perf/events/types.hpp" +#include "k/perf_event.h" +#include "lib/Assert.h" +#include "lib/EnumUtils.h" +#include "lib/Span.h" + +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace agents::perf { + + /** Enumerates possible states for each binding */ + enum class event_binding_state_t { + /** The event has not been created or enabled */ + offline, + /** The event has been created (fd & perf if is valid), but it has not been activated yet */ + ready, + /** The event has been activated and is collecting data */ + online, + /** The evemt could not be created/activated due to some fatal error */ + failed, + /** The event was terminate (for example because the process being tracked has exited) */ + terminated, + /** The event was not supported on the given pmu */ + not_supported, + }; + + /** Enumerate possible states for the aggregate bindings */ + enum class aggregate_state_t { + /** All bindings are offline */ + offline, + /** At least some bindings are ready/online */ + usable, + /** All bindings are failed (or mix of failed / terminated) */ + failed, + /** All bindings are terminated */ + terminated, + }; + + /** + * An Event binding represents a single instance of a perf event, linking the event specification in the perf_event_attr, and gator key + * to its event fd and perf id. Each binding is for a single core+pid only. 
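The relationship between the per-binding states and the aggregate states defined above can be summarised with a small helper; this is one reasonable editorial reading of the enum documentation, not a function from the patch.

// Editorial sketch: collapsing per-binding states into the documented aggregate meaning.
inline aggregate_state_t summarise(std::vector<event_binding_state_t> const & states)
{
    bool any_usable = false;
    bool all_offline = !states.empty();
    bool all_terminated = !states.empty();
    for (auto s : states) {
        any_usable     |= (s == event_binding_state_t::ready) || (s == event_binding_state_t::online);
        all_offline    &= (s == event_binding_state_t::offline);
        all_terminated &= (s == event_binding_state_t::terminated);
    }
    if (any_usable)     return aggregate_state_t::usable;     // at least some ready/online
    if (all_terminated) return aggregate_state_t::terminated; // every binding terminated
    if (all_offline)    return aggregate_state_t::offline;    // nothing created yet
    return aggregate_state_t::failed;                         // failed, or a failed/terminated mix
}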
+ * + * Bindings have state representing whether or not the event has been created, enabled/disabled, or failed. + * Events start in 'offline' state and are transitioned to 'ready' by the `create_event` method. + * Once an event is 'ready' it may be activated for data collection by the `start` method, moving it to the 'online' state. + * The event may also be disabled (pausing data collection) by the `stop` method, which will move the event back from 'online' to 'ready'. + * In any case where the even cannot be created/activated because of a fatal error, the event moves to the 'failed' state. + * The event may also be moved to the 'offline' state by the `offline` method, which fully deletes the perf event, closing any associated fd. + */ + template + class event_binding_t { + public: + using stream_descriptor_t = StreamDescriptor; + + explicit event_binding_t(event_definition_t const & event) : event(event) + { + runtime_assert((event.attr.read_format & PERF_FORMAT_ID) == PERF_FORMAT_ID, "PERF_FORMAT_ID is required"); + } + + /** @return the key associated with the event */ + [[nodiscard]] gator_key_t get_key() const { return event.key; } + + /** @return the perf id associated with the event */ + [[nodiscard]] perf_event_id_t get_id() const { return perf_id; } + + /** @return the file descriptor associated with the event */ + [[nodiscard]] int get_fd() { return (fd ? fd->native_handle() : -1); } + + /** @return the read_format for the event attribute */ + [[nodiscard]] std::uint64_t get_read_format() const { return event.attr.read_format; } + + /** @return true if the event is in the offline state, or false otherwise */ + [[nodiscard]] bool is_offline() const { return state == event_binding_state_t::offline; } + + /** Set the event id as read from the legacy read id method */ + void set_id(perf_event_id_t id) { perf_id = id; } + + /** + * Attempt to online this binding. + * If the binding is offline, then it is transitioned to ready. + * If the binding is ready or online, nothing happens. + * If the binding is in a failed state, or creating fails, then it will be put in / stay in failed state. + * @return The new state + */ + template + [[nodiscard]] event_binding_state_t create_event(bool enable_on_exec, + int group_fd, + MmapTracker && mmap_tracker, + PerfActivator && activator, + core_no_t core_no, + pid_t pid, + std::uint32_t spe_type) + { + + switch (state) { + case event_binding_state_t::offline: { + // attempt to create, updating state + return ( + state = + do_create_event(enable_on_exec, group_fd, mmap_tracker, activator, core_no, pid, spe_type)); + } + case event_binding_state_t::ready: + case event_binding_state_t::online: + case event_binding_state_t::failed: + case event_binding_state_t::terminated: + case event_binding_state_t::not_supported: + return state; + default: + throw std::runtime_error("unexpected event_binding_state_t"); + } + } + + /** Start the event if it is read, transitioning to 'online' */ + template + [[nodiscard]] event_binding_state_t start(PerfActivator && activator) + { + switch (state) { + case event_binding_state_t::ready: { + // start the event + auto result = activator.start(fd->native_handle()); + + if ((!result) && (fd->native_handle() == -1)) { + LOG_DEBUG("Raced against fd->close(), ignoring failure to start"); + return event_binding_state_t::online; + } + + // update the state and return it + state = (result ? 
event_binding_state_t::online : event_binding_state_t::failed); + return state; + } + + case event_binding_state_t::offline: + case event_binding_state_t::online: + case event_binding_state_t::failed: + case event_binding_state_t::terminated: + case event_binding_state_t::not_supported: { + // no change required + return state; + } + + default: + throw std::runtime_error("unexpected event_binding_state_t"); + } + } + + /** Clean up all data and move back to 'offline' or 'failed' state */ + template + void stop(PerfActivator && activator, bool failed) + { + // stop it if it is running + if (fd) { + activator.stop(fd->native_handle()); + } + + // clear state + this->fd.reset(); + this->perf_id = perf_event_id_t::invalid; + this->state = (failed ? event_binding_state_t::failed : event_binding_state_t::offline); + } + + private: + /** Does the attr require an aux buffer ? */ + static constexpr bool requires_aux(std::uint64_t spe_type, std::uint64_t attr_type) + { + return ((attr_type >= PERF_TYPE_MAX) && (attr_type == spe_type)); + } + + /** The attribute structure */ + event_definition_t const & event; + /** The current state of the binding */ + event_binding_state_t state {event_binding_state_t::offline}; + /** The id allocated by perf */ + perf_event_id_t perf_id = perf_event_id_t::invalid; + /** The opened file descriptor */ + std::shared_ptr fd {}; + + /** Perform the online, returning the new state */ + template + [[nodiscard]] event_binding_state_t do_create_event(bool enable_on_exec, + int group_fd, + MmapTracker && mmap_tracker, + PerfActivator && activator, + core_no_t core_no, + pid_t pid, + std::uint32_t spe_type) + { + // never enable it (Streamline expects the id->key map to be received before any ringbuffer data) + const auto enable_state = (enable_on_exec ? 
perf_activator_t::enable_state_t::enable_on_exec + : perf_activator_t::enable_state_t::disabled); + // do the activation + const auto result = activator.create_event(event, enable_state, core_no, pid, group_fd); + + switch (result.status) { + case perf_activator_t::event_creation_status_t::success: { + // add it to the mmap + if (!mmap_tracker(result.fd, requires_aux(spe_type, event.attr.type))) { + return event_binding_state_t::failed; + } + // success + this->perf_id = result.perf_id; + this->fd = std::move(result.fd); + return event_binding_state_t::ready; + } + + case perf_activator_t::event_creation_status_t::failed_offline: + return event_binding_state_t::offline; + + case perf_activator_t::event_creation_status_t::failed_invalid_device: + return event_binding_state_t::not_supported; + + case perf_activator_t::event_creation_status_t::failed_invalid_pid: + return event_binding_state_t::terminated; + + case perf_activator_t::event_creation_status_t::failed_fatal: + return event_binding_state_t::failed; + + default: + throw std::runtime_error("unexpected event_creation_status_t"); + } + } + }; + + /** + * Represents a group of one or more event bindings collected into a perf event group + */ + template + class event_binding_group_t { + public: + using event_binding_type = event_binding_t; + + event_binding_group_t(event_definition_t const & leader, lib::Span children) + { + runtime_assert(children.empty() || ((leader.attr.read_format & PERF_FORMAT_GROUP) == PERF_FORMAT_GROUP), + "Must be a stand alone attribute, or PERF_FORMAT_GROUP is required"); + + // insert the leader as the first item + bindings.emplace_back(leader); + // and the children follow it + for (auto const & child : children) { + bindings.emplace_back(child); + } + } + + /** Insert another child event into the group */ + [[nodiscard]] bool add_event(event_definition_t const & event) + { + if (!bindings.front().is_offline()) { + return false; + } + + bindings.emplace_back(event); + return true; + } + + /** + * Create all event bindings in the group + */ + template + [[nodiscard]] aggregate_state_t create_events(bool enable_on_exec, + IdToKeyMappingTracker && id_to_key_mapping_tracker, + MmapTracker && mmap_tracker, + PerfActivator && activator, + core_no_t core_no, + pid_t pid, + std::uint32_t spe_type) + { + auto const legacy_id_from_read = activator.is_legacy_kernel_requires_id_from_read(); + std::vector bindings_for_id_read {}; + + // first activate the leader + auto & leader = bindings.front(); + auto leader_state = + leader.create_event(enable_on_exec, -1, mmap_tracker, activator, core_no, pid, spe_type); + switch (leader_state) { + case event_binding_state_t::ready: + if (legacy_id_from_read) { + bindings_for_id_read.emplace_back(&leader); + } + else { + id_to_key_mapping_tracker(leader.get_key(), leader.get_id()); + } + break; + + case event_binding_state_t::offline: + return aggregate_state_t::offline; + + case event_binding_state_t::online: + return aggregate_state_t::usable; + + case event_binding_state_t::terminated: + return aggregate_state_t::terminated; + + case event_binding_state_t::failed: + case event_binding_state_t::not_supported: + return aggregate_state_t::failed; + + default: + throw std::runtime_error("unexpected event_binding_state_t"); + } + + auto const group_fd = leader.get_fd(); + + // nnw activate any children + for (std::size_t n = 1; n < bindings.size(); ++n) { + auto & child = bindings.at(n); + auto child_state = + child.create_event(enable_on_exec, group_fd, mmap_tracker, activator, 
core_no, pid, spe_type); + switch (child_state) { + case event_binding_state_t::ready: + case event_binding_state_t::online: + if (legacy_id_from_read) { + bindings_for_id_read.emplace_back(&child); + } + else { + id_to_key_mapping_tracker(child.get_key(), child.get_id()); + } + break; + + case event_binding_state_t::terminated: + // the process terminated, just deactive all events + return destroy_events(activator, n + 1, aggregate_state_t::terminated); + + case event_binding_state_t::offline: + // the core went offline, just deactive all events + return destroy_events(activator, n + 1, aggregate_state_t::offline); + + case event_binding_state_t::failed: + // the event failed with unexpected error, offline all events and return failed + return destroy_events(activator, n + 1, aggregate_state_t::failed); + + case event_binding_state_t::not_supported: + // ignored for non-group leaders as usually means legacy bigLITTLE setup + break; + + default: + throw std::runtime_error("unexpected event_binding_state_t"); + } + } + + if (!legacy_id_from_read) { + return aggregate_state_t::usable; + } + + // now get any ids + auto [status, ids] = + activator.read_legacy_ids(leader.get_read_format(), group_fd, bindings_for_id_read.size()); + + switch (status) { + case perf_activator_t::read_ids_status_t::success: { + // the sizes should match; if not it indicates an unexpected error + if (ids.size() != bindings_for_id_read.size()) { + return destroy_events(activator, bindings.size(), aggregate_state_t::failed); + } + + // map the ids to the bindings + for (std::size_t n = 0; n < bindings_for_id_read.size(); ++n) { + // update the binding + auto * binding = bindings_for_id_read[n]; + binding->set_id(ids[n]); + // and add to the tracker + id_to_key_mapping_tracker(binding->get_key(), binding->get_id()); + } + + return aggregate_state_t::usable; + } + + case perf_activator_t::read_ids_status_t::failed_fatal: + return destroy_events(activator, bindings.size(), aggregate_state_t::failed); + + case perf_activator_t::read_ids_status_t::failed_offline: + return destroy_events(activator, bindings.size(), aggregate_state_t::offline); + + default: + throw std::runtime_error("unexpected read_ids_status_t"); + } + } + + /** Start the events if it is read, transitioning to 'online' */ + template + [[nodiscard]] aggregate_state_t start(PerfActivator && activator) + { + auto result = bindings.front().start(activator); + switch (result) { + case event_binding_state_t::offline: + return aggregate_state_t::offline; + + case event_binding_state_t::online: + return aggregate_state_t::usable; + + case event_binding_state_t::failed: + case event_binding_state_t::not_supported: + return aggregate_state_t::failed; + + case event_binding_state_t::terminated: + return aggregate_state_t::terminated; + + case event_binding_state_t::ready: + default: + throw std::runtime_error("unexpected event_binding_state_t"); + } + } + + /** Clean up all data and move back to 'offline' or 'failed' state */ + template + void stop(PerfActivator && activator, bool failed) + { + for (auto & binding : bindings) { + binding.stop(activator, failed); + } + } + + private: + std::vector bindings {}; + + /** + * Destroy any events previously created and return an error + * + * @param activator The activator used to stop the events + * @param down_from The last event that was created (all bindings < down_from will be destroyed, with the assumption that all later ones are not yet created) + * @param reason The failure reason + * @return The aggregate state 
value + */ + template + [[nodiscard]] aggregate_state_t destroy_events(PerfActivator && activator, + std::size_t down_from, + aggregate_state_t reason) + { + for (std::size_t n = 0; n < down_from; ++n) { + bindings.at(n).stop(activator, reason == aggregate_state_t::failed); + } + + return reason; + } + }; + + /** + * Maintains a set of event binding groups that are all associated with the same core / pid. + * Allows transitioning of bindings as a unit from one state to another + */ + template + class event_binding_set_t { + public: + using event_binding_group_type = event_binding_group_t; + + /** + * Construct a new per core event bindings object, for a set of events associated with the provided core, and pid + * + * @param core_no The core no for all events (may be -1 as per perf_event_open) + * @param pid The pid for all events (may be -1 or 0 as per perf_event_open) + */ + event_binding_set_t(core_no_t core_no, pid_t pid) : core_no(core_no), pid(pid) {} + + /** Add a stand alone event + * @retval true if the event was successfully added + * @retval false if the event was not added (e.g. because the bindings were not offline) + */ + [[nodiscard]] bool add_event(event_definition_t const & event) + { + if (state != aggregate_state_t::offline) { + return false; + } + + groups.emplace_back(event, lib::Span {}); + return true; + } + + /** Add a group of events, where the first item in the span is the leader. + * @retval true if the events were successfully added + * @retval false if the events were not added (e.g. because the bindings were not offline, or the span is empty) + */ + [[nodiscard]] bool add_group(lib::Span events) + { + if ((state != aggregate_state_t::offline) || events.empty()) { + return false; + } + + groups.emplace_back(events.front(), events.subspan(1)); + return true; + } + + /** + * Give some event definitions, where the first is a group leader and the rest are + * a mix of stand alone events and members of that group, create the appropriate + * events and groups and add them to the set + * + * @retval true if the events were successfully added + * @retval false if the events wer not added (e.g. because the bindings were not offline, or the span is empty) + */ + [[nodiscard]] bool add_mixed(lib::Span events) + { + if ((state != aggregate_state_t::offline) || events.empty()) { + return false; + } + + runtime_assert(is_group_leader(events.front()), "First item must be group leader"); + + auto & group = groups.emplace_back(events.front(), lib::Span()); + + for (auto const & event : events.subspan(1)) { + if (is_stand_alone(event)) { + groups.emplace_back(event, lib::Span()); + } + else { + auto result = group.add_event(event); + runtime_assert(result, "expected event to be inserted into new group"); + } + } + + return true; + } + + /** @return the current state */ + [[nodiscard]] aggregate_state_t get_state() const noexcept { return state; } + + /** Attempt to ready all the event bindings. 
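Putting the set-level API together, a hedged sketch of a binding set's lifecycle; `cluster_events`, `id_tracker`, `mmap_tracker` and `activator` are stand-ins for the real arguments supplied by the binding manager.

// Editorial sketch: add events, create them, start them, and later offline the set.
event_binding_set_t<stream_descriptor_t> set {core_no, pid};
if (set.add_mixed(cluster_events)) {   // group leader first, then members / standalone events
    auto prepared = set.create_events(enable_on_exec, id_tracker, mmap_tracker, activator, spe_type);
    if (prepared == aggregate_state_t::usable) {
        auto started = set.start(activator);   // enable the groups
        (void) started;
    }
    set.offline(activator);                    // later: close fds, return to 'offline'
}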
*/ + template + [[nodiscard]] aggregate_state_t create_events(bool enable_on_exec, + IdToKeyMappingTracker && id_to_key_mapping_tracker, + MmapTracker && mmap_tracker, + PerfActivator && activator, + std::uint32_t spe_type) + { + bool any_usable = false; + + for (std::size_t n = 0; n < groups.size(); ++n) { + auto & group = groups.at(n); + auto result = group.create_events(enable_on_exec, + id_to_key_mapping_tracker, + mmap_tracker, + activator, + core_no, + pid, + spe_type); + switch (result) { + case aggregate_state_t::usable: + any_usable = true; + break; + case aggregate_state_t::terminated: + case aggregate_state_t::offline: + case aggregate_state_t::failed: + return (state = destroy_groups(activator, n + 1, result)); + + default: + throw std::runtime_error("unexpected aggregate_state_t"); + } + } + + return (state = (any_usable ? aggregate_state_t::usable : aggregate_state_t::terminated)); + } + + /** Attempt to online all the event bindings. */ + template + [[nodiscard]] aggregate_state_t start(PerfActivator && activator) + { + bool any_usable = false; + + for (auto & group : groups) { + auto result = group.start(activator); + switch (result) { + case aggregate_state_t::usable: + any_usable = true; + break; + case aggregate_state_t::terminated: + case aggregate_state_t::offline: + case aggregate_state_t::failed: + return (state = destroy_groups(activator, groups.size(), result)); + + default: + throw std::runtime_error("unexpected aggregate_state_t"); + } + } + + return (state = (any_usable ? aggregate_state_t::usable : aggregate_state_t::terminated)); + } + + /** Clean up all data and move back to 'offline' state. */ + template + void offline(PerfActivator && activator) + { + for (auto & group : groups) { + group.stop(activator, false); + } + state = aggregate_state_t::offline; + } + + private: + [[nodiscard]] static constexpr bool is_group_leader(event_definition_t const & event) + { + return event.attr.pinned; + } + + [[nodiscard]] static constexpr bool is_stand_alone(event_definition_t const & event) + { + return event.attr.pinned; + } + + std::vector groups {}; + core_no_t core_no; + pid_t pid; + aggregate_state_t state {aggregate_state_t::offline}; + + /** + * Destroy any groups previously created and return an error + * + * @param down_from The last group that was created (all group < down_from will be destroyed, with the assumption that all later ones are not yet created) + * @param reason The faiure reason + * @return The aggregate state value + */ + template + [[nodiscard]] aggregate_state_t destroy_groups(PerfActivator && activator, + std::size_t down_from, + aggregate_state_t reason) + { + for (std::size_t n = 0; n < down_from; ++n) { + groups.at(n).stop(activator, reason == aggregate_state_t::failed); + } + return reason; + } + }; +} diff --git a/daemon/agents/perf/events/event_configuration.hpp b/daemon/agents/perf/events/event_configuration.hpp new file mode 100644 index 00000000..a099b554 --- /dev/null +++ b/daemon/agents/perf/events/event_configuration.hpp @@ -0,0 +1,39 @@ +/* Copyright (C) 2021-2022 by Arm Limited. All rights reserved. 
*/ + +#pragma once + +#include "agents/perf/events/types.hpp" +#include "k/perf_event.h" + +#include +#include +#include + +namespace agents::perf { + + /** + * Defines a single perf event's properties for an event that should be captured + */ + struct event_definition_t { + perf_event_attr attr; + gator_key_t key; + }; + + /** + * Defines the active capture configuration for the perf capture service + */ + struct event_configuration_t { + /** An empty (dummy) event used for the output event for a per-cpu mmap */ + event_definition_t header_event; + /** The set of events that should be selected globally (i.e. on every active CPU, regardless of CPU type) */ + std::vector global_events {}; + /** The SPE events, defining the events that may be activated for every CPU that supports SPE */ + std::vector spe_events {}; + /** The map from cluster index to set of events, defining the events that may be activated for any CPU matching a given type */ + std::map> cluster_specific_events {}; + /** The map from uncore pmu index to set of events, defining the events that may be activated for that uncore */ + std::map> uncore_specific_events {}; + /** The map of CPU specific events, defining the events that may be activated for a specific CPU */ + std::map> cpu_specific_events {}; + }; +} diff --git a/daemon/agents/perf/events/perf_activator.cpp b/daemon/agents/perf/events/perf_activator.cpp new file mode 100644 index 00000000..9b048368 --- /dev/null +++ b/daemon/agents/perf/events/perf_activator.cpp @@ -0,0 +1,476 @@ +/* Copyright (C) 2021-2022 by Arm Limited. All rights reserved. */ + +#include "agents/perf/events/perf_activator.hpp" + +#include "Logging.h" +#include "agents/perf/events/event_configuration.hpp" +#include "agents/perf/events/types.hpp" +#include "k/perf_event.h" +#include "lib/Assert.h" +#include "lib/AutoClosingFd.h" +#include "lib/EnumUtils.h" +#include "lib/Span.h" +#include "lib/String.h" +#include "lib/Syscall.h" +#include "lib/Utils.h" +#include "lib/error_code_or.hpp" +#include "linux/perf/PerfUtils.h" + +#include +#include +#include +#include + +#include +#include +#include + +#include + +namespace agents::perf { + namespace { + + inline lib::AutoClosingFd perf_event_open(perf_event_attr & attr, + pid_t pid, + int core, + int group_fd, + bool supports_cloexec) + { + auto const flags = PERF_FLAG_FD_OUTPUT | (supports_cloexec ? PERF_FLAG_FD_CLOEXEC : 0UL); + + int result = lib::perf_event_open(&attr, pid, core, group_fd, flags); + if (result < 0) { + return {}; + } + + if (!supports_cloexec) { + int fdf = lib::fcntl(result, F_GETFD); + //NOLINTNEXTLINE(hicpp-signed-bitwise) - FD_CLOEXEC + if (lib::fcntl(result, F_SETFD, fdf | FD_CLOEXEC) != 0) { + LOG_DEBUG("failed to set CLOEXEC on perf event due to %d", errno); + } + } + + return lib::AutoClosingFd {result}; + } + + lib::error_code_or_t try_perf_event_open(perf_event_attr & attr, + pid_t pid, + int core, + int group_fd, + bool supports_cloexec, + lib::Span const> patterns) + { + for (auto const pattern : patterns) { + // set + attr.exclude_kernel = pattern[0]; + attr.exclude_hv = pattern[1]; + attr.exclude_idle = pattern[2]; + + // try to open the event as is + auto fd = perf_event_open(attr, pid, core, group_fd, supports_cloexec); + + // take a copy of errno so that logging calls etc don't overwrite it + auto peo_errno = boost::system::errc::make_error_code(boost::system::errc::errc_t(errno)); + + // successfully created? 
+ if (fd) { + LOG_DEBUG("Succeeded when exclude_kernel=%u, exclude_hv=%u, exclude_idle=%u", + bool(attr.exclude_kernel), + bool(attr.exclude_hv), + bool(attr.exclude_idle)); + + return {std::move(fd)}; + } + + LOG_DEBUG("Failed when exclude_kernel=%u, exclude_hv=%u, exclude_idle=%u with %s", + bool(attr.exclude_kernel), + bool(attr.exclude_hv), + bool(attr.exclude_idle), + peo_errno.message().c_str()); + + // not an error we can retry? + if ((peo_errno != boost::system::errc::errc_t::permission_denied) + && (peo_errno != boost::system::errc::errc_t::operation_not_permitted) + && (peo_errno != boost::system::errc::errc_t::operation_not_supported)) { + return {peo_errno}; + } + } + + // just return permission denied + return {boost::system::errc::make_error_code(boost::system::errc::permission_denied)}; + } + + perf_event_id_t read_perf_id(int fd) + { + // get the id + std::uint64_t id = 0; + //NOLINTNEXTLINE(hicpp-signed-bitwise) - PERF_EVENT_IOC_ID + if ((lib::ioctl(fd, PERF_EVENT_IOC_ID, reinterpret_cast(&id)) != 0) +#if (__SIZEOF_LONG__ < 8) + // Workaround for running 32-bit gatord on 64-bit systems, kernel patch in the works + && (lib::ioctl(fd, + ((PERF_EVENT_IOC_ID & ~IOCSIZE_MASK) | (8 << _IOC_SIZESHIFT)), + reinterpret_cast(&id)) + != 0) +#endif + ) { + return perf_event_id_t::invalid; + } + return perf_event_id_t(id); + } + + mmap_ptr_t try_mmap_with_logging(core_no_t core_no, + const buffer_config_t & config, + std::size_t length, + off_t offset, + int fd) + { + mmap_ptr_t result {lib::mmap(nullptr, length, PROT_READ | PROT_WRITE, MAP_SHARED, fd, offset), length}; + + if (!result) { + auto const mm_errno = boost::system::errc::make_error_code(boost::system::errc::errc_t(errno)); + + LOG_DEBUG("mmap failed for fd %i (errno=%d, %s, mmapLength=%zu, offset=%zu)", + fd, + mm_errno.value(), + mm_errno.message().c_str(), + length, + static_cast(offset)); + + if ((mm_errno == boost::system::errc::errc_t::not_enough_memory) + || ((errno == boost::system::errc::errc_t::operation_not_permitted) && (getuid() != 0))) { + LOG_ERROR("Could not mmap perf buffer on cpu %d, '%s' (errno: %d) returned.\n" + "This may be caused by a limit in /proc/sys/kernel/perf_event_mlock_kb.\n" + "Try again with a smaller value of --mmap-pages.\n" + "Usually, a value of ((perf_event_mlock_kb * 1024 / page_size) - 1) or lower will work.\n" + "The current effective value for --mmap-pages is %zu", + lib::toEnumValue(core_no), + mm_errno.message().c_str(), + mm_errno.value(), + config.data_buffer_size / config.page_size); + + // log online state for core + lib::dyn_printf_str_t online_path {"/sys/devices/system/cpu/cpu%d/online", + lib::toEnumValue(core_no)}; + std::int64_t online_status = 0; + lib::readInt64FromFile(online_path.c_str(), online_status); + LOG_DEBUG("Online status for cpu%d is %" PRId64, lib::toEnumValue(core_no), online_status); + + // and mlock value + std::optional file_value = perf_utils::readPerfEventMlockKb(); + if (file_value.has_value()) { + LOG_DEBUG(" Perf MlockKb Value is %" PRId64, file_value.value()); + } + else { + LOG_DEBUG("reading Perf MlockKb returned null"); + } + } + } + else { + LOG_DEBUG("mmap passed for fd %i (mmapLength=%zu, offset=%zu)", + fd, + length, + static_cast(offset)); + } + + return result; + } + + /** + * Calculate the mmap region from @a config. + * + * @param config Buffer config. + * @return Size in bytes. 
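As a worked example of the --mmap-pages hint in the error message above (516 KiB is only a commonly seen default for perf_event_mlock_kb, not a value taken from this patch):

// (perf_event_mlock_kb * 1024 / page_size) - 1
//   = (516 * 1024 / 4096) - 1
//   = 129 - 1
//   = 128
// so "--mmap-pages 128" (or lower) should mmap successfully without raising the limit.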
+ */ + [[nodiscard]] constexpr std::size_t get_data_mmap_length(buffer_config_t const & config) + { + return config.page_size + config.data_buffer_size; + } + } + + bool perf_activator_t::is_legacy_kernel_requires_id_from_read() const + { + return !capture_configuration->perf_config.has_ioctl_read_id; + } + + std::pair> + // NOLINTNEXTLINE(bugprone-easily-swappable-parameters) + perf_activator_t::read_legacy_ids(std::uint64_t read_format, int group_fd, std::size_t nr_ids) + { + constexpr int retry_count = 10; + + auto const is_id = ((read_format & PERF_FORMAT_ID) == PERF_FORMAT_ID); + runtime_assert(is_id, "PERF_FORMAT_ID is required"); + auto const is_group = ((read_format & PERF_FORMAT_GROUP) == PERF_FORMAT_GROUP); + auto const is_time_enabled = ((read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) == PERF_FORMAT_TOTAL_TIME_ENABLED); + auto const is_time_running = ((read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) == PERF_FORMAT_TOTAL_TIME_RUNNING); + auto const required_u64 = (is_time_enabled ? 1 : 0) // + + (is_time_running ? 1 : 0) // + + (is_group ? (nr_ids * 2) + 1 : 2); + + std::vector buffer(required_u64); + + for (int retry = 0; retry < retry_count; ++retry) { + auto bytes = + lib::read(group_fd, reinterpret_cast(buffer.data()), buffer.size() * sizeof(std::uint64_t)); + + if (bytes < 0) { + auto rerrno = boost::system::errc::make_error_code(boost::system::errc::errc_t(errno)); + LOG_DEBUG("read failed for read_legacy_ids with %d (%s)", rerrno.value(), rerrno.message().c_str()); + return {read_ids_status_t::failed_fatal, {}}; + } + + if (bytes == 0) { + /* pinning failed, retry */ + usleep(1); + continue; + } + + // decode the buffer and emit key->id mappings + auto const nr = (is_group ? buffer[0] : 1); + auto const id_offset = (1 + (is_time_enabled ? 1 : 0) + (is_time_running ? 1 : 0) + (is_group ? 
1 : 0)); + + if (nr != nr_ids) { + LOG_ERROR("Unexpected read_format data read (invalid size, expected %zu, got %" PRIu64 ", group=%u)", + nr_ids, + nr, + is_group); + return {read_ids_status_t::failed_fatal, {}}; + } + + std::vector result {}; + for (std::size_t n = 0; n < nr; ++n) { + auto const id = perf_event_id_t(buffer[id_offset + (2 * n)]); + result.emplace_back(id); + } + return {read_ids_status_t::success, std::move(result)}; + } + + return {read_ids_status_t::failed_offline, {}}; + } + + //NOLINTNEXTLINE(readability-function-cognitive-complexity) + perf_activator_t::event_creation_result_t perf_activator_t::create_event(event_definition_t const & event, + enable_state_t enable_state, + core_no_t core_no, + pid_t pid, + int group_fd) + { + constexpr std::array, 4> exclude_pattern_exclude_kernel {{ + // exclude_kernel, exclude_hv, exclude_idle + {{true, true, true}}, + {{true, true, false}}, + {{true, false, true}}, + {{true, false, false}}, + }}; + + constexpr std::array, 6> exclude_pattern_include_kernel {{ + // exclude_kernel, exclude_hv, exclude_idle + {{false, false, false}}, + {{false, true, false}}, + // these are the same as per exclude_pattern_exclude_kernel + {{true, true, true}}, + {{true, true, false}}, + {{true, false, true}}, + {{true, false, false}}, + }}; + + // prepare the attribute + // Note we are modifying the attr after we have marshalled it + // but we are assuming the modifications are not important to Streamline + auto attr = event.attr; + + // set enable on exec bit + attr.disabled = ((group_fd < 0) && (enable_state != enable_state_t::enabled)); + attr.enable_on_exec = (attr.disabled && (enable_state == enable_state_t::enable_on_exec)); + + LOG_DEBUG("Opening attribute:\n" + " cpu: %i\n" + " key: %i\n" + " -------------\n" + "%s", + lib::toEnumValue(core_no), + lib::toEnumValue(event.key), + perf_event_printer.perf_attr_to_string(attr, core_no, " ", "\n").c_str()); + LOG_DEBUG("perf_event_open: cpu: %d, pid: %d, leader = %d", lib::toEnumValue(core_no), pid, group_fd); + + lib::AutoClosingFd fd {}; + boost::system::error_code peo_errno {}; + + // if the attr excludes kernel events, then try by excluding various combinations of exclude_bits starting from most restrictive + if (attr.exclude_kernel) { + auto result = try_perf_event_open(attr, + pid, + int(core_no), + group_fd, + capture_configuration->perf_config.has_fd_cloexec, + exclude_pattern_exclude_kernel); + + lib::get_error_or_value(std::move(result), fd, peo_errno); + } + else { + auto result = try_perf_event_open(attr, + pid, + int(core_no), + group_fd, + capture_configuration->perf_config.has_fd_cloexec, + exclude_pattern_include_kernel); + + lib::get_error_or_value(std::move(result), fd, peo_errno); + } + + // process the failure? + if (!fd) { + LOG_DEBUG("... 
failed %d %s", peo_errno.value(), peo_errno.message().c_str()); + + if (peo_errno == boost::system::errc::errc_t::no_such_device) { + // CPU offline + return event_creation_result_t {event_creation_status_t::failed_offline}; + } + if (peo_errno == boost::system::errc::errc_t::no_such_process) { + // thread terminated before we could open the event + return event_creation_result_t {event_creation_status_t::failed_invalid_pid}; + } + if (peo_errno == boost::system::errc::errc_t::no_such_file_or_directory) { + // event doesn't apply to this cpu + return event_creation_result_t {event_creation_status_t::failed_invalid_device}; + } + + // all other errors are fatal + std::ostringstream error_message {}; + + error_message << "perf_event_open failed to online counter for " + << perf_event_printer.map_attr_type(attr.type, core_no); + error_message << " with config=0x" << std::hex << attr.config << std::dec; + error_message << " on CPU " << int(core_no); + error_message << ". Failure given was errno=" << peo_errno.value() << " (" << peo_errno.message() << ")."; + + if (capture_configuration->perf_config.is_system_wide) { + if (peo_errno == boost::system::errc::errc_t::invalid_argument) { + switch (event.attr.type) { + case PERF_TYPE_BREAKPOINT: + case PERF_TYPE_SOFTWARE: + case PERF_TYPE_TRACEPOINT: + break; + case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: + case PERF_TYPE_RAW: + default: + error_message << "\n\nAnother process may be using the PMU counter, or the " + "combination requested may not be supported by the hardware. Try " + "removing some events."; + break; + } + } + } + + // some other error + return event_creation_result_t {peo_errno, error_message.str()}; + } + + // read the id + perf_event_id_t perf_id = perf_event_id_t::invalid; + + if (capture_configuration->perf_config.has_ioctl_read_id) { + perf_id = read_perf_id(*fd); + if (perf_id == perf_event_id_t::invalid) { + // take a new copy of the errno if it failed, before calling log + peo_errno = boost::system::errc::make_error_code(boost::system::errc::errc_t(errno)); + LOG_DEBUG("Reading a perf event id failed for file-descriptor %d with error %d (%s)", + *fd, + peo_errno.value(), + peo_errno.message().c_str()); + return event_creation_result_t {peo_errno}; + } + } + + LOG_DEBUG("... 
event activated successfully %" PRIu64 " %d", lib::toEnumValue(perf_id), *fd); + + // complete + return event_creation_result_t {perf_id, + std::make_shared(context, fd.release())}; + } + + //NOLINTNEXTLINE(readability-convert-member-functions-to-static) + bool perf_activator_t::set_output(int fd, int output_fd) + { + runtime_assert((output_fd > 0), "invalid output_fd"); + + //NOLINTNEXTLINE(hicpp-signed-bitwise) - PERF_EVENT_IOC_SET_OUTPUT + if (lib::ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, output_fd) != 0) { + // take a new copy of the errno if it failed, before calling log + auto peo_errno = boost::system::errc::make_error_code(boost::system::errc::errc_t(errno)); + LOG_DEBUG("Setting the output fd for perf event %d with error %d (%s)", + fd, + peo_errno.value(), + peo_errno.message().c_str()); + return false; + } + + return true; + } + + //NOLINTNEXTLINE(readability-convert-member-functions-to-static) + perf_ringbuffer_mmap_t perf_activator_t::mmap_data(core_no_t core_no, int fd) + { + auto const & ringbuffer_config = capture_configuration->ringbuffer_config; + auto const data_length = get_data_mmap_length(ringbuffer_config); + + auto data_mapping = try_mmap_with_logging(core_no, ringbuffer_config, data_length, 0, fd); + if (!data_mapping) { + return {}; + } + + return {ringbuffer_config.page_size, std::move(data_mapping)}; + } + + void perf_activator_t::mmap_aux(perf_ringbuffer_mmap_t & mmap, core_no_t core_no, int fd) + { + auto const & ringbuffer_config = capture_configuration->ringbuffer_config; + auto const data_length = get_data_mmap_length(ringbuffer_config); + auto const aux_length = ringbuffer_config.aux_buffer_size; + + if (data_length > std::numeric_limits::max()) { + LOG_DEBUG("Offset for perf aux buffer is out of range: %zu", data_length); + return; + } + + // Update the header + auto * pemp = mmap.header(); + pemp->aux_offset = data_length; + pemp->aux_size = aux_length; + + auto aux_mapping = + try_mmap_with_logging(core_no, ringbuffer_config, aux_length, static_cast(data_length), fd); + if (!aux_mapping) { + return; + } + + mmap.set_aux_mapping(std::move(aux_mapping)); + } + + //NOLINTNEXTLINE(readability-convert-member-functions-to-static) + bool perf_activator_t::start(int fd) + { + LOG_DEBUG("enabling fd %d", fd); + //NOLINTNEXTLINE(hicpp-signed-bitwise) - PERF_EVENT_IOC_ENABLE + return (lib::ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) == 0); + } + + //NOLINTNEXTLINE(readability-convert-member-functions-to-static) + bool perf_activator_t::stop(int fd) + { + LOG_DEBUG("disabling fd %d", fd); + //NOLINTNEXTLINE(hicpp-signed-bitwise) - PERF_EVENT_IOC_DISABLE + return (lib::ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) == 0); + } + + //NOLINTNEXTLINE(readability-convert-member-functions-to-static) + bool perf_activator_t::re_enable(int fd) + { + LOG_DEBUG("enabling fd %d", fd); + //NOLINTNEXTLINE(hicpp-signed-bitwise) - PERF_EVENT_IOC_ENABLE + return (lib::ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) == 0); + } +} diff --git a/daemon/agents/perf/events/perf_activator.hpp b/daemon/agents/perf/events/perf_activator.hpp new file mode 100644 index 00000000..6a59fa36 --- /dev/null +++ b/daemon/agents/perf/events/perf_activator.hpp @@ -0,0 +1,211 @@ +/* Copyright (C) 2021-2022 by Arm Limited. All rights reserved. 
*/ + +#pragma once + +#include "agents/perf/capture_configuration.h" +#include "agents/perf/events/event_configuration.hpp" +#include "agents/perf/events/perf_event_utils.hpp" +#include "agents/perf/events/perf_ringbuffer_mmap.hpp" +#include "agents/perf/events/types.hpp" +#include "lib/Syscall.h" + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +namespace agents::perf { + /** + * Interface for an object used to create and manipulate raw perf events + */ + class perf_activator_t { + public: + /** Configures how/when events should be enabled */ + enum class enable_state_t { + /** Event is created in a disabled state */ + disabled, + /** Event is created in a disabled state with enable_on_exec set */ + enable_on_exec, + /** Event is created in an enabled state */ + enabled, + }; + + /** Enumerates event creation result status possibilities */ + enum class event_creation_status_t { + /** The event creation failed due to some error */ + failed_fatal, + /** The event creation failed because the target core was offline */ + failed_offline, + /** The event creation failed because the target pid was invalid */ + failed_invalid_pid, + /** The event creation failed because the event was not supported on the specified pmu (or cpu) */ + failed_invalid_device, + /** The event creation succeeded */ + success, + }; + + /** Returned as part of read_legacy_ids to indicate result status */ + enum class read_ids_status_t { + /** Reading the ids failed with a fatal error */ + failed_fatal, + /** Reading the ids failed because the core was offline */ + failed_offline, + /** Reading the ids succeeded */ + success, + }; + + /** The stream type */ + using stream_descriptor_t = boost::asio::posix::stream_descriptor; + + /** Event creation result tuple returned by the create_event function */ + struct event_creation_result_t { + /** The event ID, or invalid. Only meaningful when (failed == false) */ + perf_event_id_t perf_id {perf_event_id_t::invalid}; + /** The event file descriptor. Only meaningful when (failed == false) && (perf_id != invalid) */ + std::shared_ptr fd {}; + /** The result status */ + event_creation_status_t status {event_creation_status_t::failed_fatal}; + /** The errno value returned by perf_event_open, if status is failed_fatal */ + boost::system::error_code perf_errno {}; + /** An optional error message for failed_fatal */ + std::optional error_message {}; + + explicit event_creation_result_t(event_creation_status_t status) : status(status) {} + + explicit event_creation_result_t(boost::system::error_code perf_errno, + std::optional error_message = {}) + : perf_errno(perf_errno), error_message(std::move(error_message)) + { + } + + event_creation_result_t(perf_event_id_t perf_id, std::shared_ptr fd) + : perf_id(perf_id), fd(std::move(fd)), status(event_creation_status_t::success) + { + } + }; + + perf_activator_t(std::shared_ptr conf, boost::asio::io_context & context) + : capture_configuration(std::move(conf)), + context(context), + perf_event_printer(capture_configuration->cpuid_to_core_name, + capture_configuration->per_core_cpuids, + capture_configuration->perf_pmu_type_to_name) + { + } + + /** @return True if the kernel is old and requires using 'read' to determine the ID of events in a group */ + [[nodiscard]] bool is_legacy_kernel_requires_id_from_read() const; + + /** + * Using the legacy method, read the IDs for a set of one or more events in a group.
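+ * + * For reference, the layout decoded from the read() call is { value, [time_enabled], [time_running], id } for a single event, or { nr, [time_enabled], [time_running], { value, id } * nr } when PERF_FORMAT_GROUP is set.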
+ * + * @param read_format The read_format for the group leader (or single) attribute + * @param group_fd The group (or single event) file descriptor + * @param nr_ids The number of events in the group (which must be >= 1) + * @return a pair containing the status code and a vector containing the ids that were read + */ + [[nodiscard]] static std::pair> + read_legacy_ids(std::uint64_t read_format, int group_fd, std::size_t nr_ids); + + /** + * Create the new event, but do not start it. The event is created in a disabled state, and its fd and perf id are returned. + * + * @param event The event to create + * @param enable_state Configures the disable and enable_on_exec flags for the event + * @param core_no The CPU no to use for the event + * @param pid The PID to use for the event + * @param group_fd The group leader fd, or -1 + * @return An `event_creation_result_t`, containing the status flag, perf id and file descriptor. + * When the status flag is failed_fatal, the perf id and fd must be ignored; a fatal + * error occurred and the binding will move to failed state. `perf_errno` will be set to + * reflect the error state after the call to perf_event_open. + * When the status flag is failed_offline, the perf id and fd must be ignored; a + * non-fatal error occurred and the binding will remain in offline state. + * When the status flag is failed_invalid_pid, the perf id and fd must be ignored; + * a possibly non-fatal error occurred because the pid provided was not valid, and the + * binding will move to terminated or failed state. + * When the status flag is failed_invalid_device, the perf id and fd must be ignored; + * a possibly non-fatal error occurred due to perf_event_open returning ENOENT, and + * the binding will move to terminated or failed state. + * Otherwise, the status flag is success, the perf id and fd are set, the event was successfully created and the binding can move to ready state. + */ + [[nodiscard]] event_creation_result_t create_event(event_definition_t const & event, + enable_state_t enable_state, + core_no_t core_no, + pid_t pid, + int group_fd); + + /** + * Redirect mmap output from one fd to another + * + * @param fd The fd to redirect + * @param output_fd the target fd + * @return true success + * @return false failure + */ + [[nodiscard]] bool set_output(int fd, int output_fd); + + /** + * MMap the ringbuffer for the provided file descriptor. + * + * @note This method is only for the data region, use + * mmap_aux(perf_ringbuffer_mmap_t const & mmap, core_no_t core_no, int fd) for the aux region + * @param core_no The core no associated with the mapping + * @param fd The event file descriptor + * @return The mapping object + */ + [[nodiscard]] perf_ringbuffer_mmap_t mmap_data(core_no_t core_no, int fd); + + /** + * MMaps the aux region for the provided file descriptor. + * + * @note This method is only for the aux region, use mmap_data(core_no_t core_no, int fd) for the data region + * @param mmap Reference to ringbuffer mmap object, this will be updated with the new region if mmap-ing is + * successful + * @param core_no The core no associated with the mapping + * @param fd The event file descriptor + */ + void mmap_aux(perf_ringbuffer_mmap_t & mmap, core_no_t core_no, int fd); + + /** + * Enable the event, so that it starts producing data.
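+ * (Typically called once the event has been created via create_event and its ringbuffer set up via mmap_data / set_output; the underlying ioctl enables the whole group via PERF_IOC_FLAG_GROUP.)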
+ * + * @param fd The event file descriptor + * @retval true The event was enabled without error + * @retval false The event could not be enabled due to some error (which usually implies the event is invalid) + */ + [[nodiscard]] bool start(int fd); + + /** + * Disable the event, so that it stops producing data (but it is not removed, so could be started again). + * + * @param fd The event file descriptor + * @retval true The event was disabled without error + * @retval false The event could not be disabled due to some error (which usually implies the event is invalid) + */ + bool stop(int fd); + + /** + * Re-enable a single event (for example an AUX fd that was disabled on buffer full) + * + * @param fd The event file descriptor + * @retval true The event was enabled without error + * @retval false The event could not be enabled due to some error (which usually implies the event is invalid) + */ + bool re_enable(int fd); + + private: + std::shared_ptr capture_configuration; + boost::asio::io_context & context; + perf_event_printer_t perf_event_printer; + }; +} diff --git a/daemon/agents/perf/events/perf_event_utils.cpp b/daemon/agents/perf/events/perf_event_utils.cpp new file mode 100644 index 00000000..cb7e0a0b --- /dev/null +++ b/daemon/agents/perf/events/perf_event_utils.cpp @@ -0,0 +1,91 @@ +/* Copyright (C) 2022 by Arm Limited. All rights reserved. */ + +#include "agents/perf/events/perf_event_utils.hpp" + +#include "lib/Assert.h" +#include "lib/EnumUtils.h" +#include "lib/Format.h" + +#include +#include + +namespace agents::perf { + + char const * perf_event_printer_t::map_core_cluster_name(core_no_t core_no) + { + auto index = lib::toEnumValue(core_no); + runtime_assert(((index >= 0) && (std::size_t(index) < per_core_cpuids.size())), "Unexpected core no"); + auto cpuid = per_core_cpuids[index]; + auto it = cpuid_to_core_name.find(cpuid); + if (it == cpuid_to_core_name.end()) { + return "Unknown"; + } + return it->second.c_str(); + } + + char const * perf_event_printer_t::map_custom_pmu_type(std::uint32_t type, core_no_t core_no) + { + // use provided label? 
+ auto it = perf_pmu_type_to_name.find(type); + if (it != perf_pmu_type_to_name.end()) { + return it->second.c_str(); + } + + // lookup core name + return map_core_cluster_name(core_no); + } + + char const * perf_event_printer_t::map_attr_type(std::uint32_t type, core_no_t core_no) + { + switch (type) { + case PERF_TYPE_HARDWARE: + return "cpu"; + case PERF_TYPE_BREAKPOINT: + return "breakpoint"; + case PERF_TYPE_HW_CACHE: + return "hw-cache"; + case PERF_TYPE_RAW: + return map_core_cluster_name(core_no); + case PERF_TYPE_SOFTWARE: + return "software"; + case PERF_TYPE_TRACEPOINT: + return "tracepoint"; + default: { + if (type < PERF_TYPE_MAX) { + return "?"; + } + return map_custom_pmu_type(type, core_no); + } + } + } + + std::string perf_event_printer_t::perf_attr_to_string(perf_event_attr const & attr, + core_no_t core_no, + char const * indentation, + char const * separator) + { + return (lib::Format() << indentation << "type: " << attr.type // + << " (" << map_attr_type(attr.type, core_no) << ")" << separator // + << indentation << "config: " << attr.config << separator // + << indentation << "config1: " << attr.config1 << separator // + << indentation << "config2: " << attr.config2 << separator // + << indentation << "sample: " << attr.sample_period << separator << std::hex // + << indentation << "sample_type: 0x" << attr.sample_type << separator // + << indentation << "read_format: 0x" << attr.read_format << separator << std::dec // + << indentation << "pinned: " << (attr.pinned ? "true" : "false") << separator // + << indentation << "mmap: " << (attr.mmap ? "true" : "false") << separator // + << indentation << "comm: " << (attr.comm ? "true" : "false") << separator // + << indentation << "freq: " << (attr.freq ? "true" : "false") << separator // + << indentation << "task: " << (attr.task ? "true" : "false") << separator // + << indentation << "exclude_kernel: " << (attr.exclude_kernel ? "true" : "false") + << separator // + << indentation << "enable_on_exec: " << (attr.enable_on_exec ? "true" : "false") + << separator // + << indentation << "inherit: " << (attr.inherit ? "true" : "false") << separator // + << indentation << "sample_id_all: " << (attr.sample_id_all ? "true" : "false") + << separator // + << indentation << "sample_regs_user: 0x" << std::hex << attr.sample_regs_user << separator + << std::dec // + << indentation << "aux_watermark: " << attr.aux_watermark << separator); + } +} diff --git a/daemon/agents/perf/events/perf_event_utils.hpp b/daemon/agents/perf/events/perf_event_utils.hpp new file mode 100644 index 00000000..966b0096 --- /dev/null +++ b/daemon/agents/perf/events/perf_event_utils.hpp @@ -0,0 +1,60 @@ +/* Copyright (C) 2022 by Arm Limited. All rights reserved. 
*/ + +#pragma once + +#include "agents/perf/events/types.hpp" +#include "k/perf_event.h" + +#include +#include +#include +#include + +namespace agents::perf { + + /** + * A class that can be used to stringify various aspects of a perf event + */ + class perf_event_printer_t { + public: + constexpr perf_event_printer_t(std::map const & cpuid_to_core_name, + std::vector const & per_core_cpuids, + std::map const & perf_pmu_type_to_name) + : per_core_cpuids(per_core_cpuids), + cpuid_to_core_name(cpuid_to_core_name), + perf_pmu_type_to_name(perf_pmu_type_to_name) + { + } + + /** + * To map the type field for some event to a string name for the associated PMU + * + * @param type The PMU type code + * @param core_no The core number associated with the event + * @return The name of the PMU + */ + [[nodiscard]] char const * map_attr_type(std::uint32_t type, core_no_t core_no); + + /** + * Format a perf_event_attr to a string (for logging, errors) + * + * @param attr The attr to log + * @param core_no The core no associated with the event + * @param indentation For indenting each element + * @param separator For separating each element + * @return The attr string + */ + [[nodiscard]] std::string perf_attr_to_string(perf_event_attr const & attr, + core_no_t core_no, + char const * indentation, + char const * separator); + + private: + std::vector const & per_core_cpuids; + std::map const & cpuid_to_core_name; + std::map const & perf_pmu_type_to_name; + + [[nodiscard]] char const * map_core_cluster_name(core_no_t core_no); + [[nodiscard]] char const * map_custom_pmu_type(std::uint32_t type, core_no_t core_no); + }; +} diff --git a/daemon/agents/perf/events/perf_ringbuffer_mmap.hpp b/daemon/agents/perf/events/perf_ringbuffer_mmap.hpp new file mode 100644 index 00000000..df3b4560 --- /dev/null +++ b/daemon/agents/perf/events/perf_ringbuffer_mmap.hpp @@ -0,0 +1,124 @@ +/* Copyright (C) 2022 by Arm Limited. All rights reserved. */ + +#pragma once + +#include "k/perf_event.h" +#include "lib/Assert.h" +#include "lib/Span.h" +#include "lib/Syscall.h" + +#include +#include + +#include +#include + +namespace agents::perf { + + class mmap_ptr_t { + public: + using value_type = char; + using size_type = std::size_t; + + mmap_ptr_t() = default; + + mmap_ptr_t(void * mmap, std::size_t length) + // NOLINTNEXTLINE(performance-no-int-to-ptr) + : mmap(mmap != MAP_FAILED ? 
mmap : nullptr), length(length) + { + } + + mmap_ptr_t(mmap_ptr_t const &) = delete; + mmap_ptr_t & operator=(mmap_ptr_t const &) = delete; + mmap_ptr_t(mmap_ptr_t && that) noexcept + : mmap(std::exchange(that.mmap, nullptr)), length(std::exchange(that.length, 0)) + { + } + mmap_ptr_t & operator=(mmap_ptr_t && that) noexcept + { + if (this != &that) { + mmap_ptr_t tmp {std::move(that)}; + std::swap(mmap, tmp.mmap); + std::swap(length, tmp.length); + } + return *this; + } + ~mmap_ptr_t() noexcept + { + auto * mmap = std::exchange(this->mmap, nullptr); + auto length = std::exchange(this->length, 0); + + if (mmap != nullptr) { + lib::munmap(mmap, length); + } + } + + template + [[nodiscard]] T * get_as() + { + return reinterpret_cast(mmap); + } + + template + [[nodiscard]] T const * get_as() const + { + return reinterpret_cast(mmap); + } + + [[nodiscard]] lib::Span as_span() const { return {data(), size()}; } + + [[nodiscard]] char * data() { return reinterpret_cast(mmap); } + [[nodiscard]] char const * data() const { return reinterpret_cast(mmap); } + [[nodiscard]] size_type size() const { return length; } + + [[nodiscard]] bool operator==(std::nullptr_t) const { return (mmap == nullptr) || (length == 0); } + [[nodiscard]] bool operator!=(std::nullptr_t) const { return (mmap != nullptr) && (length != 0); } + [[nodiscard]] explicit operator bool() const { return (mmap != nullptr) && (length != 0); } + + private: + void * mmap = nullptr; + std::size_t length = 0; + }; + + class perf_ringbuffer_mmap_t { + public: + perf_ringbuffer_mmap_t() = default; + + perf_ringbuffer_mmap_t(std::size_t page_size, mmap_ptr_t data_mapping, mmap_ptr_t aux_mapping = {}) + : page_size(page_size), data_mapping(std::move(data_mapping)), aux_mapping(std::move(aux_mapping)) + { + } + + [[nodiscard]] bool has_data() const { return !!data_mapping; } + + [[nodiscard]] bool has_aux() const { return !!aux_mapping && has_data(); } + + [[nodiscard]] perf_event_mmap_page * header() { return data_mapping.get_as(); } + [[nodiscard]] perf_event_mmap_page const * header() const + { + return data_mapping.get_as(); + } + + [[nodiscard]] lib::Span aux_span() const { return aux_mapping.as_span(); } + + [[nodiscard]] lib::Span data_span() const + { + if (!data_mapping) { + return {}; + } + + return {reinterpret_cast(data_mapping.data() + page_size), data_mapping.size() - page_size}; + } + + void set_aux_mapping(mmap_ptr_t mapping) + { + runtime_assert(has_data(), "Data region must be mapped before aux"); + aux_mapping = std::move(mapping); + } + + private: + std::size_t page_size = 0; + mmap_ptr_t data_mapping {}; + mmap_ptr_t aux_mapping {}; + }; +} diff --git a/daemon/agents/perf/events/types.hpp b/daemon/agents/perf/events/types.hpp new file mode 100644 index 00000000..90868ee7 --- /dev/null +++ b/daemon/agents/perf/events/types.hpp @@ -0,0 +1,28 @@ +/* Copyright (C) 2022 by Arm Limited. All rights reserved. 
*/ + +#pragma once + +#include + +namespace agents::perf { + enum class core_no_t : int; + + enum class gator_key_t : int { + magic_key_timestamp = 0, + magic_key_tid = 1, + magic_key_core = 2, + first_free_key = 4, + }; + + enum class cpu_cluster_id_t : int { + invalid = -1, + }; + + enum class uncore_pmu_id_t : int { + invalid = -1, + }; + + enum class perf_event_id_t : std::uint64_t { + invalid = 0, + }; +} diff --git a/daemon/agents/perf/frame_encoder.h b/daemon/agents/perf/frame_encoder.h deleted file mode 100644 index 578b31a6..00000000 --- a/daemon/agents/perf/frame_encoder.h +++ /dev/null @@ -1,205 +0,0 @@ -/* Copyright (C) 2022 by Arm Limited. All rights reserved. */ - -#pragma once - -#include "BufferUtils.h" -#include "ISender.h" -#include "agents/perf/async_buffer_builder.h" -#include "agents/perf/record_types.h" -#include "async/async_buffer.hpp" -#include "async/completion_handler.h" -#include "async/continuations/async_initiate.h" -#include "async/continuations/continuation.h" -#include "async/continuations/continuation_of.h" -#include "async/continuations/operations.h" -#include "async/continuations/use_continuation.h" -#include "lib/Span.h" - -#include -#include -#include -#include - -#include -#include -#include - -namespace agents::perf { - - /** - * Instances of frame_encoder_t are responsible for writing perf data records (events from the main - * ring buffer, or data blocks from the aux ring buffer) into an asynchronous buffer. This hides - * the complexity of: - * 1. working out how much space will be needed; - * 2. requesting that amount of space from the async buffer; - * 3. waiting for that space to be available; - * 4. writing the record into the space that was allocated. - * - * @tparam Executor The Boost Asio executor type that will be used to dispatch async requests. - * @tparam RecordType The type that represents the perf data to be encoded. - * @tparam Encoder A type that is capable of consuming an object of RecordType and writing it - * into a buffer region. - */ - template - class frame_encoder_t : public std::enable_shared_from_this> { - private: - // A type that keeps track of where we are reading from as the async operations progress. - struct record_index_t { - // which record in the span are we currently consuming? - std::size_t record_number; - // how far into that record did we get? - std::size_t offset_in_record; - - // moves to the next record in the span - void next() - { - record_number++; - offset_in_record = 0; - } - }; - - // A type that holds the state that's needed by each async step in the state machine. 
- struct task_ctx_t { - std::shared_ptr self; - std::shared_ptr send_buffer; - int cpu; - std::uint64_t tail_pointer; - lib::Span records; - record_index_t index; - }; - - Executor & executor; - Encoder encoder; - std::atomic task_running; - - void finish_current_task(task_ctx_t & task) - { - task.send_buffer.reset(); - task_running.store(false, std::memory_order_release); - } - - auto co_handle_emit_record(task_ctx_t task, - async::async_buffer_t::mutable_buffer_type buffer, - async::async_buffer_t::commit_action_t action) - { - using namespace async::continuations; - - auto & record = task.records[task.index.record_number]; - - auto new_offset = encoder.encode_into(buffer, - std::move(action), - record, - task.cpu, - task.tail_pointer, - task.index.offset_in_record); - - task.index.offset_in_record = new_offset; - - // loop back and read the next chunk - return start_on(executor) // - | then([st = this->shared_from_this(), task = std::move(task)]() mutable { - return st->co_request_space_for_index(std::move(task)); - }); - } - - async::continuations::polymorphic_continuation_t - co_request_space_for_index(task_ctx_t task) - { - using namespace async::continuations; - - // if we've reached the last record call the completion handler - if (task.index.record_number >= task.records.size()) { - // call this first in case the handler decides to throw - finish_current_task(task); - return start_with(boost::system::error_code {}, task.index.record_number); - } - - auto & record = task.records[task.index.record_number]; - - // if we've finished sending this record move to the next one - if (task.index.offset_in_record >= record.number_of_elements()) { - task.index.next(); - return co_request_space_for_index(std::move(task)); - } - - // how much buffer space do we need for this record? - const int bytes_requested = encoder.get_bytes_required(record, task.index.offset_in_record); - const auto amount_to_request = - Encoder::max_header_size + std::min(bytes_requested, Encoder::max_payload_size); - - auto sb = task.send_buffer; - - return sb->async_request_space(amount_to_request, use_continuation) // - | then([st = this->shared_from_this(), - task = std::move(task)](bool success, auto buffer, auto commit_action) mutable - -> polymorphic_continuation_t { - if (!success) { - // clean this up first in case the handler decides to throw - st->finish_current_task(task); - return start_with(boost::asio::error::make_error_code(boost::asio::error::no_memory), - task.index.record_number); - } - return st->co_handle_emit_record(std::move(task), std::move(buffer), std::move(commit_action)); - }); - } - - template - auto async_encode(task_ctx_t task, CompletionToken && token) - { - using namespace async::continuations; - - return async_initiate>( - [st = this->shared_from_this(), task = std::move(task)]() mutable { - // don't allow 2 tasks to run concurrently. Even though they're dispatched on the same executor - // the individual steps in the state machine would end up interleaved and cause problems. 
- return start_with() // - | do_if_else([st]() { return st->task_running.exchange(true); }, - [st]() { - return start_with( - boost::asio::error::make_error_code(boost::asio::error::already_started), - 0); - }, - [st, task = std::move(task)]() mutable { - return start_on(st->executor) // - | then([st, task = std::move(task)]() mutable { - return st->co_request_space_for_index(std::move(task)); - }); - }); - }, - std::forward(token)); - } - - public: - explicit frame_encoder_t(Executor & executor) : executor(executor), task_running(false) {} - - /** - * Asynchronously encode an array of perf data into an asynchronous buffer. - * - * @param send_buffer The async buffer from which space will be allocated. - * @param cpu The index of the CPU from which the data were sampled. - * @param tail_pointer A snapshot of the ring buffer's tail pointer at the time the records - * were taken. - * @param records A span of objects representing the data to be consumed from the perf - * ring buffer. - */ - template - auto async_encode(std::shared_ptr send_buffer, - int cpu, - std::uint64_t tail_pointer, - lib::Span records, - CompletionToken && token) - { - return async_encode( - task_ctx_t { - this->shared_from_this(), - std::move(send_buffer), - cpu, - tail_pointer, - std::move(records), - {0, 0}, - }, - std::forward(token)); - } - }; - -} diff --git a/daemon/agents/perf/perf_agent.h b/daemon/agents/perf/perf_agent.h new file mode 100644 index 00000000..97301dc7 --- /dev/null +++ b/daemon/agents/perf/perf_agent.h @@ -0,0 +1,103 @@ +/* Copyright (C) 2022 by Arm Limited. All rights reserved. */ + +#pragma once + +#include "Logging.h" +#include "agents/agent_environment.h" +#include "agents/perf/capture_configuration.h" +#include "async/continuations/operations.h" +#include "async/continuations/use_continuation.h" +#include "ipc/messages.h" +#include "ipc/raw_ipc_channel_sink.h" +#include "ipc/raw_ipc_channel_source.h" +#include "lib/exception.h" + +#include + +#include + +namespace agents::perf { + + template + class perf_agent_t : public std::enable_shared_from_this> { + public: + using accepted_message_types = std::tuple; + + using capture_factory = + std::function(boost::asio::io_context &, + async::proc::process_monitor_t & process_monitor, + std::shared_ptr, + agent_environment_base_t::terminator, + std::shared_ptr)>; + + static std::shared_ptr> create(boost::asio::io_context & io, + async::proc::process_monitor_t & process_monitor, + std::shared_ptr sink, + agent_environment_base_t::terminator terminator, + capture_factory factory) + { + return std::shared_ptr>(new perf_agent_t(io, + process_monitor, + std::move(sink), + std::move(terminator), + std::move(factory))); + } + + async::continuations::polymorphic_continuation_t<> co_shutdown() + { + using namespace async::continuations; + + return start_with() // + | then([st = this->shared_from_this()]() -> polymorphic_continuation_t<> { + if (st->capture) { + return st->capture->async_shutdown(use_continuation); + } + return {}; + }); + } + + async::continuations::polymorphic_continuation_t<> co_receive_message(ipc::msg_start_t msg) + { + return capture->async_on_received_start_message(msg.header, async::continuations::use_continuation); + } + + async::continuations::polymorphic_continuation_t<> co_receive_message(ipc::msg_capture_configuration_t msg) + { + using namespace async::continuations; + + return start_with() // + | then([st = this->shared_from_this(), msg = std::move(msg)]() mutable { + LOG_DEBUG("Got capture config message"); + st->capture = 
st->wrapped_factory(std::move(msg)); + + // wrapped_factory is one-shot as its contents are moved, so make that explicit by defaulting it + st->wrapped_factory = {}; + + return st->capture->async_prepare(async::continuations::use_continuation); + }); + } + + private: + std::function(ipc::msg_capture_configuration_t)> wrapped_factory; + std::shared_ptr capture; + + perf_agent_t(boost::asio::io_context & io, + async::proc::process_monitor_t & process_monitor, + std::shared_ptr sink, + agent_environment_base_t::terminator terminator, + capture_factory factory) + : wrapped_factory {[sink = std::move(sink), // + terminator = std::move(terminator), + factory = std::move(factory), + &io, + &process_monitor](auto msg) mutable { + return factory(io, + process_monitor, + std::move(sink), + std::move(terminator), + parse_capture_configuration_msg(std::move(msg))); + }} + { + } + }; +} diff --git a/daemon/agents/perf/perf_agent_main.cpp b/daemon/agents/perf/perf_agent_main.cpp new file mode 100644 index 00000000..db23b810 --- /dev/null +++ b/daemon/agents/perf/perf_agent_main.cpp @@ -0,0 +1,39 @@ +/* Copyright (C) 2022 by Arm Limited. All rights reserved. */ + +#include "agents/agent_environment.h" +#include "agents/perf/perf_agent.h" +#include "agents/perf/perf_capture.h" +#include "ipc/raw_ipc_channel_sink.h" + +#include + +namespace agents::perf { + + namespace { + using agent_type = perf_agent_t; + + auto agent_factory(boost::asio::io_context & io, + async::proc::process_monitor_t & process_monitor, + std::shared_ptr sink, + agent_environment_base_t::terminator terminator) + { + return agent_type::create(io, + process_monitor, + std::move(sink), + std::move(terminator), + perf_capture_t::create); + }; + } + + int perf_agent_main(char const * /*argv0*/, lib::Span args) + { + return start_agent(args, [](auto /*args*/, auto & io, auto & pm, auto ipc_sink, auto ipc_source) { + return agent_environment_t::create("gator-agent-perf", + io, + pm, + agent_factory, + std::move(ipc_sink), + std::move(ipc_source)); + }); + } +} diff --git a/daemon/agents/perf/perf_agent_main.h b/daemon/agents/perf/perf_agent_main.h new file mode 100644 index 00000000..0a9aaa71 --- /dev/null +++ b/daemon/agents/perf/perf_agent_main.h @@ -0,0 +1,13 @@ +/* Copyright (C) 2022 by Arm Limited. All rights reserved. */ + +#pragma once + +#include + +namespace agents::perf { + + /** + * Perf agent entry point. + */ + int perf_agent_main(char const * argv0, lib::Span args); +} diff --git a/daemon/agents/perf/perf_agent_worker.h b/daemon/agents/perf/perf_agent_worker.h new file mode 100644 index 00000000..496d430e --- /dev/null +++ b/daemon/agents/perf/perf_agent_worker.h @@ -0,0 +1,305 @@ +/* Copyright (C) 2022 by Arm Limited. All rights reserved. */ +#pragma once + +#include "agents/agent_worker_base.h" +#include "async/continuations/async_initiate.h" +#include "async/continuations/operations.h" +#include "async/continuations/stored_continuation.h" +#include "async/continuations/use_continuation.h" + +#include + +namespace agents::perf { + + /** + * An interface to an object that can be used to send commands to the perf capture process. + * This allows, for example, a shell-side signal handler to request that the agent stops capturing + * and terminates in a clean way. + */ + class perf_capture_controller_t { + public: + virtual ~perf_capture_controller_t() = default; + + /** + * Request that the perf agent process starts the capture. 
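+ * (Illustrative usage only.) A shell-side caller would typically invoke `async_start_capture(monotonic_start, token)` with a completion token accepting the single bool result, then watch for the follow-up IPC messages described below.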
+ * Note that the completion handler will be called with a boolean that shows whether the + * command was sent successfully. This does not necessarily mean the capture has actually + * started successfully. That will be indicated by follow-up IPC messages sent from the + * agent. + */ + template + auto async_start_capture(std::uint64_t monotonic_start, CompletionToken && token) + { + using namespace async::continuations; + + return async_initiate_explicit( + [this, monotonic_start](auto && sc) { + this->async_start_capture(monotonic_start, + stored_continuation_t {std::forward(sc)}); + }, + std::forward(token)); + } + + /** + * Request that the perf agent stop capturing. The completion handler will be called with + * a boolean that shows whether the message was sent successfully. Further IPC messages + * will be sent from the agent as it performs cleanup & shutdown. + */ + template + auto async_stop_capture(CompletionToken && token) + { + using namespace async::continuations; + + return async_initiate_explicit( + [this](auto && sc) { + this->async_stop_capture(stored_continuation_t<> {std::forward(sc)}); + }, + std::forward(token)); + } + + protected: + virtual void async_start_capture(std::uint64_t monotonic_start, + async::continuations::stored_continuation_t handler_ref) = 0; + + virtual void async_stop_capture(async::continuations::stored_continuation_t<> handler_ref) = 0; + }; + + /** + * The shell-side controller that monitors and communicates with the perf agent process. + * + * @tparam EventObserver The object that is to be notified of significant events from the agent + * process. E.g. startup/shutdown & APC frame delivery. The object is expected to conform to: + * + * class foo_observer { + * public: + * // called once the worker has been initialised by the controller + * void set_controller(std::unique_ptr); + * + * // called when an APC frame message is received from the agent. The data + * // buffer is passed to the function. + * void on_apc_frame_received(const std::vector&); + * }; + * + */ + template + class perf_agent_worker_t : public agents::agent_worker_base_t, + public std::enable_shared_from_this> { + public: + static constexpr const char * get_agent_process_id() { return agent_id_perf.data(); } + + perf_agent_worker_t(boost::asio::io_context & io, + agent_process_t && agent_process, + state_change_observer_t && state_change_observer, + EventObserver & observer, + ipc::msg_capture_configuration_t capture_config) + : agent_worker_base_t(std::move(agent_process), std::move(state_change_observer)), + strand(io), + observer(observer), + capture_config(std::move(capture_config)) + { + } + + protected: + [[nodiscard]] boost::asio::io_context::strand & work_strand() override { return strand; } + + private: + /** + * An implementation of the capture controller interface that will allow the event observer + * to send messages to the agent without having a cyclic dependency between observer & worker.
+ */ + class capture_controller_t : public perf_capture_controller_t { + using ParentType = perf_agent_worker_t; + + public: + explicit capture_controller_t(std::shared_ptr parent) : parent(std::move(parent)) {} + + ~capture_controller_t() override = default; + + protected: + void async_start_capture(std::uint64_t monotonic_start, + async::continuations::stored_continuation_t sc) override + { + parent->sink().async_send_message( + ipc::msg_start_t {monotonic_start}, + [sc = std::move(sc), p = parent](const auto & ec, const auto & /*msg*/) mutable { + if (ec) { + LOG_ERROR("Error starting perf capture: %s", ec.message().c_str()); + return resume_continuation(p->work_strand().context(), std::move(sc), false); + } + + return resume_continuation(p->work_strand().context(), std::move(sc), true); + }); + } + + void async_stop_capture(async::continuations::stored_continuation_t<> sc) override + { + return submit(parent->work_strand().context(), parent->co_shutdown(), std::move(sc)); + } + + private: + std::shared_ptr parent; + }; + + friend class capture_controller_t; + + boost::asio::io_context::strand strand; + EventObserver & observer; + ipc::msg_capture_configuration_t capture_config; + + auto co_shutdown() + { + using namespace async::continuations; + + return start_on(strand) // + | then([self = this->shared_from_this()]() -> polymorphic_continuation_t<> { + if (!self->transition_state(state_t::shutdown_requested)) { + return {}; + } + + LOG_DEBUG("Sending shutdown message to agent process"); + return self->sink().async_send_message(ipc::msg_shutdown_t {}, use_continuation) + | then([self](const auto & /*ec*/, const auto & /*msg*/) {}); + }); + } + + static auto co_receive_message(const std::monostate & /*msg*/) + { + LOG_ERROR("Unexpected monostate message received in perf worker."); + } + + /** + * Handle the 'ready' message - the agent has started and is waiting to be configured. + */ + auto co_receive_message(const ipc::msg_ready_t & /*msg*/) + { + using namespace async::continuations; + + LOG_DEBUG("Perf agent reported that it's ready - sending config message"); + transition_state(state_t::ready); + // send the config message to prepare the agent for the capture + return start_on(strand) // + | sink().async_send_message(capture_config, use_continuation) + | then( + [self = this->shared_from_this()](const auto & ec, + const auto & /*msg*/) mutable -> polymorphic_continuation_t<> { + if (ec) { + LOG_ERROR("Failed to send the configuration to the perf agent process: %s", + ec.message().c_str()); + return self->co_shutdown(); + } + return {}; + }); + } + + /** + * Handle the 'capture ready' message - the agent has been configured and is prepared to + * start the capture. + */ + auto co_receive_message(const ipc::msg_capture_ready_t & /*msg*/) + { + LOG_DEBUG("Perf agent is prepared for capture"); + observer.on_capture_ready(); + } + + /** + * Handle the shutdown message - the agent has stopped capturing and the process is about + * to terminate. 
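+ * (Once the child process itself exits, on_sigchild() fires, moving the worker to the terminated state and notifying the observer via on_capture_completed().)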
+ */ + auto co_receive_message(const ipc::msg_shutdown_t & /*msg*/) + { + LOG_DEBUG("Perf agent has shut down."); + transition_state(state_t::shutdown_received); + return async::continuations::start_with(); + } + + auto co_receive_message(ipc::msg_apc_frame_data_t && msg) + { + observer.on_apc_frame_received(std::move(msg.suffix)); + } + + auto co_receive_message(ipc::msg_exec_target_app_t const & /*msg*/) { observer.exec_target_app(); } + + auto co_receive_message(ipc::msg_capture_failed_t const & msg) { observer.on_capture_failed(msg.header); } + + auto co_receive_message(ipc::msg_capture_started_t const & /*msg*/) { observer.on_capture_started(); } + + public: + [[nodiscard]] bool start() + { + using namespace async::continuations; + using namespace ipc; + + LOG_DEBUG("starting perf agent worker"); + observer.set_controller(std::make_unique(this->shared_from_this())); + + auto self = this->shared_from_this(); + + spawn("Perf shell message loop", + repeatedly( + [self]() { + // don't stop until the agent terminates and closes the connection from its end + LOG_DEBUG("Receive loop would have terminated? %d", + (self->get_state() >= state_t::terminated_pending_message_loop)); + + return true; + }, + [self]() { + return async_receive_one_of(self->source_shared(), + use_continuation) + | map_error() // + | post_on(self->strand) // + | unpack_variant([self](auto && msg) mutable { + return self->co_receive_message(std::forward(msg)); + }); + }), + [self](bool failed) { + LOG_DEBUG("Receive loop ended"); + + boost::asio::post(self->strand, [self]() { self->set_message_loop_terminated(); }); + + if (failed) { + self->shutdown(); + } + }); + + return this->exec_agent(); + } + + void shutdown() override + { + using namespace async::continuations; + + LOG_DEBUG("perf worker shutdown called"); + + spawn("Perf worker shutdown", co_shutdown()); + } + + void on_sigchild() override + { + using namespace async::continuations; + + LOG_DEBUG("perf worker: got sigchld"); + + auto self = this->shared_from_this(); + + spawn("sigchld handler for perf shell", + start_on(strand) // + | then([self]() { + self->transition_state(state_t::terminated); + self->observer.on_capture_completed(); + }), + [self](bool failed) { + if (failed) { + self->shutdown(); + } + }); + } + }; + +} diff --git a/daemon/agents/perf/perf_buffer_consumer.cpp b/daemon/agents/perf/perf_buffer_consumer.cpp index e354f481..4a9e52fd 100644 --- a/daemon/agents/perf/perf_buffer_consumer.cpp +++ b/daemon/agents/perf/perf_buffer_consumer.cpp @@ -2,52 +2,266 @@ #include "agents/perf/perf_buffer_consumer.h" -#include "GatorException.h" +#include "BufferUtils.h" +#include "ISender.h" +#include "agents/perf/async_buffer_builder.h" +#include "agents/perf/perf_frame_packer.hpp" +#include "async/continuations/continuation.h" +#include "async/continuations/stored_continuation.h" +#include "ipc/messages.h" +#include "k/perf_event.h" +#include "lib/Assert.h" +#include "lib/error_code_or.hpp" -#include +#include namespace agents::perf { + namespace { + /** Read the aux_head/data_head field from the header */ + template<__u64 perf_event_mmap_page::*Field> + [[nodiscard]] static constexpr __u64 atomic_load_field(perf_event_mmap_page * header) + { + return __atomic_load_n(&(header->*Field), __ATOMIC_ACQUIRE); + } - namespace detail { - - void validate(const buffer_config_t & config) + /** Write the aux_tail/data_tail field to the header */ + template<__u64 perf_event_mmap_page::*Field> + static constexpr void atomic_store_field(perf_event_mmap_page * header, 
__u64 value) { - if (((config.page_size - 1) & config.page_size) != 0) { - LOG_ERROR("buffer_config_t.page_size (%zu) must be a power of 2", config.page_size); - throw GatorException("Non power of 2 page size"); - } - if (((config.data_buffer_size - 1) & config.data_buffer_size) != 0) { - LOG_ERROR("buffer_config_t.data_buffer_size (%zu) must be a power of 2", config.data_buffer_size); - throw GatorException("Non power of 2 data buffer size"); - } - if (config.data_buffer_size < config.page_size) { - LOG_ERROR( - "buffer_config_t.data_buffer_size (%zu) must be a multiple of buffer_config_t.page_size (%zu)", - config.data_buffer_size, - config.page_size); - throw GatorException("Data buffer must be a multiple of page size"); - } + __atomic_store_n(&(header->*Field), value, __ATOMIC_RELEASE); + } + } - if (((config.aux_buffer_size - 1) & config.aux_buffer_size) != 0) { - LOG_ERROR("buffer_config_t.aux_buffer_size (%zu) must be a power of 2", config.aux_buffer_size); - throw GatorException("Aux buffer size must be a power of 2"); - } - if ((config.aux_buffer_size < config.page_size) && (config.aux_buffer_size != 0)) { - LOG_ERROR("buffer_config_t.aux_buffer_size (%zu) must be a multiple of buffer_config_t.page_size (%zu)", - config.aux_buffer_size, - config.page_size); - throw GatorException("Aux buffer must be a multiple of page size"); + async::continuations::polymorphic_continuation_t + perf_buffer_consumer_t::do_send_msg(std::shared_ptr const & st, + int cpu, + std::vector buffer, + std::uint64_t head, + std::uint64_t tail) + { + using namespace async::continuations; + + LOG_TRACE("Sending IPC message for cpu=%d , head=%" PRIu64 " , tail=%" PRIu64 " , size=%zu", + cpu, + head, + tail, + buffer.size()); + + // update the running total (for one-shot mode) + st->cumulative_bytes_sent_apc_frames.fetch_add(buffer.size(), std::memory_order_acq_rel); + + // send one-shot notification? + if (st->is_one_shot_full()) { + stored_continuation_t<> sc {std::move(st->one_shot_mode_observer)}; + if (sc) { + resume_continuation(st->strand.context(), std::move(sc)); } } + + // send the message + return st->ipc_sink->async_send_message(ipc::msg_apc_frame_data_t {std::move(buffer)}, use_continuation) // + | then([head, tail](auto ec, auto /*msg*/) { + LOG_TRACE("... sent, ec=%s , head=%" PRIu64 " , tail=%" PRIu64, ec.message().c_str(), head, tail); + + return std::make_tuple(head, tail, ec); + }) + | unpack_tuple(); } - perf_buffer_consumer_t::perf_buffer_consumer_t(boost::asio::io_context & io, buffer_config_t config) - : strand(io), - config(config), - data_encoder {std::make_shared(strand)}, - aux_encoder {std::make_shared(strand)} + template<__u64 perf_event_mmap_page::*HeadField, __u64 perf_event_mmap_page::*TailField, typename Op> + async::continuations::polymorphic_continuation_t + perf_buffer_consumer_t::do_send_common(std::shared_ptr const & st, + std::shared_ptr const & mmap, + int cpu, + Op && op) { - detail::validate(config); + using namespace async::continuations; + + auto * header = mmap->header(); + + std::uint64_t const head = atomic_load_field(header); + std::uint64_t const tail = (header->*TailField); + + LOG_TRACE("... cpu=%d , head=%" PRIu64 " , tail=%" PRIu64, cpu, head, tail); + + // no data, no error + if (head <= tail) { + return start_with(boost::system::error_code {}, false); + } + + // is the one-shot mode limit met, if so just skip the data + if (st->is_one_shot_full()) { + LOG_TRACE("... 
skipping (one-shot), cpu=%d , head=%" PRIu64 " , tail=%" PRIu64, cpu, head, tail); + atomic_store_field(mmap->header(), head); + + return start_with(boost::system::error_code {}, false); + } + + // iterate the data and send it + return start_with(head, tail, boost::system::error_code {}) // + | loop([](std::uint64_t head, + std::uint64_t tail, + boost::system::error_code ec) { return start_with((head > tail) && !ec, head, tail, ec); }, + [op = std::forward(op), st, mmap](std::uint64_t head, + std::uint64_t tail, + boost::system::error_code ec) { + return op(head, tail, ec) // + | post_on(st->strand.context()) // + | then([mmap](std::uint64_t h, std::uint64_t t, boost::system::error_code c) { + atomic_store_field(mmap->header(), std::min(h, t)); + return start_with(h, t, c); + }); + }) + | then([cpu, mmap](std::uint64_t head, std::uint64_t tail, boost::system::error_code ec) { + LOG_TRACE("... completed, cpu=%d , head=%" PRIu64 " , tail=%" PRIu64, cpu, head, tail); + atomic_store_field(mmap->header(), std::min(head, tail)); + return start_with(ec, true); + }); } + async::continuations::polymorphic_continuation_t + perf_buffer_consumer_t::do_send_aux_section(std::shared_ptr const & st, + std::shared_ptr const & mmap, + int cpu, + boost::system::error_code ec_from_data, + bool modified_from_data) + { + using namespace async::continuations; + + // just forward the error + if (ec_from_data) { + LOG_TRACE("Sending data for %d gave error %s", cpu, ec_from_data.message().c_str()); + return start_with(ec_from_data, modified_from_data); + } + + if (!mmap->has_aux()) { + return start_with(boost::system::error_code {}, modified_from_data); + } + + LOG_TRACE("Sending aux data for %d", cpu); + + return do_send_common<&perf_event_mmap_page::aux_head, &perf_event_mmap_page::aux_tail>( + st, + mmap, + cpu, + [st, mmap, cpu](std::uint64_t const header_head, + std::uint64_t const header_tail, + boost::system::error_code ec) + -> polymorphic_continuation_t { + // + LOG_TRACE("Sending aux chunk for cpu=%d , head=%" PRIu64 " , tail=%" PRIu64, + cpu, + header_head, + header_tail); + + auto const aux_buffer = mmap->aux_span(); + + if (header_head <= header_tail) { + return start_with(header_head, header_head, ec); + } + + // find the data to send + auto [first_span, second_span] = + extract_one_perf_aux_apc_frame_data_span_pair(aux_buffer, header_head, header_tail); + + // encode the message + auto [new_tail, buffer] = encode_one_perf_aux_apc_frame(cpu, first_span, second_span, header_tail); + + runtime_assert(!buffer.empty(), "Expected some apc frame data"); + + // send it + return do_send_msg(st, cpu, std::move(buffer), header_head, new_tail); + }); + } + + async::continuations::polymorphic_continuation_t + perf_buffer_consumer_t::do_send_data_section(std::shared_ptr const & st, + std::shared_ptr const & mmap, + int cpu) + { + + using namespace async::continuations; + + LOG_TRACE("Sending data for %d", cpu); + + return do_send_common<&perf_event_mmap_page::data_head, &perf_event_mmap_page::data_tail>( + st, + mmap, + cpu, + [st, mmap, cpu](std::uint64_t const header_head, + std::uint64_t const header_tail, + boost::system::error_code ec) + -> polymorphic_continuation_t { + LOG_TRACE("Sending data chunk for cpu=%d , head=%" PRIu64 " , tail=%" PRIu64, + cpu, + header_head, + header_tail); + + if (header_head <= header_tail) { + return start_with(header_head, header_head, ec); + } + + // encode the data into an apc frame + auto [new_tail, buffer] = + extract_one_perf_data_apc_frame(cpu, mmap->data_span(), 
header_head, header_tail); + + runtime_assert(!buffer.empty(), "Expected some apc frame data"); + + // send it + return do_send_msg(st, cpu, std::move(buffer), header_head, new_tail); + }); + } + + [[nodiscard]] async::continuations::polymorphic_continuation_t + perf_buffer_consumer_t::do_poll(std::shared_ptr const & st, + std::shared_ptr const & mmap, + int cpu) + { + using namespace async::continuations; + + // SDDAP-11384, read data before aux + + return do_send_data_section(st, mmap, cpu) // + | then([st, mmap, cpu](boost::system::error_code const & ec, bool modified) { + return do_send_aux_section(st, mmap, cpu, ec, modified); + }) // + | post_on(st->strand) // + | then([st, mmap, cpu](boost::system::error_code const & ec, + bool modified) mutable -> polymorphic_continuation_t { + // not removed / error path + if ((ec) || (st->removed_cpus.count(cpu) <= 0)) { + // mark it as no longer busy + st->busy_cpus.erase(cpu); + return start_with(ec); + } + + LOG_TRACE("Remove mmap flush for %d", cpu); + + // when removed, do it again repeatedly to flush any remaining data since the remove request (which may overlap the sending) + return start_with(boost::system::error_code {}, modified) + | loop( + [](boost::system::error_code const & ec, bool modified) { + LOG_TRACE("Remove send loop will iterate (modified=%u, ec=%s)", + modified, + ec.message().c_str()); + // only continue to iterate if no error and last iteration indicates modified ringbuffer data + return start_with(modified && !ec, ec, modified); + }, + [st, mmap, cpu](boost::system::error_code const & /*ec*/, bool /*modified*/) { + return do_send_data_section(st, mmap, cpu) // + | then([st, mmap, cpu](boost::system::error_code e, bool m) { + return do_send_aux_section(st, mmap, cpu, e, m); + }); + }) + | post_on(st->strand) // + | then([st, cpu](boost::system::error_code const & ec, bool /*modified*/) { + LOG_TRACE("Remove mmap completed for %d (poll ec =%s)", cpu, ec.message().c_str()); + // mark it as no longer busy + st->busy_cpus.erase(cpu); + // remove it + st->per_cpu_mmaps.erase(cpu); + return ec; + }); + }); + } } diff --git a/daemon/agents/perf/perf_buffer_consumer.h b/daemon/agents/perf/perf_buffer_consumer.h index 4813c9d2..47418076 100644 --- a/daemon/agents/perf/perf_buffer_consumer.h +++ b/daemon/agents/perf/perf_buffer_consumer.h @@ -2,287 +2,315 @@ #pragma once -#include "agents/perf/apc_encoders.h" -#include "agents/perf/detail/perf_buffer_consumer_detail.h" -#include "agents/perf/frame_encoder.h" +#include "Logging.h" +#include "agents/perf/events/perf_ringbuffer_mmap.hpp" +#include "agents/perf/record_types.h" #include "async/continuations/async_initiate.h" #include "async/continuations/continuation.h" +#include "async/continuations/continuation_of.h" +#include "async/continuations/operations.h" +#include "async/continuations/stored_continuation.h" #include "async/continuations/use_continuation.h" -#include "lib/Span.h" +#include "ipc/raw_ipc_channel_sink.h" + +#include +#include +#include +#include -#include #include -#include +#include +#include #include namespace agents::perf { - /** - * Instances of this class track and manage the perf buffers across multiple CPUs. They are responsible - * for initiating a chain of async operations that read from each buffer and feed the results into an - * intermediate buffer. + * This class consumes the contents of the perf mmap ringbuffers, outputting perf data apc frames and perf aux apc frames.
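+ * Data is drained in chunks from the data (and, where mapped, aux) regions and forwarded to the shell as ipc::msg_apc_frame_data_t messages; a ringbuffer's tail pointer is only advanced once the corresponding chunk has been handed to the IPC sink.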
+ * It is not responsible for monitoring of the perf file descriptors / periodic timer (these are handled elsewhere), but it provides + * an interface where some other caller can trigger the data in the ringbuffer(s) to be consumed. */ class perf_buffer_consumer_t : public std::enable_shared_from_this { - using strand_type = boost::asio::io_context::strand; - - template - auto async_send(std::shared_ptr> op, - std::shared_ptr intermediate_buffer, - CompletionToken && token) + public: + perf_buffer_consumer_t(boost::asio::io_context & context, + std::shared_ptr ipc_sink, + std::size_t one_shot_mode_limit) + : one_shot_mode_limit(one_shot_mode_limit), ipc_sink(std::move(ipc_sink)), strand(context) { - return op->async_send(detail::data_encode_op_t {data_encoder, intermediate_buffer}, - detail::aux_encode_op_t {aux_encoder, intermediate_buffer}, - std::forward(token)); } - public: /** - * Constructor. + * Insert a mmap into the consumer * - * @param io I/O context to use - * @param config Buffer configuration - * @exception GatorException Thrown if @a config fails validation - */ - perf_buffer_consumer_t(boost::asio::io_context & io, buffer_config_t config); - - [[nodiscard]] std::size_t get_data_buffer_length() const { return config.data_buffer_size; } - - [[nodiscard]] std::size_t get_aux_buffer_length() const { return config.aux_buffer_size; } - - /** - * Start tracking a perf event file descriptor for a specific CPU. - * - * @tparam CompletionToken Token type, expects an error_code. - * @param fd The perf event fd. - * @param cpu The CPU number that the fd is linked to. - * @param collect_aux_trace Whether to also map (and eventually read from) the aux buffer. - * @param token Called once the perf buffer has been registered with this instance, or if a - * failure has occurred. 
- * @return Nothing or a continuation, depending on @a CompletionToken + * @param cpu The cpu the mmap is associated with + * @param mmap The mmap object */ template - auto async_add_ringbuffer(int fd, int cpu, bool collect_aux_trace, CompletionToken && token) + auto async_add_ringbuffer(int cpu, std::shared_ptr mmap, CompletionToken && token) { using namespace async::continuations; + + LOG_DEBUG("Add new mmap request for %d", cpu); + return async_initiate>( - [self = shared_from_this(), fd, cpu, collect_aux_trace]() mutable { - return start_on(self->strand) // - | then([self, fd, cpu, collect_aux_trace]() mutable { - // Create the ringbuffer instance if necessary - auto buf_it = self->per_cpu_buffers.find(cpu); - if (buf_it == self->per_cpu_buffers.end()) { - auto op = detail::perf_consume_op_factory_t(self->strand, fd, cpu, self->config); - if (!op) { - return boost::asio::error::make_error_code(boost::asio::error::invalid_argument); - } - - buf_it = self->per_cpu_buffers.emplace(cpu, std::move(op)).first; - } - else { - // Otherwise just instruct the event FD to output to our ringbuffer - const auto ec = buf_it->second->set_output(fd); - if (ec) { - return ec; - } + [st = shared_from_this(), cpu, mmap = std::move(mmap)]() mutable { + return start_on(st->strand) // + | then([st, cpu, mmap = std::move(mmap)]() mutable { + LOG_DEBUG("Added new mmap for %d", cpu); + + // validate the pointer + if ((!mmap) || (!mmap->has_data())) { + return boost::system::errc::make_error_code(boost::system::errc::invalid_argument); } - if (collect_aux_trace) { - const auto ec = buf_it->second->attach_aux_buffer(fd); - if (ec) { - return ec; - } + // insert it into the map + auto [it, inserted] = st->per_cpu_mmaps.try_emplace(cpu, std::move(mmap)); + (void) it; + + if (!inserted) { + LOG_DEBUG("... failed, as already has mmap"); + return boost::system::errc::make_error_code( + boost::system::errc::device_or_resource_busy); } + // success return boost::system::error_code {}; }); }, - token); + std::forward(token)); } /** - * Overload where no aux data will be collected. + * Cause the mmap associated with `cpu` to be polled and any data to be written out to the capture. * - * @tparam CompletionToken Token type, expects an error_code. - * @param fd The perf event fd. - * @param cpu The CPU number that the fd is linked to. - * @param token Called once the perf buffer has been registered with this instance, or if a - * failure has occurred. - * @return Nothing or a continuation, depending on @a CompletionToken - */ - template - auto async_add_ringbuffer(int fd, int cpu, CompletionToken && token) - { - return async_add_ringbuffer(fd, cpu, false, std::forward(token)); - } - - /** - * Stop tracking for a specific CPU. + * The operation will complete successfully if the cpu is already in the process of being polled by some other trigger, or if the cpu currently doesn't have any mmap associated with it. * - * @tparam CompletionToken Token type, expects an error_code. - * @param cpu The CPU number that the fd is linked to. - * @param intermediate_buffer Buffer to dump the ringbuffer data into - * @param token Called once the perf buffer has been deregistered with this instance, or if a - * failure has occurred. 
- * @return Nothing or a continuation, depending on @a CompletionToken + * @param cpu The cpu for which the associated mmap should be polled */ template - auto async_remove_ringbuffer(int cpu, - std::shared_ptr intermediate_buffer, - CompletionToken && token) + auto async_poll(int cpu, CompletionToken && token) { using namespace async::continuations; + + LOG_TRACE("Poll requested for %d", cpu); + return async_initiate>( - [self = shared_from_this(), cpu, ibuf = std::move(intermediate_buffer)]() mutable { - return start_on(self->strand) - | then([self, cpu, ibuf = std::move(ibuf)]() mutable - -> polymorphic_continuation_t { - auto buf_it = self->per_cpu_buffers.find(cpu); - if (buf_it == self->per_cpu_buffers.end()) { - return start_with( - boost::asio::error::make_error_code(boost::asio::error::misc_errors::not_found)); + [st = shared_from_this(), cpu]() mutable { + return start_on(st->strand) // + | then([cpu, st]() mutable -> polymorphic_continuation_t { + LOG_TRACE("Poll started for %d", cpu); + + auto mmap_it = st->per_cpu_mmaps.find(cpu); + // ignore cpus that don't exist; its probably just poll_all + if (mmap_it == st->per_cpu_mmaps.end()) { + LOG_TRACE("No such mmap found for %d", cpu); + return start_with(boost::system::error_code {}); } - auto buf = std::move(buf_it->second); - self->per_cpu_buffers.erase(buf_it); + // if it is already being polled, also ignore the request + if (!st->busy_cpus.insert(cpu).second) { + LOG_TRACE("Already polling %d", cpu); + return start_with(boost::system::error_code {}); + } - // Drain ringbuffer before destroying - return buf->async_send( - detail::data_encode_op_t(self->data_encoder, ibuf), - detail::aux_encode_op_t(self->aux_encoder, ibuf), - use_continuation); + // ok, poll it + return do_poll(st, mmap_it->second, cpu); }); }, token); } /** - * Stop tracking for all CPUs. - * - * @tparam CompletionToken Token type, expects an error_code. - * @param intermediate_buffer Buffer to dump the ringbuffer data into - * @param token Called once complete. - * @return Nothing or a continuation, depending on @a CompletionToken + * Cause the mmap for all currently tracked cpus to be polled. 
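/*
 * Editor's note: an illustrative sketch, not part of the patch, of how a periodic
 * driver could trigger the polling API above.  The consumer exposes async_poll() /
 * async_poll_all(); here the call is hidden behind a std::function so the sketch makes
 * no assumptions about the completion-token plumbing.  The interval and the poll_all
 * callable are hypothetical.
 */
#include <boost/asio/steady_timer.hpp>
#include <boost/system/error_code.hpp>
#include <chrono>
#include <functional>
#include <memory>

namespace sketch {
    // re-arm a steady_timer and invoke poll_all on every tick until the timer is cancelled
    inline void schedule_periodic_poll(std::shared_ptr<boost::asio::steady_timer> timer,
                                       std::chrono::milliseconds interval,
                                       std::function<void()> poll_all)
    {
        timer->expires_after(interval);
        timer->async_wait([timer, interval, poll_all](boost::system::error_code const & ec) {
            if (ec) {
                return; // cancelled -> stop polling
            }
            poll_all(); // e.g. consumer->async_poll_all(<some completion token>)
            schedule_periodic_poll(timer, interval, poll_all);
        });
    }
}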
*/ template - auto async_remove_all(std::shared_ptr intermediate_buffer, CompletionToken && token) + auto async_poll_all(CompletionToken && token) { using namespace async::continuations; + LOG_TRACE("Poll all requested"); + return async_initiate>( - [self = shared_from_this(), ibuf = std::move(intermediate_buffer)]() mutable { - // If there's a failure, we need to report an appropriate error code, but we don't - // want to exit early as all other ringbuffers still need removing - return start_on(self->strand) // - | then([]() { return boost::system::error_code {}; }) - | loop( - [self](auto ec) { - // Even in the event of failure async_remove_ringbuffer will always - // remove the entry in per_cpu_buffers - return start_on(self->strand) // - | then([self, ec]() { return start_with(!self->per_cpu_buffers.empty(), ec); }); - }, - [self, ibuf = std::move(ibuf)](auto ec) mutable { - const int cpu = self->per_cpu_buffers.begin()->first; - return self->async_remove_ringbuffer(cpu, ibuf, use_continuation) - | then([ec](auto new_ec) mutable { - if (new_ec) { - ec = new_ec; - } - - return ec; - }); - }); + [st = shared_from_this()]() mutable { + return start_on(st->strand) // + | then([st]() mutable { + return start_with( + 0, + st->per_cpu_mmaps.size(), + {}) // + | loop( + [](std::size_t n, std::size_t count, boost::system::error_code ec) { // + return start_with((n < count) && !ec, n, count, ec); + }, + [st](std::size_t n, std::size_t count, boost::system::error_code /*ec*/) { + return st->async_poll(n, use_continuation) // + | then([n, count](auto ec) { return start_with(n + 1, count, ec); }); + }) + | then([](std::size_t /*n*/, std::size_t /*count*/, boost::system::error_code ec) { + LOG_TRACE("Poll all completed (ec=%s)", ec.message().c_str()); + return ec; + }); + }); }, token); } /** - * Write out data from a per-CPU ringbuffer to the intermediate buffer. + * Remove the mmap associated with some cpu. * - * @tparam CompletionToken Token type, expects an error_code. - * @param cpu The CPU number that the fd is linked to. - * @param intermediate_buffer Buffer to dump the ringbuffer data into - * @param token Called once complete. - * @return Nothing or a continuation, depending on @a CompletionToken + * The mmap will be polled one more time before removal, and any currently active poll operations will complete successfully in parallel. 
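/*
 * Editor's note: illustrative sketch only.  It shows the head/tail arithmetic that the
 * send loops earlier in this patch rely on: the readable region of a perf mmap ring
 * buffer is [tail, head); offsets wrap modulo the (power-of-two) buffer size, so a
 * wrapped region comes back as two spans.  The span type and function name are
 * hypothetical, not the patch's extract_one_perf_*_apc_frame helpers.
 */
#include <cstddef>
#include <cstdint>
#include <utility>

namespace sketch {
    struct byte_span {
        const std::uint8_t * data;
        std::size_t size;
    };

    // buffer_size must be a power of two, as required for perf mmap ring buffers
    inline std::pair<byte_span, byte_span> readable_region(const std::uint8_t * buffer,
                                                           std::size_t buffer_size,
                                                           std::uint64_t head,
                                                           std::uint64_t tail)
    {
        std::size_t const mask = buffer_size - 1;
        std::size_t const used = static_cast<std::size_t>(head - tail); // head/tail grow monotonically
        std::size_t const start = static_cast<std::size_t>(tail) & mask;

        if (start + used <= buffer_size) {
            // contiguous: a single span covers it
            return {{buffer + start, used}, {nullptr, 0}};
        }

        // wrapped: tail .. end of buffer, then start of buffer .. remainder
        std::size_t const first = buffer_size - start;
        return {{buffer + start, first}, {buffer, used - first}};
    }
}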
+ * + * @param cpu The cpu for which the associated mmap should be removed */ template - auto async_poll(int cpu, std::shared_ptr intermediate_buffer, CompletionToken && token) + auto async_remove_ringbuffer(int cpu, CompletionToken && token) { using namespace async::continuations; - return async_initiate>( - [self = shared_from_this(), cpu, ibuf = std::move(intermediate_buffer)]() mutable { - return start_on(self->strand) - | then([self, cpu, ibuf = std::move(ibuf)]() mutable - -> polymorphic_continuation_t { - auto buf_it = self->per_cpu_buffers.find(cpu); - if (buf_it == self->per_cpu_buffers.end()) { - LOG_DEBUG("No perf buffer for CPU %d", cpu); - return start_with( - boost::asio::error::make_error_code(boost::asio::error::misc_errors::not_found)); - } - return self->async_send(buf_it->second, ibuf, use_continuation); - }); + LOG_TRACE("Remove mmap requested for %d", cpu); + + return async_initiate>( + [st = shared_from_this(), cpu]() mutable { + return start_on(st->strand) // + | then([st, cpu]() { + LOG_TRACE("Remove mmap marked for %d", cpu); + st->removed_cpus.insert(cpu); + }) // + | st->async_poll(cpu, use_continuation); }, token); } /** - * Write out data from all ringbuffers to the intermediate buffer. - * - * @tparam CompletionToken Token type, expects an error_code. - * @param intermediate_buffer Buffer to dump the ringbuffer data into - * @param token Called once complete. - * @return Nothing or a continuation, depending on @a CompletionToken + * Wait for notification that the required number of bytes is sent in one-shot mode + * NB: will never notify if one-shot mode is disabled */ template - auto async_poll_all(std::shared_ptr intermediate_buffer, CompletionToken && token) + auto async_wait_one_shot_full(CompletionToken && token) { using namespace async::continuations; - return async_initiate>( - [self = shared_from_this(), ibuf = std::move(intermediate_buffer)]() mutable { - // If there's a failure, we need to report an appropriate error code, but we don't - // want to exit early as all other ringbuffers still need polling - return start_on(self->strand) // - | then([self]() { - return start_with(self->per_cpu_buffers.begin(), boost::system::error_code {}); - }) - | loop( - [self](auto it, auto ec) { - return start_on(self->strand) - | then([=]() { return start_with(it != self->per_cpu_buffers.end(), it, ec); }); - }, - [self, ibuf = std::move(ibuf)](auto it, auto ec) mutable { - return self->async_send(it->second, ibuf, use_continuation) - | then([=](auto new_ec) mutable { - // Even in the event of failure, always increment the iterator - if (new_ec) { - ec = new_ec; - } - return start_with(++it, ec); - }); - }) - | then([](auto /*it*/, auto ec) { return ec; }); + LOG_TRACE("Wait oneshot-full requested"); + + return async_initiate_explicit( + [st = shared_from_this()](auto && sc) mutable { + submit(start_on(st->strand) // + | then([st, sc = sc.move()]() mutable { + LOG_TRACE("Wait oneshot-full started"); + + // notify directly if already full + if (st->is_one_shot_full()) { + resume_continuation(st->strand.context(), std::move(sc)); + } + + // save it for later + runtime_assert(!st->one_shot_mode_observer, + "Cannot register two one-shot mode observers"); + + st->one_shot_mode_observer = std::move(sc); + }), + sc.get_exceptionally()); }, token); } + /** Is the output data full wrt one-shot mode */ + [[nodiscard]] bool is_one_shot_full() const + { + return ((one_shot_mode_limit > 0) + && (cumulative_bytes_sent_apc_frames.load(std::memory_order_acquire) >= 
one_shot_mode_limit)); + } + + /** Manually trigger the one-shot-mode callback */ + void trigger_one_shot_mode() + { + // set both to non-zero to mark as triggered + one_shot_mode_limit = 1; + cumulative_bytes_sent_apc_frames.store(1, std::memory_order_release); + + // trigger if possible + boost::asio::post(strand, [st = this->shared_from_this()]() { + async::continuations::stored_continuation_t<> one_shot_mode_observer { + std::move(st->one_shot_mode_observer)}; + + if (one_shot_mode_observer) { + resume_continuation(st->strand.context(), std::move(one_shot_mode_observer)); + } + }); + } + private: - using data_encoder_type = - frame_encoder_t; - using aux_encoder_type = frame_encoder_t; + /** + * Send one apc_frame IPC message, returns the head, new-tail and error code as required at the end of each send loop iteration + * + * @param st The this pointer for the perf_buffer_consumer_t that made the request + * @param cpu The cpu associated with the request + * @param buffer The apc_frame data buffer + * @param head The aux_head or data_head value + * @param tail The new value for aux_tail or data_tail after the send completes + * @return A continuation producing the head, new-tail and error code values + */ + static async::continuations::polymorphic_continuation_t + do_send_msg(std::shared_ptr const & st, + int cpu, + std::vector buffer, + std::uint64_t head, + std::uint64_t tail); + + /** + * Common to both aux and data send loops, this function will extract the head and tail field, then iterate over the buffer until tail == head, sending some chunk and then moving tail + * + * @tparam HeadField A pointer-to-member-variable for the aux_head or data_head field in the mmap header + * @tparam TailField A pointer-to-member-variable for the aux_tail or data_tail field in the mmap header + * @tparam Op The loop body operation, that encodes and sends some chunk of the mmap. 
Must return a continuation over `(head, new-tail, error-code)` + * @param mmap The mmap object + * @param cpu The cpu associated with this mmap + * @param op The operation + * @return a continuation that produces an error code + */ + template<__u64 perf_event_mmap_page::*HeadField, __u64 perf_event_mmap_page::*TailField, typename Op> + static async::continuations::polymorphic_continuation_t do_send_common( + std::shared_ptr const & st, + std::shared_ptr const & mmap, + int cpu, + Op && op); - // It's better performance-wise to use unordered_map, but the lack of determinisim of frame order makes - // testing impossible (no, the default hash has does not order the buckets in ascending order) - using per_cpu_buffer_type = std::map>>; + /** + * Read and send the aux section + */ + static async::continuations::polymorphic_continuation_t do_send_aux_section( + std::shared_ptr const & st, + std::shared_ptr const & mmap, + int cpu, + boost::system::error_code ec_from_data, + bool modified_from_data); - strand_type strand; - buffer_config_t config; + /** + * Read and send the data section + */ + static async::continuations::polymorphic_continuation_t do_send_data_section( + std::shared_ptr const & st, + std::shared_ptr const & mmap, + int cpu); - std::shared_ptr data_encoder; - std::shared_ptr aux_encoder; + /** + * Construct the poll operation for one cpu + * + * @param st The shared this + * @param mmap The mmap being read from + * @param cpu The cpu to poll + */ + [[nodiscard]] static async::continuations::polymorphic_continuation_t do_poll( + std::shared_ptr const & st, + std::shared_ptr const & mmap, + int cpu); - per_cpu_buffer_type per_cpu_buffers; + std::atomic_size_t cumulative_bytes_sent_apc_frames {0}; + std::size_t one_shot_mode_limit {0}; + std::set busy_cpus {}; + std::set removed_cpus {}; + std::map> per_cpu_mmaps {}; + std::shared_ptr ipc_sink; + async::continuations::stored_continuation_t<> one_shot_mode_observer {}; + boost::asio::io_context::strand strand; }; } diff --git a/daemon/agents/perf/perf_capture.h b/daemon/agents/perf/perf_capture.h index a6176673..d06e2422 100644 --- a/daemon/agents/perf/perf_capture.h +++ b/daemon/agents/perf/perf_capture.h @@ -2,15 +2,18 @@ #pragma once -#include "CpuUtils.h" #include "GetEventKey.h" -#include "ICpuInfo.h" #include "SessionData.h" #include "Time.h" #include "agents/common/nl_cpu_monitor.h" #include "agents/common/polling_cpu_monitor.h" #include "agents/perf/capture_configuration.h" +#include "agents/perf/cpu_info.h" #include "agents/perf/cpufreq_counter.h" +#include "agents/perf/events/event_binding_manager.hpp" +#include "agents/perf/events/perf_activator.hpp" +#include "agents/perf/perf_capture_cpu_monitor.h" +#include "agents/perf/perf_capture_helper.h" #include "agents/perf/perf_driver_summary.h" #include "agents/perf/sync_generator.h" #include "apc/misc_apc_frame_ipc_sender.h" @@ -18,6 +21,7 @@ #include "async/continuations/async_initiate.h" #include "async/continuations/continuation.h" #include "async/continuations/operations.h" +#include "async/continuations/stored_continuation.h" #include "async/continuations/use_continuation.h" #include "ipc/messages.h" #include "ipc/raw_ipc_channel_sink.h" @@ -33,440 +37,225 @@ #include #include -// TODO remove me -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wunused-parameter" - namespace agents::perf { - inline monotonic_delta_t monotonic_delta_now(std::uint64_t monotonic_start) - { - return monotonic_delta_t(getTime() - monotonic_start); - } - - //////// - /** * Manages 
the perf capture process. */ class perf_capture_t : public std::enable_shared_from_this { public: - /** - * Construct a capture object from the provided configuration - */ - std::shared_ptr create(boost::asio::io_context & context, - std::shared_ptr ipc_sink, - perf_capture_configuration_t & configuration) - { - return std::make_shared(context, std::move(ipc_sink), configuration); - } - - private: - /** Implements the ICpuInfo interface, providing a thin wrapper around the data received in the configuration message and allowing simple rescan of properties */ - class cpu_info_t : public ICpuInfo { - public: - explicit cpu_info_t(perf_capture_configuration_t & configuration) : configuration(configuration) {} - - [[nodiscard]] lib::Span getCpuIds() const override { return configuration.per_core_cpuids; } - - [[nodiscard]] lib::Span getClusters() const override { return configuration.clusters; } - - [[nodiscard]] lib::Span getClusterIds() const override - { - return configuration.per_core_cluster_index; - } - - [[nodiscard]] const char * getModelName() const override { return ""; } - - void updateIds(bool /*ignoreOffline*/) override - { - cpu_utils::readCpuInfo(true, false, configuration.per_core_cpuids); - ICpuInfo::updateClusterIds(configuration.per_core_cpuids, - configuration.clusters, - configuration.per_core_cluster_index); - } - - private: - perf_capture_configuration_t & configuration; - }; - - using cpu_no_t = int; - - boost::asio::io_context::strand strand; - std::shared_ptr ipc_sink; - perf_capture_configuration_t & configuration; - perf_groups_activator_t counter_groups; - cpu_info_t cpu_info {configuration}; - apc::misc_apc_frame_ipc_sender_t misc_apc_frame_ipc_sender {ipc_sink}; - std::shared_ptr> nl_kobject_uevent_cpu_monitor {}; - std::shared_ptr polling_cpu_monitor {}; - std::set monitored_pids {}; - std::unique_ptr sync_thread {}; - bool enable_on_exec {}; + using perf_capture_helper_t = agents::perf::perf_capture_helper_t<>; + using async_perf_ringbuffer_monitor_t = typename perf_capture_helper_t::async_perf_ringbuffer_monitor_t; + using perf_capture_events_helper_t = typename perf_capture_helper_t::perf_capture_events_helper_t; + using event_binding_manager_t = typename perf_capture_events_helper_t::event_binding_manager_t; + using process_monitor_t = typename perf_capture_helper_t::process_monitor_t; /** - * For a single cpu, read the initial counter values for any counters that must be polled on start up. - * - * Currently, this is only for the cpu_frequency counter. - * - * @param monotonic_start The capture start timestamp (in CLOCK_MONOTONIC_RAW) - * @param cpu_no The number of the cpu for which counters should be polled - */ - [[nodiscard]] auto co_read_initial_counter_value(std::uint64_t monotonic_start, cpu_no_t cpu_no) - { - using namespace async::continuations; - - auto st = shared_from_this(); - - return start_on(strand) // - | then([st, monotonic_start, cpu_no]() -> polymorphic_continuation_t<> { - // read the counter - auto counter = read_cpu_frequency(cpu_no, - st->cpu_info, - st->configuration.cluster_keys_for_cpu_frequency_counter); - - if (!counter) { - return {}; - } - - std::array counter_values {{ - *counter, - }}; - - return st->misc_apc_frame_ipc_sender.async_send_perf_counters_frame( - monotonic_delta_now(monotonic_start), - counter_values, - use_continuation) // - | map_error(); - }); - } - - /** - * For all cpus, read the initial counter values for any counters that must be polled on start up. 
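/*
 * Editor's note: a minimal sketch of one way the initial cpu_frequency value could be
 * polled from sysfs, for illustration only.  It is not the patch's read_cpu_frequency()
 * implementation, which also maps the value onto a per-cluster counter key.
 * scaling_cur_freq reports kHz, hence the * 1000 to get Hz.
 */
#include <cstdint>
#include <fstream>
#include <optional>
#include <sstream>

namespace sketch {
    inline std::optional<std::uint64_t> read_cpu_frequency_hz(int cpu_no)
    {
        std::ostringstream path;
        path << "/sys/devices/system/cpu/cpu" << cpu_no << "/cpufreq/scaling_cur_freq";

        std::ifstream file {path.str()};
        std::uint64_t khz = 0;
        if (!(file >> khz)) {
            return std::nullopt; // core offline or no cpufreq support
        }
        return khz * 1000; // sysfs reports kHz
    }
}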
- * - * @param monotonic_start The capture start timestamp (in CLOCK_MONOTONIC_RAW) - */ - [[nodiscard]] auto co_read_initial_counter_values(std::uint64_t monotonic_start) - { - using namespace async::continuations; - - return iterate(0UL, cpu_info.getNumberOfCores(), [st = shared_from_this(), monotonic_start](auto cpu_no) { - return st->co_read_initial_counter_value(monotonic_start, cpu_no); - }); - } - - /** - * Poll all currently running processes/threads in /proc and write their basic properties (pid, tid, comm, exe) into the capture - * - * @param monotonic_start The capture start timestamp (in CLOCK_MONOTONIC_RAW) - */ - [[nodiscard]] auto co_read_process_properties(std::uint64_t monotonic_start) - { - // TODO - SDDAP-11253 - using namespace async::continuations; - - return start_with(); - } - - /** - * Poll all currently running processes/threads in /proc and write their `maps` file contents into the capture - * - * @param monotonic_start The capture start timestamp (in CLOCK_MONOTONIC_RAW) - */ - [[nodiscard]] auto co_read_process_maps(std::uint64_t monotonic_start) - { - // TODO - SDDAP-11254 - using namespace async::continuations; - - return start_with(); - } - - /** - * Read the kallsyms file and write into the capture - */ - [[nodiscard]] auto co_read_kallsyms() - { - // TODO - dep SDDAP-11229 - using namespace async::continuations; - - return start_with(); - } - - /** - * Send a core name apc frame - * - * @param cpu_no The cpu number of the core to send the message for - */ - [[nodiscard]] auto co_send_core_name_msg(cpu_no_t cpu_no) - { - using namespace async::continuations; - - auto st = shared_from_this(); - - return start_on(st->strand) // - | then([st, cpu_no]() -> polymorphic_continuation_t<> { - // Don't send information on a cpu we know nothing about - const int cpu_id = st->cpu_info.getCpuIds()[cpu_no]; - if (cpu_id == -1) { - return {}; - } - - // we use cpuid lookup here for look up rather than clusters because it maybe a cluster - // that wasn't known at start up - auto it = st->configuration.cpuid_to_core_name.find(cpu_id); - if (it != st->configuration.cpuid_to_core_name.end()) { - return st->misc_apc_frame_ipc_sender.async_send_core_name(cpu_no, - cpu_id, - it->second, - use_continuation) - | map_error(); - } - - // create the core name string - lib::printf_str_t<32> buf {"Unknown (0x%.3x)", cpu_id}; - return st->misc_apc_frame_ipc_sender.async_send_core_name(cpu_no, - cpu_id, - std::string(buf), - use_continuation) - | map_error(); - }); - } - - /** - * Send the initial summary frame - * - * @param monotonic_start The capture start timestamp (in CLOCK_MONOTONIC_RAW) - */ - [[nodiscard]] async::continuations::polymorphic_continuation_t<> co_send_summary_frame( - std::uint64_t monotonic_start) - { - using namespace async::continuations; - - auto st = shared_from_this(); - auto state = create_perf_driver_summary_state(configuration.perf_config, monotonic_start); - - if (!state) { - return start_with( - boost::asio::error::make_error_code(boost::asio::error::basic_errors::operation_aborted)) - | map_error(); - } - - return start_with() - // send the summary - | misc_apc_frame_ipc_sender.async_send_summary_message(std::move(*state), - use_continuation) // - | map_error() - // send core names - | iterate(0UL, cpu_info.getNumberOfCores(), [st](cpu_no_t cpu_no) { - return st->co_send_core_name_msg(cpu_no); - }); - } - - /** - * Activate all the perf events for a given core, and start observing them in the ring buffer, - * but do not necessarily enable the events. 
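/*
 * Editor's note: background sketch of the kernel mechanism behind "activate the events
 * but do not necessarily enable them": a perf event can be created in the disabled
 * state and switched on later with an ioctl (or automatically when the target process
 * execs, via enable_on_exec).  This is generic perf_event_open usage for illustration,
 * not the patch's perf_activator_t implementation.
 */
#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

namespace sketch {
    // create a cpu-cycles counter on `cpu` for `pid`, initially inactive
    inline int open_disabled_cycles_counter(pid_t pid, int cpu)
    {
        perf_event_attr attr {};
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_HARDWARE;
        attr.config = PERF_COUNT_HW_CPU_CYCLES;
        attr.disabled = 1;         // created but not counting yet
        // attr.enable_on_exec = 1; // alternative: let the kernel enable it on exec

        return static_cast<int>(
            syscall(SYS_perf_event_open, &attr, pid, cpu, /* group_fd */ -1, /* flags */ 0UL));
    }

    // start counting once the capture actually begins
    inline int enable_counter(int fd) { return ioctl(fd, PERF_EVENT_IOC_ENABLE, 0); }

    // stop counting again, e.g. when the core goes offline
    inline int disable_counter(int fd) { return ioctl(fd, PERF_EVENT_IOC_DISABLE, 0); }
}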
- * - * Events will only be enabled if `start_counters` was previously called, or we are `enable_on_exec` and - * `co_exec_child` was previously completed. - * - * @param cpu_no The core for which to enable events - * @return The async result will be a bool indicating true for successful onlining, and false for core is offline. + * Construct a capture object from the provided configuration */ - [[nodiscard]] auto co_prepare_per_core_events(cpu_no_t cpu_no) + static std::shared_ptr create(boost::asio::io_context & context, + process_monitor_t & process_monitor, + std::shared_ptr ipc_sink, + agent_environment_base_t::terminator terminator, + std::shared_ptr configuration) { - // TODO: create all PEAs and buffer groups and start observign for events - // - SDDAP-11258, SDDAP-11259 - using namespace async::continuations; - - return start_with(true); + return std::make_shared(context, + process_monitor, + std::move(ipc_sink), + std::move(terminator), + std::move(configuration)); } /** - * Deactivate all the perf events for a given core and stop observing them. + * Construct a new perf capture object * - * @param cpu_no The core for which to enable events + * @param context The io context + * @param sink The raw ipc channel sink + * @param conf The configuration message contents */ - [[nodiscard]] auto co_remove_per_core_events(cpu_no_t cpu_no) + perf_capture_t(boost::asio::io_context & context, + process_monitor_t & process_monitor, + std::shared_ptr sink, + agent_environment_base_t::terminator terminator, + std::shared_ptr conf) + : strand(context), + ipc_sink(std::move(sink)), + configuration(std::move(conf)), + perf_activator(std::make_shared(configuration, context)), + perf_capture_helper(std::make_shared( + configuration, + context, + process_monitor, + std::move(terminator), + std::make_shared( + context, + ipc_sink, + perf_activator, + configuration->session_data.live_rate, + (configuration->session_data.one_shot ? configuration->session_data.total_buffer_size : 0)), + perf_capture_events_helper_t(configuration, + event_binding_manager_t(perf_activator, + configuration->event_configuration, + configuration->uncore_pmus, + configuration->per_core_spe_type, + configuration->perf_config.is_system_wide, + configuration->enable_on_exec), + std::move(configuration->pids)), + std::make_shared(configuration), + ipc_sink)), + perf_capture_cpu_monitor(std::make_shared(context, + configuration->num_cpu_cores, + perf_capture_helper)) { - // TODO: create all PEAs and buffer groups and start observign for events - // DO NOT enable the PEAs yet - // - SDDAP-11261 - using namespace async::continuations; - - return start_with(); } /** - * Rescan for any changes to the CPU info, sending the appropriate core name message - * - * @param cpu_no The core for which to enable events + * Called once at agent start *after* the capture configuration is received, prepares the agent + * ready for capture. 
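/*
 * Editor's note: illustrative sketch of the kind of /proc walk behind the
 * process-properties pass described above (co_read_process_properties /
 * async_read_process_properties): enumerate pid directories, then the tids under
 * /proc/<pid>/task, and read each thread's comm.  The emit callback is hypothetical;
 * the patch delegates this work to perf_capture_helper_t.
 */
#include <cctype>
#include <filesystem>
#include <fstream>
#include <functional>
#include <string>
#include <sys/types.h>
#include <system_error>

namespace sketch {
    inline void for_each_thread(std::function<void(pid_t pid, pid_t tid, std::string comm)> emit)
    {
        namespace fs = std::filesystem;

        for (auto const & proc_entry : fs::directory_iterator("/proc")) {
            auto const pid_name = proc_entry.path().filename().string();
            if (pid_name.empty() || !std::isdigit(static_cast<unsigned char>(pid_name[0]))) {
                continue; // not a process directory
            }
            auto const pid = static_cast<pid_t>(std::stol(pid_name));

            // processes can vanish mid-scan, so tolerate errors on the task directory
            std::error_code ec;
            for (auto const & task_entry : fs::directory_iterator(proc_entry.path() / "task", ec)) {
                auto const tid = static_cast<pid_t>(std::stol(task_entry.path().filename().string()));

                std::ifstream comm_file {task_entry.path() / "comm"};
                std::string comm;
                std::getline(comm_file, comm);

                emit(pid, tid, comm);
            }
        }
    }
}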
*/ - [[nodiscard]] auto co_rescan_cpu_info(cpu_no_t cpu_no) + template + auto async_prepare(CompletionToken && token) { using namespace async::continuations; - return start_on(strand) // - | then([st = shared_from_this()]() { st->cpu_info.updateIds(true); }) // - | co_send_core_name_msg(cpu_no); + return async_initiate( + [st = shared_from_this()]() { + // spawn a thread to poll for process to start or fork (but not exec the app we are launching) + // do not block on the continuation here, as it blocks the message loop + spawn("async_prepare", + st->perf_capture_helper->async_prepare_process(use_continuation) + // tell the shell gator that we are ready + | st->perf_capture_helper->async_notify_agent_ready(use_continuation) + // + | map_error_and_discard(), + [st](bool failed) { + // an error occured, terminate + if (failed) { + st->perf_capture_helper->terminate(); + } + }); + + return start_with(); + }, + std::forward(token)); } /** - * Output any cpu online/offline event messages as part of a state change + * Called once the 'msg_start_t' message is received * - * @param monotonic_start The capture start timestamp (in CLOCK_MONOTONIC_RAW) - * @param cpu_no The core for which to enable events - * @param online True if the core was online, false if it was offline + * @param monotonic_start The monotonic start time */ - [[nodiscard]] auto co_core_state_change_msg(std::uint64_t monotonic_start, cpu_no_t cpu_no, bool online) + template + auto async_on_received_start_message(std::uint64_t monotonic_start, CompletionToken && token) { - using namespace async::continuations; - // TODO: Create the apc frame - return ipc_sink->async_send_message( - ipc::msg_cpu_state_change_t {{monotonic_delta_now(monotonic_start), cpu_no, online}}, - use_continuation) - | map_error_and_discard(); + return async_initiate( + [st = shared_from_this(), monotonic_start]() { + return start_on(st->strand) + // send the summary frame + | st->perf_capture_helper->async_send_summary_frame(monotonic_start, use_continuation) + // start generating sync events and set misc ready parts for the helper + | then([st, monotonic_start]() { + st->perf_capture_helper->enable_counters(); + st->perf_capture_helper->observe_one_shot_event(); + st->start_sync_thread(monotonic_start); + }) + // Start any pid monitoring + | st->perf_capture_helper->async_start_pids(use_continuation) + // bring online the core monitoring (after setting start_counters, as this enables the buffer monitor and tells the event binding set to activate in a started state) + | st->perf_capture_cpu_monitor->async_start_monitoring(monotonic_start, use_continuation) + // send any manually read initial counter values + | st->perf_capture_helper->async_read_initial_counter_values(monotonic_start, use_continuation) + // Spawn a separate async 'threads' to send various system-wide bits of data whilst the rest of the capture process continues + | then([st]() { + // the process initial properties + spawn_terminator( + "process properies reader", + st, + st->perf_capture_helper->async_read_process_properties(use_continuation)); + + // and the contents of each process 'maps' file + spawn_terminator("process maps reader", + st, + st->perf_capture_helper->async_read_process_maps(use_continuation)); + + // and the contents of kallsyms file + spawn_terminator("kallsyms reader", + st, + st->perf_capture_helper->async_read_kallsyms(use_continuation)); + + // - finally, once the cores are all online, exec the child process + spawn_terminator( + "waiting for cores to online", + st, + 
st->perf_capture_cpu_monitor->async_wait_for_all_cores_ready(use_continuation) + | then([st](bool ready) -> polymorphic_continuation_t<> { + if (!ready) { + return {}; + } + + // tell shell gator that the capture has started and then exec the forked process + return st->perf_capture_helper->async_notify_start_capture( + use_continuation) + | st->perf_capture_helper->async_exec_child(use_continuation); + })); + }); + }, + std::forward(token)); } - /** - * Handle a state change event from the CPU online/offline monitor - * - * @param monotonic_start The capture start timestamp (in CLOCK_MONOTONIC_RAW) - * @param cpu_no The core for which to enable events - * @param online True if the core was online, false if it was offline - */ - [[nodiscard]] auto co_update_cpu_state(std::uint64_t monotonic_start, cpu_no_t cpu_no, bool online) + /** Called to shutdown the capture */ + template + auto async_shutdown(CompletionToken && token) { using namespace async::continuations; - auto st = shared_from_this(); + return async_initiate( + [st = shared_from_this()]() { + return start_on(st->strand) // + | then([st]() mutable { + // trigger termination of various parts + st->perf_capture_cpu_monitor->terminate(); - return start_on(strand) // - | predicate([st]() { return !st->is_terminated(); }) - | do_if_else( - [online]() { return online; }, - // when online - [st, monotonic_start, cpu_no]() { - return start_with() - // rescan for the updated cpuid - | st->co_rescan_cpu_info(cpu_no) - // then create the PEAs and attach them to the buffer - | st->co_prepare_per_core_events(cpu_no) - // act according to whether or not the core actually was online (as it could go off again during activation) - | then([st, monotonic_start, cpu_no]( - bool really_online) mutable -> polymorphic_continuation_t<> { - // if it didnt come online for some reason, then send an offline event - if (!really_online) { - return start_with() - // deactivate all the events - | st->co_remove_per_core_events(cpu_no) - // write out an offline APC frame - | st->co_core_state_change_msg(monotonic_start, cpu_no, false); - } + st->perf_capture_helper->terminate(); - // is online, then read its counters and write out state change msg - return start_with() - // read the initial freq value - | st->co_read_initial_counter_value(monotonic_start, cpu_no) - // write out an online/offline APC frame - | st->co_core_state_change_msg(monotonic_start, cpu_no, true); - }); - }, - // when offline - [st, monotonic_start, cpu_no]() { - return start_with() - // deactivate all the events - | st->co_remove_per_core_events(cpu_no) - // write out an offline APC frame - | st->co_core_state_change_msg(monotonic_start, cpu_no, false); - }); - } + if (st->sync_thread) { + st->sync_thread->terminate(); + } - /** - * Launch any android package and then poll for the process to start. - * Once the process is detected as running, the list of tracked pids is updated. - */ - [[nodiscard]] auto co_wait_for_process() - { - // TODO - // -- 1. tell the shell to launch any android package (msg_exec_target_app_t) - // -- 2. poll proc for the process to start (check for termination) - using namespace async::continuations; - - return start_with(); + // then wait for the ringbuffers to be drained + return st->perf_capture_helper->async_wait_terminated(use_continuation); + }); + }, + std::forward(token)); } - /** - * Fork (but not exec) the child process. The process is forked so that its pid is known - * and events may be attached to it. 
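/*
 * Editor's note: illustrative sketch of the "fork now, exec later" pattern described
 * above, so that the child's pid exists (and counters can be attached to it) before the
 * command is actually started.  The patch drives this through perf_capture_helper_t;
 * the names and the pipe-based release mechanism here are hypothetical.
 */
#include <cerrno>
#include <sys/types.h>
#include <unistd.h>

namespace sketch {
    struct forked_child_t {
        pid_t pid;      // known immediately, so counters can be attached before the app runs
        int release_fd; // write a byte here once the capture is ready, to let the child exec
    };

    inline forked_child_t fork_but_defer_exec(char * const argv[])
    {
        int fds[2];
        if (pipe(fds) != 0) {
            return {-1, -1};
        }

        pid_t const pid = fork();
        if (pid < 0) {
            close(fds[0]);
            close(fds[1]);
            return {-1, -1};
        }

        if (pid == 0) {
            // child: block until the parent releases us, then exec the real command
            close(fds[1]);
            char go = 0;
            ssize_t n;
            do {
                n = read(fds[0], &go, 1);
            } while (n < 0 && errno == EINTR);
            if (n <= 0) {
                _exit(1); // parent went away before the capture started
            }
            close(fds[0]);
            execvp(argv[0], argv);
            _exit(127); // exec failed
        }

        // parent: keep the write end; the child stays parked on read() until it is used
        close(fds[0]);
        return {pid, fds[1]};
    }
}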
The process is only exec'd once the capture is - * ready to start. - */ - [[nodiscard]] auto co_fork_process() - { - // TODO - // -- 1. fork the child process but do not exec it - - using namespace async::continuations; - - return start_with(); - } + private: + using cpu_no_t = int; - /** - * Prepare any process that should be profiled; at the end of this operation the list - * of tracked pids will contains one or more values prepresenting the processes to - * profile. - * When making a system-wide capture (without --app/--pid etc), or for where the pids - * are already specified (with --pid) then this operation is a nop. - */ - [[nodiscard]] auto co_prepare_process() + template + static void spawn_terminator(char const * name, + std::shared_ptr const & shared_this, + async::continuations::continuation_t && continuation) { - using namespace async::continuations; - - auto st = shared_from_this(); - - return start_with() - | do_if([st]() { return !st->configuration.wait_process->empty(); }, // - [st]() { return st->co_wait_for_process(); }) - | do_if([st]() { return st->configuration.command.has_value(); }, // - [st]() { return st->co_fork_process(); }); + spawn(name, std::move(continuation), [shared_this](bool failed) { + if (failed) { + shared_this->perf_capture_helper->terminate(); + } + }); } - /** - * Exec the child process forked previously for --app - */ - [[nodiscard]] auto co_exec_child() - { - // TODO - using namespace async::continuations; - - // - make sure to call start_counters, even if enable_on_exec is true, - // but only once the process has exec'd so that we don't somehow race - // a core coming online and the process exec-ing. - - return start_with(); - } + boost::asio::io_context::strand strand; + std::shared_ptr ipc_sink; + std::shared_ptr configuration; + std::shared_ptr cpu_info {}; + std::shared_ptr perf_activator {}; + std::shared_ptr perf_capture_helper {}; + std::unique_ptr sync_thread {}; + std::shared_ptr perf_capture_cpu_monitor {}; /** @return True if the capture is terminated, false if not */ - [[nodiscard]] bool is_terminated() const - { - // TODO - return false; - } - - /** Terminate the running capture */ - void terminate() - { - // TODO - - if (sync_thread) { - sync_thread->terminate(); - } - }; - - /** Enable all counters so that they start producing events */ - void start_counters() { - // TODO - }; + [[nodiscard]] bool is_terminated() const { return perf_capture_cpu_monitor->is_terminated(); } /** * Launch the SPE sync thread @@ -478,225 +267,14 @@ namespace agents::perf { if (!is_terminated()) { runtime_assert(sync_thread == nullptr, "start_sync_thread called twice"); - sync_thread = sync_generator::create(configuration.perf_config.has_attr_clockid_support, - counter_groups.hasSPE(), + sync_thread = sync_generator::create(configuration->perf_config.has_attr_clockid_support, + perf_capture_helper->has_spe(), ipc_sink); - sync_thread->start(monotonic_start); + if (sync_thread != nullptr) { + sync_thread->start(monotonic_start); + } } }; - - /** - * Common cpu monitoring setup code - * - * @tparam Monitor The monitor type - * @param st The shared pointer to this - * @param monitor The monitor pointer - */ - template - static void start_monitoring_cpus(std::uint64_t monotonic_start, - std::shared_ptr const & st, - std::shared_ptr monitor) - { - using namespace async::continuations; - - // repeatedly consume online/offline events - repeatedly( - [st]() { - return start_on(st->strand) // - | then([st]() { return !st->is_terminated(); }); - }, // - [st, 
monitor, monotonic_start]() mutable { - return monitor->async_receive_one(use_continuation) // - | map_error() // - | then([st, monotonic_start](auto event) mutable { - return st->co_update_cpu_state(monotonic_start, event.cpu_no, event.online); - }); - }) - | finally([st](auto err) { - // log the failure - error_swallower_t::consume("cpu monitoring", err); - - // make sure to terminate - st->terminate(); - }); - } - - /** - * Start observing for CPU online events from netlink - */ - void start_monitoring_uevents(std::uint64_t monotonic_start) - { - using namespace async::continuations; - - auto st = shared_from_this(); - auto monitor = nl_kobject_uevent_cpu_monitor; - - // should already be created and open - runtime_assert(monitor != nullptr, "monitor is nullptr"); - runtime_assert(monitor->is_open(), "monitor is not open"); - - start_monitoring_cpus(monotonic_start, shared_from_this(), std::move(monitor)); - } - - /** - * Start observing for CPU online events by polling sysfs - */ - void start_polling_cpus(std::uint64_t monotonic_start) - { - using namespace async::continuations; - - // expected to create it on first use - runtime_assert(polling_cpu_monitor == nullptr, "polling_cpu_monitor is not nullptr"); - - auto monitor = polling_cpu_monitor = std::make_shared(strand.context()); - - start_monitoring_cpus(monotonic_start, shared_from_this(), std::move(monitor)); - } - - public: - /** - * Construct a new perf capture object - * - * @param context The io context - * @param ipc_sink The raw ipc channel sink - * @param configuration The configuration message contents - */ - perf_capture_t(boost::asio::io_context & context, - std::shared_ptr ipc_sink, - perf_capture_configuration_t & configuration) - : strand(context), - ipc_sink(std::move(ipc_sink)), - configuration(configuration), - counter_groups { - { - configuration.perf_config, - configuration.clusters, - configuration.per_core_cluster_index, - configuration.session_data.exclude_kernel_events || configuration.perf_config.exclude_kernel, - }, - configuration.perf_groups, - }, - nl_kobject_uevent_cpu_monitor(std::make_shared>(context)), - monitored_pids(std::move(configuration.pids)) - { - if ((!configuration.perf_config.is_system_wide) && (!configuration.perf_config.has_attr_clockid_support)) { - LOG_DEBUG("Tracing gatord as well as target application as no clock_id support"); - monitored_pids.insert(getpid()); - } - - // allow self profiling -#if (defined(GATOR_SELF_PROFILE) && (GATOR_SELF_PROFILE != 0)) - const bool profileGator = true; -#else - const bool profileGator = - (monitored_pids.erase(0) != 0); // user can set --pid 0 to dynamically enable this feature -#endif - if (profileGator) { - // track child and parent process - monitored_pids.insert(getpid()); - monitored_pids.insert(getppid()); - } - - // was !enableOnCommandExec but this causes us to miss the exec comm record associated with the - // enable on exec doesn't work for cpu-wide events. - // additionally, when profiling gator, must be turned off - this->enable_on_exec = (configuration.enable_on_exec && !configuration.perf_config.is_system_wide - && configuration.perf_config.has_attr_clockid_support - && configuration.perf_config.has_attr_comm_exec && !profileGator); - } - - /** - * Called once at agent start *after* the capture configuration is received, prepares the agent - * ready for capture. 
- */ - auto co_prepare() - { - using namespace async::continuations; - - auto st = shared_from_this(); - - return start_on(strand) - // poll for process to start or fork (but not exec the app we are launching) - | co_prepare_process() - // tell the shell gator that we are ready - | then([st]() { - // must be in a then as we need to access the monitored_pids variable - return start_on(st->strand) // - | st->ipc_sink->async_send_message( - ipc::msg_capture_ready_t {std::vector(st->monitored_pids.begin(), // - st->monitored_pids.end())}, // - use_continuation); - }) - // - | map_error_and_discard(); - } - - /** - * Called once the 'msg_start_t' message is received - * - * @param monotonic_start The monotonic start time - */ - auto co_on_received_start_message(std::uint64_t monotonic_start) - { - using namespace async::continuations; - - auto st = shared_from_this(); - - return start_on(strand) - // send the summary frame - | co_send_summary_frame(monotonic_start) - // bring online the core monitoring - | then([st, monotonic_start]() { - // monitor for cpu state changes (do this early so we don't miss anything) - if (st->nl_kobject_uevent_cpu_monitor && st->nl_kobject_uevent_cpu_monitor->is_open()) { - st->start_monitoring_uevents(monotonic_start); - } - else { - st->start_polling_cpus(monotonic_start); - } - }) - // attempt to bring all cores online (any that were already started by the monitor will be ignored) - | iterate(0U, - st->configuration.num_cpu_cores, - [st, monotonic_start](cpu_no_t cpu_no) { - return st->co_update_cpu_state(monotonic_start, cpu_no, true); - }) - // start the counters and sync thread - | then([st, monotonic_start]() { - // start generating sync events - st->start_sync_thread(monotonic_start); - // Start events before reading proc to avoid race conditions - if (!st->enable_on_exec) { - st->start_counters(); - } - }) - // send any manually read initial counter values - | st->co_read_initial_counter_values(monotonic_start) - // and the process initial properties - | st->co_read_process_properties(monotonic_start) - // and the contents of each process 'maps' file - | st->co_read_process_maps(monotonic_start) - // and the contents of kallsyms file - | st->co_read_kallsyms() - // exec the process (if there is one) - | st->co_exec_child(); - } }; } - -/* - things not in perpare that need to be done on the shell side before sending the capture configuration message: - - // Reread cpuinfo since cores may have changed since startup - mCpuInfo.updateIds(false); - -// must be done in shell - if (!ftraceDriver.readTracepointFormats(buffer, printb, b1)) { - LOG_DEBUG("FtraceDriver::readTracepointFormats failed"); - return false; - } - -*/ - -#pragma GCC diagnostic pop diff --git a/daemon/agents/perf/perf_capture_cpu_monitor.h b/daemon/agents/perf/perf_capture_cpu_monitor.h new file mode 100644 index 00000000..f647b851 --- /dev/null +++ b/daemon/agents/perf/perf_capture_cpu_monitor.h @@ -0,0 +1,430 @@ +/* Copyright (C) 2022 by Arm Limited. All rights reserved. 
*/ + +#pragma once + +#include "Time.h" +#include "agents/common/coalescing_cpu_monitor.h" +#include "agents/common/nl_cpu_monitor.h" +#include "agents/common/polling_cpu_monitor.h" +#include "agents/perf/perf_capture_helper.h" +#include "async/continuations/async_initiate.h" +#include "async/continuations/continuation.h" +#include "async/continuations/operations.h" +#include "async/continuations/stored_continuation.h" +#include "async/continuations/use_continuation.h" + +#include +#include + +#include +#include + +namespace agents::perf { + /** + * Monitors each CPU for online/offline and activates cores. + * Templated base class (so that compile time substitution for unit testing is possible, use perf_capture_cpu_monitor_t instead) + */ + template + class basic_perf_capture_cpu_monitor_t + : public std::enable_shared_from_this< + basic_perf_capture_cpu_monitor_t> { + private: + using perf_capture_helper_t = PerfCaptureHelper; + using nl_kobject_uevent_cpu_monitor_t = NetlinkCpuMonitor; + using polling_cpu_monitor_t = PollingCpuMonitor; + using all_cores_ready_handler_t = async::continuations::stored_continuation_t; + + boost::asio::io_context::strand strand; + std::shared_ptr perf_capture_helper {}; + std::shared_ptr coalescing_cpu_monitor {}; + std::shared_ptr nl_kobject_uevent_cpu_monitor {}; + std::shared_ptr polling_cpu_monitor {}; + std::set cores_having_received_initial_event {}; + all_cores_ready_handler_t all_cores_ready_handler {}; + std::size_t num_cpu_cores; + bool terminated {false}; + bool notified_all_cores_ready_handler {false}; + + /** + * Perform the steps required to offline a cpu + * + * @param monotonic_start The capture start timestamp (in CLOCK_MONOTONIC_RAW) + * @param cpu_no The core for which to enable events + */ + [[nodiscard]] auto co_offline_cpu(std::uint64_t monotonic_start, int cpu_no) + { + using namespace async::continuations; + + LOG_DEBUG("Offlining cpu # %d", cpu_no); + + return + // deactivate all the events + perf_capture_helper->async_remove_per_core_events(cpu_no, use_continuation) + // write out an offline APC frame + | perf_capture_helper->async_core_state_change_msg(monotonic_start, cpu_no, false, use_continuation); + } + + /** + * Perform the steps required to online a cpu + * + * @param monotonic_start The capture start timestamp (in CLOCK_MONOTONIC_RAW) + * @param cpu_no The core for which to enable events + */ + [[nodiscard]] auto co_online_cpu(std::uint64_t monotonic_start, int cpu_no) + { + using namespace async::continuations; + + LOG_DEBUG("Onlining cpu # %d", cpu_no); + + return + // rescan for the updated cpuid + perf_capture_helper->async_rescan_cpu_info(cpu_no, use_continuation) + // then create the PEAs and attach them to the buffer + | perf_capture_helper->async_prepare_per_core_events(cpu_no, use_continuation) + // act according to whether or not the core actually was online (as it could go off again during activation) + | then([st = this->shared_from_this(), monotonic_start, cpu_no]( + bool really_online) mutable -> polymorphic_continuation_t<> { + // if it didnt come online for some reason, then send an offline event + if (!really_online) { + LOG_DEBUG("Onlining cpu # %d failed as not all cores came online", cpu_no); + return st->co_offline_cpu(monotonic_start, cpu_no); + } + + // is online, then read its counters and write out state change msg + return + // read the initial freq value + st->perf_capture_helper->async_read_initial_counter_value( + monotonic_start, + cpu_no, + async::continuations::use_continuation) + // write 
out an online/offline APC frame + | st->perf_capture_helper->async_core_state_change_msg(monotonic_start, + cpu_no, + true, + use_continuation); + }); + } + + /** + * Handle a state change event from the CPU online/offline monitor + * + * @param monotonic_start The capture start timestamp (in CLOCK_MONOTONIC_RAW) + * @param cpu_no The core for which to enable events + * @param online True if the core was online, false if it was offline + * @param token The completion token for the async operation + */ + template + [[nodiscard]] auto async_update_cpu_state(std::uint64_t monotonic_start, + int cpu_no, + bool online, + CompletionToken && token) + { + using namespace async::continuations; + + return async_initiate( + [st = this->shared_from_this(), monotonic_start, cpu_no, online]() { + return start_on(st->strand) // + | do_if([st, cpu_no]() { return (cpu_no >= 0) && (!st->is_terminated()); }, + [st, monotonic_start, cpu_no, online]() { + return start_with() // + | do_if_else([online]() { return online; }, + // when online + [st, monotonic_start, cpu_no]() { + return st->co_online_cpu(monotonic_start, cpu_no); + }, + // when offline + [st, monotonic_start, cpu_no]() { + return st->co_offline_cpu(monotonic_start, cpu_no); + }); + }); + }, + std::forward(token)); + } + + /** + * Common cpu monitoring setup code + * + * @tparam Monitor The monitor type + * @param st The shared pointer to this + * @param monitor The monitor pointer + */ + template + static void start_monitoring_cpus(std::uint64_t monotonic_start, + std::shared_ptr st, + std::shared_ptr monitor) + { + using namespace async::continuations; + + auto coalescing_cpu_monitor = st->coalescing_cpu_monitor; + + // repeatedly consume online/offline events from the underlying monitor and inject them into the coalescing monitor + spawn("cpu monitoring (from raw)", + repeatedly( + [st]() { + return start_on(st->strand) // + | then([st]() { return !st->is_terminated(); }); + }, // + [coalescing_cpu_monitor, monitor]() mutable { + return monitor->async_receive_one(use_continuation) // + | map_error() // + | then([coalescing_cpu_monitor](auto event) mutable { + return coalescing_cpu_monitor->async_update_state(event.cpu_no, + event.online, + use_continuation); + }); + }), + [st](bool) { + // make sure to terminate + st->terminate(); + }); + + // repeatedly consume online/offline events from the coalescing monitor + spawn("cpu monitoring (from coalescer)", + repeatedly( + [st]() { + return start_on(st->strand) // + | then([st]() { return !st->is_terminated(); }); + }, // + [st, coalescing_cpu_monitor, monotonic_start]() mutable { + return coalescing_cpu_monitor->async_receive_one(use_continuation) // + | map_error() // + | then([st, monotonic_start](auto event) mutable { + return st->async_update_cpu_state(monotonic_start, + event.cpu_no, + event.online, + use_continuation) // + | post_on(st->strand) // + | then([st, cpu_no = event.cpu_no]() { + st->check_cores_having_received_initial_event(cpu_no); + }); + }); + }), + [st](bool) { + // make sure to terminate + st->terminate(); + }); + } + + /** + * Check / notify the handler when all cores have received on event + * + * @param cpu_no The core that received an event + */ + void check_cores_having_received_initial_event(int cpu_no) + { + if ((cpu_no < 0) || (std::size_t(cpu_no) > num_cpu_cores)) { + return; + } + + auto [it, inserted] = cores_having_received_initial_event.insert(cpu_no); + (void) it; // gcc 7 :-( + + if (inserted) { + LOG_DEBUG("Core %d received its first event", cpu_no); + } + + 
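// Editor's note: the check below resumes the stored all-cores-ready continuation exactly
// once, on the first event that completes the set of cores; later events find the handler
// already consumed (notified_all_cores_ready_handler is set and the continuation moved out).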
if (inserted && (cores_having_received_initial_event.size() == num_cpu_cores)) { + LOG_DEBUG("All cores are now ready"); + all_cores_ready_handler_t all_cores_ready_handler {std::move(this->all_cores_ready_handler)}; + if (all_cores_ready_handler) { + LOG_DEBUG("Notifiying that all are ready"); + notified_all_cores_ready_handler = true; + resume_continuation(strand.context(), std::move(all_cores_ready_handler), !terminated); + } + } + } + + /** + * Start observing for CPU online events from netlink + */ + void start_monitoring_uevents(std::uint64_t monotonic_start) + { + using namespace async::continuations; + + auto st = this->shared_from_this(); + auto monitor = nl_kobject_uevent_cpu_monitor; + + // should already be created and open + runtime_assert(monitor != nullptr, "monitor is nullptr"); + runtime_assert(monitor->is_open(), "monitor is not open"); + + start_monitoring_cpus(monotonic_start, std::move(st), std::move(monitor)); + } + + /** + * Start observing for CPU online events by polling sysfs + */ + void start_polling_cpus(std::uint64_t monotonic_start) + { + using namespace async::continuations; + + auto monitor = polling_cpu_monitor; + + // create it on demand if necessary + if (monitor == nullptr) { + polling_cpu_monitor = monitor = std::make_shared(strand.context()); + } + + start_monitoring_cpus(monotonic_start, this->shared_from_this(), std::move(monitor)); + } + + public: + basic_perf_capture_cpu_monitor_t(boost::asio::io_context & context, + std::size_t num_cpu_cores, + std::shared_ptr perf_capture_helper) + : strand(context), + perf_capture_helper(std::move(perf_capture_helper)), + coalescing_cpu_monitor(std::make_shared(context)), + nl_kobject_uevent_cpu_monitor(std::make_shared(context)), + polling_cpu_monitor(), + num_cpu_cores(num_cpu_cores) + { + } + + basic_perf_capture_cpu_monitor_t(boost::asio::io_context & context, + std::size_t num_cpu_cores, + std::shared_ptr perf_capture_helper, + std::shared_ptr nl_kobject_uevent_cpu_monitor, + std::shared_ptr polling_cpu_monitor) + : strand(context), + perf_capture_helper(std::move(perf_capture_helper)), + coalescing_cpu_monitor(std::make_shared(context)), + nl_kobject_uevent_cpu_monitor(std::move(nl_kobject_uevent_cpu_monitor)), + polling_cpu_monitor(std::move(polling_cpu_monitor)), + num_cpu_cores(num_cpu_cores) + { + } + + /** @return True if the capture is terminated, false if not */ + [[nodiscard]] bool is_terminated() const { return terminated; } + + /** Terminate the running capture */ + void terminate() + { + if (terminated) { + return; + } + + LOG_DEBUG("Terminating Perf CPU monitor"); + + terminated = true; + + auto nl_kobject_uevent_cpu_monitor = this->nl_kobject_uevent_cpu_monitor; + if (nl_kobject_uevent_cpu_monitor) { + nl_kobject_uevent_cpu_monitor->stop(); + } + + auto polling_cpu_monitor = this->polling_cpu_monitor; + if (polling_cpu_monitor) { + polling_cpu_monitor->stop(); + } + + auto coalescing_cpu_monitor = this->coalescing_cpu_monitor; + if (coalescing_cpu_monitor) { + coalescing_cpu_monitor->stop(); + } + + auto perf_capture_helper = this->perf_capture_helper; + if (perf_capture_helper) { + perf_capture_helper->terminate(); + } + + // cancel any pending handler + all_cores_ready_handler_t all_cores_ready_handler {std::move(this->all_cores_ready_handler)}; + if (all_cores_ready_handler) { + resume_continuation(strand.context(), std::move(all_cores_ready_handler), false); + } + }; + + /** + * Handle a state change event from the CPU online/offline monitor + * + * @param monotonic_start The capture start 
timestamp (in CLOCK_MONOTONIC_RAW) + * @param token The completion token for the async operation + */ + template + [[nodiscard]] auto async_start_monitoring(std::uint64_t monotonic_start, CompletionToken && token) + { + using namespace async::continuations; + + return async_initiate( + [st = this->shared_from_this(), monotonic_start]() { + // monitor for cpu state changes (do this early so we don't miss anything) + return start_on(st->strand) // + // attempt to bring all cores online at startup by injecting an initial online event + | iterate(std::size_t {0}, + st->num_cpu_cores, + [st](int cpu_no) { + return st->coalescing_cpu_monitor->async_update_state(cpu_no, + true, + use_continuation); + }) + // start monitoring events which will bring online/offline as appropriate + | then([st, monotonic_start]() { + if (st->nl_kobject_uevent_cpu_monitor && st->nl_kobject_uevent_cpu_monitor->is_open()) { + // free the polling object + st->polling_cpu_monitor.reset(); + // and start monitoring + st->start_monitoring_uevents(monotonic_start); + } + else { + // free the netlink object + st->nl_kobject_uevent_cpu_monitor.reset(); + // and start monitoring + st->start_polling_cpus(monotonic_start); + } + }); + }, + std::forward(token)); + } + + /** + * Wait for all the cores to receive their first online/offline event + */ + template + [[nodiscard]] auto async_wait_for_all_cores_ready(CompletionToken && token) + { + using namespace async::continuations; + + return async_initiate_explicit( + [st = this->shared_from_this()](auto && sc) { + submit(start_on(st->strand) // + | then([st, sc = sc.move()]() mutable { + // cancel any pending handler + all_cores_ready_handler_t all_cores_ready_handler { + std::move(st->all_cores_ready_handler)}; + if (all_cores_ready_handler) { + LOG_DEBUG("Cancelling pending handler"); + resume_continuation(st->strand.context(), + std::move(all_cores_ready_handler), + false); + } + + // cancel it as already previously notified or terminated? + if (st->notified_all_cores_ready_handler || st->terminated) { + LOG_DEBUG("Cancelling pending handler as already notified"); + resume_continuation(st->strand.context(), std::move(sc), false); + } + // directly post this new one? + else if ((st->cores_having_received_initial_event.size() == st->num_cpu_cores)) { + LOG_DEBUG("Notifiying that all are ready"); + st->notified_all_cores_ready_handler = true; + resume_continuation(st->strand.context(), std::move(sc), !st->terminated); + } + // otherwise just store it + else { + LOG_DEBUG("Storing ready handler"); + st->all_cores_ready_handler = std::move(sc); + } + }), + sc.get_exceptionally()); + }, + std::forward(token)); + } + }; + + /** The cpu monitor type */ + using perf_capture_cpu_monitor_t = basic_perf_capture_cpu_monitor_t, + nl_kobject_uevent_cpu_monitor_t<>, + polling_cpu_monitor_t>; +} diff --git a/daemon/agents/perf/perf_capture_events_helper.hpp b/daemon/agents/perf/perf_capture_events_helper.hpp new file mode 100644 index 00000000..ac14d08a --- /dev/null +++ b/daemon/agents/perf/perf_capture_events_helper.hpp @@ -0,0 +1,643 @@ +/* Copyright (C) 2022 by Arm Limited. All rights reserved. 
*/ + +#pragma once + +#include "agents/perf/events/event_binding_manager.hpp" +#include "agents/perf/events/perf_activator.hpp" +#include "agents/perf/events/types.hpp" +#include "lib/error_code_or.hpp" +#include "linux/proc/ProcessChildren.h" + +#include +#include +#include +#include +#include + +#include +#include + +#include + +namespace agents::perf { + /** + * Helper for managing events and pids + * + * @tparam EventBindingManager The event_binding_manager_t (or a mock, for unit tests) + */ + template> + class perf_capture_events_helper_t { + public: + using event_binding_manager_t = EventBindingManager; + + using event_binding_manager_type = std::decay_t; + using id_to_key_mappings_t = typename event_binding_manager_type::id_to_key_mappings_t; + using stream_descriptor_t = typename event_binding_manager_type::stream_descriptor_t; + using core_no_fd_pair_t = typename event_binding_manager_type::core_no_fd_pair_t; + using fd_aux_flag_pair_t = typename event_binding_manager_type::fd_aux_flag_pair_t; + + static constexpr pid_t header_pid = event_binding_manager_type::header_pid; + + /** Returned by prepare_all_pid_trackers */ + struct prepare_all_pids_result_t { + /** The set of monitored application tids */ + std::set monitored_tids; + /** The mapping from event id to key */ + id_to_key_mappings_t id_to_key_mappings; + /** The stream descriptors to monitor */ + std::vector event_fds; + /** The stream descriptors to monitor (but that don't count towards the traced process total) */ + std::vector supplimentary_event_fds; + /** The set of pid-resumers for paused pids, which must be preserved until after the events are started */ + std::map paused_pids; + }; + + /** Returned by core_online_prepare */ + struct core_online_prepare_result_t { + /** The mapping from event id to key */ + id_to_key_mappings_t mappings; + /** The stream descriptors to monitor */ + std::vector event_fds; + /** The stream descriptors to monitor (but that don't count towards the traced process total) */ + std::vector supplimentary_event_fds; + /** The mmap */ + std::shared_ptr mmap_ptr; + /** The set of pid-resumers for paused pids, which must be preserved until after the events are started */ + std::map paused_pids; + }; + + /** Constructor */ + perf_capture_events_helper_t(std::shared_ptr const & configuration, + event_binding_manager_t && event_binding_manager, + std::set && monitored_pids) + : event_binding_manager(std::forward(event_binding_manager)), + monitored_pids(std::move(monitored_pids)), + is_system_wide(configuration->perf_config.is_system_wide), + stop_on_exit(configuration->session_data.stop_on_exit || !configuration->perf_config.is_system_wide), +#if (defined(GATOR_SELF_PROFILE) && GATOR_SELF_PROFILE) + profile_gator(true), +#else + // allow self profiling + // user can set --pid 0 to dynamically enable this feature + profile_gator(remove_pid_zero()), +#endif + // older kernels require monitoring of the sync-thread + requires_process_events_from_self((!configuration->perf_config.is_system_wide) + && (!configuration->perf_config.has_attr_clockid_support)), + // was perf_config.enable_on_exec but this causes us to miss the exec comm record associated with the initial command, plus + // enable on exec doesn't work for cpu-wide events. 
+ // additionally, when profiling gator, must be turned off + enable_on_exec(configuration->enable_on_exec && !configuration->perf_config.is_system_wide + && configuration->perf_config.has_attr_clockid_support + && configuration->perf_config.has_attr_comm_exec && !profile_gator), + // should we pause pids using SIGSTOP when preparing pids or bringing a core online + stop_pids(configuration->stop_pids) + { +#if (defined(GATOR_SELF_PROFILE) && GATOR_SELF_PROFILE) + (void) remove_pid_zero(); +#endif + + if (requires_process_events_from_self) { + LOG_DEBUG("Tracing gatord as well as target application as there is no clock_id support"); + } + if (profile_gator) { + LOG_DEBUG("Tracing gatord as well, as self-profiling was requested"); + } + } + + /** @return True if self-profiling was requested, false otherwise */ + [[nodiscard]] bool is_profile_gator() const { return profile_gator; } + + /** @return True if the perf agent must also be profiled (as the older kernel does not support clock id configuration) */ + [[nodiscard]] bool is_requires_process_events_from_self() const { return requires_process_events_from_self; } + + /** @return True if the captured events are enable-on-exec, rather than started manually */ + [[nodiscard]] bool is_enable_on_exec() const { return enable_on_exec; } + + /** @return True if configured counter groups include the SPE group */ + [[nodiscard]] bool has_spe() const { return event_binding_manager.has_spe(); } + + /** @return True if stop on exit is set */ + [[nodiscard]] bool is_stop_on_exit() const { return stop_on_exit; } + + /** @return The set of monitored pids */ + [[nodiscard]] std::set const & get_monitored_pids() const { return monitored_pids; } + + /** @return The set of monitored gatord tids */ + [[nodiscard]] std::set const & get_monitored_gatord_pids() const { return monitored_gatord_tids; } + + /** Add a pid to the list to be monitored */ + void add_monitored_pid(pid_t pid) { monitored_pids.insert(pid); } + + /** Mark the capture as having started */ + void set_capture_started() { event_binding_manager.set_capture_started(); } + + /** + * Add a set of tids to the set of monitored pids, sending SIGSTOP to them first if required. + * + * If the tids are stopped, they are held in a paused state until `clear_stopped_tids` is called. 
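For orientation, a minimal self-contained sketch of the pause-and-resume idea described above (illustrative only: the real code uses the lnx:: helpers and resumer type referenced in this file, and works at thread granularity, whereas kill() below signals whole processes):

#include <map>
#include <set>
#include <signal.h>
#include <sys/types.h>

// Illustrative RAII resumer: sends SIGCONT on destruction, mirroring the way paused
// pids are held until clear_stopped_tids() releases them.
class sigcont_resumer_t {
public:
    explicit sigcont_resumer_t(pid_t pid) : pid(pid) {}
    sigcont_resumer_t(sigcont_resumer_t && other) noexcept : pid(other.pid) { other.pid = -1; }
    sigcont_resumer_t(sigcont_resumer_t const &) = delete;
    sigcont_resumer_t & operator=(sigcont_resumer_t const &) = delete;
    sigcont_resumer_t & operator=(sigcont_resumer_t &&) = delete;
    ~sigcont_resumer_t()
    {
        if (pid > 0) {
            ::kill(pid, SIGCONT); // resume the paused process
        }
    }

private:
    pid_t pid;
};

// Pause every pid that is not one of gatord's own, remembering a resumer for each;
// the pids stay stopped for as long as the corresponding map entries are kept alive.
inline void pause_app_pids(std::set<pid_t> const & pids,
                           std::set<pid_t> const & gatord_pids,
                           std::map<pid_t, sigcont_resumer_t> & paused)
{
    for (pid_t pid : pids) {
        if ((gatord_pids.count(pid) == 0) && (paused.count(pid) == 0) && (::kill(pid, SIGSTOP) == 0)) {
            paused.emplace(pid, sigcont_resumer_t {pid});
        }
    }
}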
+ * + * @param pids The set of pids to add + */ + void add_stoppable_pids(std::set const & pids) + { + if (stop_pids && !is_system_wide) { + // get the perf agent pids + auto [just_agent_tids, all_gatord_tids] = find_gatord_tids(); + (void) just_agent_tids; //gcc7 :-( + + // SIGSTOP all pids so that they wait + auto actual_tids = lnx::stop_all_tids(pids, all_gatord_tids, all_stopped_tids); + + // add the detected tids to the monitor + for (auto tid : actual_tids) { + add_monitored_pid(tid); + } + } + else { + // add the detected pids to the monitor + for (auto pid : pids) { + add_monitored_pid(pid); + } + } + } + + /** Clear the set of stopped tids, which will cause them to resume */ + void clear_stopped_tids() + { + initial_pause_complete = true; + all_stopped_tids.clear(); + } + + /** + * Remove the --app pid + * + * @param pid The forked pid + * @return true If all monitored pids are removed and stop_on_exec is set, otherwise false + */ + [[nodiscard]] bool remove_command_pid(pid_t pid) + { + monitored_pids.erase(pid); + + return monitored_pids.empty() && stop_on_exit; + } + + /** + * Prepare all the monitored pids, their child threads are detected and added to the event monitor + * + * @param is_terminate_requested A callable `bool()` that returns true if the capture is terminated asynchronously, false otherwise + * @return nullopt is returned if the capture should terminate (due to request or error), otherwise the list of id->key mappings and set of actually monitored tids is returned + */ + template + [[nodiscard]] std::optional prepare_all_pid_trackers( + Callback && is_terminate_requested) + { + std::set actually_monitored_tids {}; + std::set actually_monitored_gatord_tids {}; + id_to_key_mappings_t all_id_key_mappings {}; + std::map paused_pids {}; + std::vector event_fds {}; + std::vector supplimentary_event_fds {}; + + // collect the monitored pids and their tids + auto monitored_tids = find_monitored_tids(); + + // get the perf agent pids + auto [just_agent_tids, all_gatord_tids] = find_gatord_tids(); + + // dont actually do anything other than check for exit in s-w mode + if (!is_system_wide) { + // pause any tids to avoid racing thread creation? ? + if (stop_pids || initial_pause_complete) { + monitored_tids = filter_and_pause_tids(all_gatord_tids, monitored_tids, paused_pids); + } + + if (!prepare_app_tids(monitored_tids, + all_gatord_tids, + just_agent_tids, + actually_monitored_tids, + actually_monitored_gatord_tids, + all_id_key_mappings, + event_fds, + supplimentary_event_fds, + is_terminate_requested)) { + return std::nullopt; + } + } + else { + // remove any tids in all_gatord_tids from monitored_tids as for the stop-on-exit check + for (pid_t tid : monitored_tids) { + if (all_gatord_tids.count(tid) > 0) { + // remove it from monitored_*pids* as it should not count towards the all_requested_tids_exited check + monitored_pids.erase(tid); + continue; + } + + actually_monitored_tids.insert(tid); + } + } + + // stop now if terminated + if (is_terminate_requested()) { + return std::nullopt; + } + + // have all the requested pids exited? 
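+ // (i.e. true when nothing could actually be attached, even though specific pids were requested or this is an app-profiling capture)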
+ auto const all_requested_tids_exited = + (actually_monitored_tids.empty() && ((!monitored_pids.empty()) || (!is_system_wide))); + + // replace the requested set with the actual set as it will be used later by the start_capture method + monitored_pids = actually_monitored_tids; + monitored_gatord_tids = std::move(actually_monitored_gatord_tids); + + // terminate if some pids were requested but none were actually monitored + if (stop_on_exit && all_requested_tids_exited) { + LOG_DEBUG("Terminating as no pids were monitorable"); + return std::nullopt; + } + + return prepare_all_pids_result_t {std::move(actually_monitored_tids), + std::move(all_id_key_mappings), + std::move(event_fds), + std::move(supplimentary_event_fds), + std::move(paused_pids)}; + } + + /** + * Start all the tracked pid events + * + * @return true on success, otherwise false if the capture should be terminated + */ + [[nodiscard]] bool start_all_pid_trackers() + { + // nothing to do ? + if (monitored_pids.empty() || is_system_wide) { + return true; + } + + // start each pid + std::size_t n_started = 0; + for (auto it = monitored_pids.begin(); it != monitored_pids.end();) { + auto pid = *it; + auto result = event_binding_manager.pid_track_start(pid); + switch (result.state) { + case aggregate_state_t::failed: { + LOG_ERROR("Could not profile pid=%d due to unexpected error", pid); + return false; + } + case aggregate_state_t::terminated: { + LOG_ERROR("Could not profile pid=%d as it has terminated", pid); + // erase item + it = monitored_pids.erase(it); + break; + } + case aggregate_state_t::offline: + case aggregate_state_t::usable: { + // these are fine + n_started += 1; + // move to next item + ++it; + break; + } + default: { + throw std::runtime_error("unexpected case aggregate_state_t"); + } + } + } + + // returning false indicates capture termination + return ((n_started > 0) || (!stop_on_exit)); + } + + /** + * Prepare any events when a cpu core comes online + * + * @param core_no The core no to online + * @param cluster_id The cluster id associated with that core + * @return An error code is returned on failure, or success if the core went offline but no error occured, otherwise the event binding manager result is returned for successful online event + */ + [[nodiscard]] lib::error_code_or_t core_online_prepare( + core_no_t core_no, + cpu_cluster_id_t cluster_id) + { + std::set additional_tids {}; + std::set supplimentary_tids {}; + std::map paused_pids {}; + std::vector event_fds {}; + std::vector supplimentary_event_fds {}; + + // Scan for any new tids; these will be added to the EBMs set of known tids and activated for any core that subsequently comes online (including this one) + // but not for any cores that are already online as it is assumed the tid will be tracked via the 'inherit' bit + if (!is_system_wide) { + // collect the monitored pids and their tids + auto monitored_tids = find_monitored_tids(); + + // get the perf agent pids + auto [just_agent_tids, all_gatord_tids] = find_gatord_tids(); + (void) just_agent_tids; // gcc 7 :-() + + // pause any tids to avoid racing thread creation? ? 
+ if (stop_pids || initial_pause_complete) { + monitored_tids = filter_and_pause_tids(all_gatord_tids, monitored_tids, paused_pids); + } + + // collect the set of tids that are new + for (pid_t tid : monitored_tids) { + if (all_gatord_tids.count(tid) == 0) { + // new tid detected, save it for passing to core_online_prepare + additional_tids.insert(tid); + // and add to the set of tracked pids + if (monitored_pids.insert(tid).second) { + LOG_DEBUG("core_online_prepare detected new tid %d", tid); + } + } + } + + supplimentary_tids = std::move(all_gatord_tids); + } + + auto result = event_binding_manager.core_online_prepare(core_no, cluster_id, additional_tids); + + switch (result.state) { + case aggregate_state_t::failed: { + return {boost::system::error_code {boost::asio::error::bad_descriptor}}; + } + case aggregate_state_t::offline: + case aggregate_state_t::terminated: { + if (remove_terminated(result.terminated_pids) && stop_on_exit) { + return {boost::system::error_code {boost::asio::error::eof}}; + } + return {boost::system::error_code {}}; + } + case aggregate_state_t::usable: { + if (remove_terminated(result.terminated_pids) && stop_on_exit) { + return {boost::system::error_code {boost::asio::error::eof}}; + } + + for (auto entry : result.event_fds_by_pid) { + if ((entry.first == header_pid) || (supplimentary_tids.count(entry.first) > 0)) { + supplimentary_event_fds.emplace_back(entry.second); + } + else { + event_fds.emplace_back(entry.second); + } + } + + return core_online_prepare_result_t {std::move(result.mappings), + std::move(event_fds), + std::move(supplimentary_event_fds), + std::move(result.mmap_ptr), + std::move(paused_pids)}; + } + default: { + throw std::runtime_error("what aggregate_state_t is this?"); + } + }; + } + + /** + * Start the core after preparing it + * + * @param core_no The core to online + * @return A pair, being an error code, and a bool flag indicating online/offline state + */ + [[nodiscard]] std::pair core_online_start(core_no_t core_no) + { + // just finish, if the capture has not started + if (!event_binding_manager.is_capture_started()) { + return {boost::system::error_code {}, true}; + } + + // otherwise start the events + auto result = event_binding_manager.core_online_start(core_no); + + switch (result.state) { + case aggregate_state_t::failed: { + return {boost::system::error_code {boost::asio::error::bad_descriptor}, false}; + } + case aggregate_state_t::offline: + case aggregate_state_t::terminated: { + if (remove_terminated(result.terminated_pids) && stop_on_exit) { + return {boost::system::error_code {boost::asio::error::eof}, false}; + } + return {boost::system::error_code {}, false}; + } + case aggregate_state_t::usable: { + if (remove_terminated(result.terminated_pids) && stop_on_exit) { + return {boost::system::error_code {boost::asio::error::eof}, false}; + } + return {boost::system::error_code {}, true}; + } + default: { + throw std::runtime_error("what aggregate_state_t is this?"); + } + }; + } + + /** + * Close events associated with some core as the core went offline + * + * @param core_no The core that went offline + */ + void core_offline(core_no_t core_no) { event_binding_manager.core_offline(core_no); } + + private: + event_binding_manager_t event_binding_manager; + std::set monitored_pids; + std::set monitored_gatord_tids {}; + std::map all_stopped_tids {}; + bool const is_system_wide; + bool const stop_on_exit; + bool const profile_gator; + bool const requires_process_events_from_self; + bool const enable_on_exec; + bool 
const stop_pids; + bool initial_pause_complete {false}; + + /** + * Remove any monitored pids from the set, that are indicated as terminated by the event binding manager + * + * @param terminated_pids The pids indicated as terminated + * @return true if the set of monitored events is empty, false otherwise + */ + [[nodiscard]] bool remove_terminated(std::set const & terminated_pids) + { + // if no pids were terminated, don't check the monitored set, as it may be + // empty anyway in system-wide mode + if (terminated_pids.empty()) { + return false; + } + + for (auto pid : terminated_pids) { + monitored_pids.erase(pid); + } + + return monitored_pids.empty(); + } + + /** Remove pid zero from the set of monitored pids as it has special meaning. + * @return true if the pid was removed, false if the set did not contain it + */ + [[nodiscard]] bool remove_pid_zero() { return (monitored_pids.erase(0) != 0); } + + /** Prepare one pid with the event binding manager */ + [[nodiscard]] bool pid_track_prepare(pid_t tid, + std::set & actually_monitored_tids, + id_to_key_mappings_t & all_id_key_mappings, + std::vector & event_fds) + { + LOG_DEBUG("Attaching to pid %d", tid); + + // track another tid + auto result = event_binding_manager.pid_track_prepare(tid); + switch (result.state) { + case aggregate_state_t::failed: { + LOG_ERROR("Could not profile tid=%d due to unexpected error", tid); + return false; + } + case aggregate_state_t::terminated: { + LOG_ERROR("Could not profile tid=%d as it has terminated", tid); + return true; + } + case aggregate_state_t::offline: { + // nothing to do, the cpu was currently offline + actually_monitored_tids.insert(tid); + return true; + } + case aggregate_state_t::usable: { + // add the id->key mappings to the set for sending to the shell + all_id_key_mappings.insert(all_id_key_mappings.end(), + result.mappings.begin(), + result.mappings.end()); + // record the fact that it was successful + actually_monitored_tids.insert(tid); + // update event_fds_by_pid + event_fds.insert(event_fds.end(), + result.event_fds_by_core_no.begin(), + result.event_fds_by_core_no.end()); + return true; + } + default: { + throw std::runtime_error("unexpected case aggregate_state_t"); + } + } + } + + /** Prepare the various tids for app-profiling mode */ + template + [[nodiscard]] bool prepare_app_tids( + std::set const & monitored_tids, // NOLINT(bugprone-easily-swappable-parameters) + std::set const & all_gatord_tids, + std::set const & just_agent_tids, + std::set & actually_monitored_tids, + std::set & actually_monitored_gatord_tids, + id_to_key_mappings_t & all_id_key_mappings, + std::vector & event_fds, + std::vector & supplimentary_event_fds, + Callback && is_terminate_requested) + { + // prepare all the pids + for (pid_t tid : monitored_tids) { + // stop now if terminated + if (is_terminate_requested()) { + return false; + } + + // remove any tids in all_gatord_tids from monitored_tids as they are to be handled separately + if (all_gatord_tids.count(tid) > 0) { + LOG_DEBUG("Ignoring gatord pid %d", tid); + // remove it from monitored_*pids* as it should not count towards the all_requested_tids_exited check + monitored_pids.erase(tid); + continue; + } + + if (!pid_track_prepare(tid, actually_monitored_tids, all_id_key_mappings, event_fds)) { + return false; + } + } + + // if profile-self is requested then add everything from all_gatord_tids + if (profile_gator) { + for (pid_t tid : all_gatord_tids) { + + // stop now if terminated + if (is_terminate_requested()) { + return false; + } + 
+ if (!pid_track_prepare(tid, + actually_monitored_gatord_tids, + all_id_key_mappings, + supplimentary_event_fds)) { + // it is only fatal if the pid came from the perf agent and requires_process_events_from_self is true + if (requires_process_events_from_self && (just_agent_tids.count(tid) > 0)) { + return false; + } + } + } + } + // otherwise, if just self is required, then add just_agent_tids + else if (requires_process_events_from_self) { + for (pid_t tid : just_agent_tids) { + // stop now if terminated + if (is_terminate_requested()) { + return false; + } + + if (!pid_track_prepare(tid, + actually_monitored_gatord_tids, + all_id_key_mappings, + supplimentary_event_fds)) { + return false; + } + } + } + + return true; + } + + /** + * Sends SIGSTOP to all the monitored tids (that are not gatord tids), then updates the list of monitored tids + * to reflect any additionally detected tids. The set of paused tids is stored for later resumption + * + * @param all_gatord_tids The set of all gatord tids (which must not be stopped) + * @param monitored_tids The set of app tids (which must be stopped) + * @param paused_pids The map from pid to SIGCONT resumer for all stopped/detected tids + * @return The set of tids that are actually to be monitored + */ + [[nodiscard]] std::set filter_and_pause_tids(std::set const & all_gatord_tids, + std::set const & monitored_tids, + std::map & paused_pids) + { + // pause all the pids in monitored_tids that are not in all_gatord_tids + return lnx::stop_all_tids( + monitored_tids, + all_gatord_tids, + // if the global paused set is still not resumed, then extend that, otherwise just temporarily pause them + (initial_pause_complete ? paused_pids : all_stopped_tids)); + } + + /** collect the monitored pids and their tids */ + [[nodiscard]] std::set find_monitored_tids() + { + std::set result {}; + for (pid_t pid : monitored_pids) { + lnx::addTidsRecursively(result, pid, true); + } + return result; + } + + /** Collect the set of pids that belong to this agent and the gatord parent process */ + [[nodiscard]] std::pair, std::set> find_gatord_tids() + { + // get the perf agent pids + std::set just_agent_tids = lnx::getChildTids(lib::getpid(), false); + // then copy it and repeat recursively for the parent (gatord-child) pids, which will ignore any children of the perf agent + // producing a set containing all gatord-child and agent threads, but not any forked command pid + std::set all_gatord_tids = just_agent_tids; + lnx::addTidsRecursively(all_gatord_tids, lib::getppid(), true); + + return {std::move(just_agent_tids), std::move(all_gatord_tids)}; + } + }; +} diff --git a/daemon/agents/perf/perf_capture_helper.h b/daemon/agents/perf/perf_capture_helper.h new file mode 100644 index 00000000..0390689f --- /dev/null +++ b/daemon/agents/perf/perf_capture_helper.h @@ -0,0 +1,869 @@ +/* Copyright (C) 2022 by Arm Limited. All rights reserved. 
*/ + +#pragma once + +#include "Time.h" +#include "agents/agent_environment.h" +#include "agents/perf/async_perf_ringbuffer_monitor.hpp" +#include "agents/perf/cpufreq_counter.h" +#include "agents/perf/events/event_binding_manager.hpp" +#include "agents/perf/events/event_bindings.hpp" +#include "agents/perf/events/perf_activator.hpp" +#include "agents/perf/events/types.hpp" +#include "agents/perf/perf_buffer_consumer.h" +#include "agents/perf/perf_capture_events_helper.hpp" +#include "apc/misc_apc_frame_ipc_sender.h" +#include "async/continuations/async_initiate.h" +#include "async/continuations/continuation.h" +#include "async/continuations/operations.h" +#include "async/continuations/use_continuation.h" +#include "async/proc/async_exec.hpp" +#include "async/proc/async_process.hpp" +#include "async/proc/async_read_proc_maps.h" +#include "async/proc/async_read_proc_sys_dependencies.h" +#include "async/proc/async_wait_for_process.h" +#include "async/proc/process_monitor.hpp" +#include "ipc/messages.h" +#include "ipc/raw_ipc_channel_sink.h" +#include "lib/FsEntry.h" +#include "lib/error_code_or.hpp" +#include "lib/forked_process.h" +#include "linux/proc/ProcessChildren.h" + +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace agents::perf { + /** + * Provides various "leaf" operations for perf_capture_t + */ + template, + typename AsyncRingBufferMonitor = + async_perf_ringbuffer_monitor_t, + typename ProcessMonitor = async::proc::process_monitor_t> + class perf_capture_helper_t + : public std::enable_shared_from_this< + perf_capture_helper_t> { + public: + using perf_capture_events_helper_t = PerfCaptureEventsHelper; + using async_perf_ringbuffer_monitor_t = AsyncRingBufferMonitor; + using stream_descriptor_t = typename perf_capture_events_helper_t::stream_descriptor_t; + using process_monitor_t = ProcessMonitor; + + /** Constructor */ + perf_capture_helper_t(std::shared_ptr conf, + boost::asio::io_context & context, + process_monitor_t & process_monitor, + agent_environment_base_t::terminator terminator, + std::shared_ptr aprm, + perf_capture_events_helper_t && pceh, + std::shared_ptr cpu_info, + std::shared_ptr ipc_sink) + : configuration(std::move(conf)), + strand(context), + process_monitor(process_monitor), + terminator(std::move(terminator)), + cpu_info(std::move(cpu_info)), + ipc_sink(std::move(ipc_sink)), + misc_apc_frame_ipc_sender(std::make_shared(this->ipc_sink)), + async_perf_ringbuffer_monitor(std::move(aprm)), + perf_capture_events_helper(std::move(pceh)) + { + } + + /** @return True if the captured events are enable-on-exec, rather than started manually */ + [[nodiscard]] bool is_enable_on_exec() const { return perf_capture_events_helper.is_enable_on_exec(); } + + /** @return True if configured counter groups include the SPE group */ + [[nodiscard]] bool has_spe() const { return perf_capture_events_helper.has_spe(); } + + /** @return True if terminate was requested */ + [[nodiscard]] bool is_terminate_requested() const + { + return terminate_requested || async_perf_ringbuffer_monitor->is_terminate_requested(); + } + + /** Tell the events helper to mark the EBM as started so that events are enabled when the cores come online*/ + void enable_counters() + { + // tell the EBM that capture started + perf_capture_events_helper.set_capture_started(); + // start the ringbuffer timer + async_perf_ringbuffer_monitor->start_timer(); + } + + /** Spawn an observer of the one-shot-full event */ + void observe_one_shot_event() + { + using namespace 
async::continuations; + + // wait for one-shot mode terminate event + spawn("one-shot mode waiter", + async_perf_ringbuffer_monitor->async_wait_one_shot_full(use_continuation), + [st = this->shared_from_this()](bool) { + LOG_DEBUG("Stopping due to one shot mode"); + st->terminate(); + }); + } + + /** Mark capture as started */ + template + auto async_notify_start_capture(CompletionToken && token) + { + using namespace async::continuations; + + return async_initiate( + [st = this->shared_from_this()]() -> polymorphic_continuation_t<> { + if (st->is_terminate_requested()) { + return {}; + } + + // trigger the capture to start + return st->ipc_sink->async_send_message(ipc::msg_capture_started_t {}, use_continuation) + | then([st](auto const & ec, auto const & /*msg*/) { + if (ec) { + st->terminate(); + } + }); + }, + std::forward(token)); + } + + /** Start pid monitoring */ + template + auto async_start_pids(CompletionToken && token) + { + using namespace async::continuations; + + return async_initiate( + [st = this->shared_from_this()]() -> polymorphic_continuation_t<> { + if (st->is_terminate_requested()) { + return {}; + } + + LOG_DEBUG("Starting pid monitoring..."); + + // start any pids we are monitoring + return start_on(st->strand) // + | then([st]() -> polymorphic_continuation_t<> { + // prepare the event trackers + auto result = st->perf_capture_events_helper.prepare_all_pid_trackers( + [st]() { return st->is_terminate_requested(); }); + + // terminate on failure + if (!result) { + if (!st->is_terminate_requested()) { + st->terminate(); + } + return {}; + } + + // and send all the mappings (asynchronously) + spawn("process key->id mapping task", + st->misc_apc_frame_ipc_sender->async_send_keys_frame(result->id_to_key_mappings, + use_continuation) + | map_error(), + [st](bool failed) { + if (failed) { + st->terminate(); + } + }); + + // then track buffer + return st->async_perf_ringbuffer_monitor->async_add_additional_event_fds( + std::move(result->event_fds), + std::move(result->supplimentary_event_fds), + use_continuation) + | map_error() + // now possibly start the events + | then([st, paused_pids = std::move(result->paused_pids)]() mutable { + // ensure that the pids are resumed after we return + std::map pp {std::move(paused_pids)}; + // then start the events + if (!st->perf_capture_events_helper.start_all_pid_trackers()) { + LOG_DEBUG("start_all_pid_trackers returned false, terminating"); + st->terminate(); + } + }); + }); + }, + std::forward(token)); + } + + /** + * Wait for all capture data to be transmitted and the capture to end + */ + template + [[nodiscard]] auto async_wait_terminated(CompletionToken && token) + { + // currently, only requires waiting for the ringbuffer to drain so just forward the request + return async_perf_ringbuffer_monitor->async_wait_terminated(std::forward(token)); + } + + /** + * For a single cpu, read the initial counter values for any counters that must be polled on start up. + * + * Currently, this is only for the cpu_frequency counter. 
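As background, polling the cpu_frequency counter amounts to reading the core's cpufreq node from sysfs. The helper below is only a hedged sketch of that idea (the real logic is in read_cpu_frequency(), which is not part of this hunk; the sysfs path and the kHz-to-Hz scaling are assumptions):

#include <cstdint>
#include <cstdio>
#include <optional>

// Hypothetical helper: returns the current frequency of one cpu in Hz, or nullopt if the
// cpufreq sysfs node cannot be read. scaling_cur_freq reports kHz, hence the * 1000.
inline std::optional<std::uint64_t> poll_cpu_frequency_hz(int cpu_no)
{
    char path[128];
    std::snprintf(path, sizeof(path), "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_cur_freq", cpu_no);

    std::FILE * file = std::fopen(path, "r");
    if (file == nullptr) {
        return std::nullopt;
    }

    unsigned long long khz = 0;
    bool const ok = (std::fscanf(file, "%llu", &khz) == 1);
    std::fclose(file);

    if (!ok) {
        return std::nullopt;
    }
    return std::uint64_t {khz} * 1000U;
}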
+ * + * @param monotonic_start The capture start timestamp (in CLOCK_MONOTONIC_RAW) + * @param cpu_no The number of the cpu for which counters should be polled + * @param token The completion token for the async operation + */ + template + [[nodiscard]] auto async_read_initial_counter_value(std::uint64_t monotonic_start, + int cpu_no, + CompletionToken && token) + { + using namespace async::continuations; + + return async_initiate( + [st = this->shared_from_this(), monotonic_start, cpu_no]() { + return start_on(st->strand) // + | then([st, monotonic_start, cpu_no]() -> polymorphic_continuation_t<> { + //read the counter + auto counter = + read_cpu_frequency(cpu_no, + *st->cpu_info, + st->configuration->cluster_keys_for_cpu_frequency_counter); + + // skip if no value + if (!counter) { + return {}; + } + + // send the counter frame + std::array counter_values {{ + *counter, + }}; + + return st->misc_apc_frame_ipc_sender->async_send_perf_counters_frame( + monotonic_delta_now(monotonic_start), + counter_values, + use_continuation) // + | map_error(); + }); + }, + std::forward(token)); + } + + /** + * For all cpus, read the initial counter values for any counters that must be polled on start up. + * + * @param monotonic_start The capture start timestamp (in CLOCK_MONOTONIC_RAW) + * @param token The completion token for the async operation + */ + template + [[nodiscard]] auto async_read_initial_counter_values(std::uint64_t monotonic_start, CompletionToken && token) + { + using namespace async::continuations; + + return async_initiate( + [st = this->shared_from_this(), monotonic_start]() { + return start_on(st->strand) // + | then([st, monotonic_start]() { + return iterate(std::size_t {0}, + st->cpu_info->getNumberOfCores(), + [st, monotonic_start](auto cpu_no) { + return st->async_read_initial_counter_value( + monotonic_start, + cpu_no, + async::continuations::use_continuation); + }); + }); + }, + std::forward(token)); + } + + /** + * Poll all currently running processes/threads in /proc and write their basic properties (pid, tid, comm, exe) + * into the capture + * + * @param token The completion token for the async operation + */ + template + [[nodiscard]] auto async_read_process_properties(CompletionToken && token) + { + using namespace async::continuations; + + return async_initiate( + [st = this->shared_from_this()]() { + return async::async_read_proc_sys_dependencies( + st->strand, + st->misc_apc_frame_ipc_sender, + [sw = st->configuration->perf_config.is_system_wide, + pids = st->perf_capture_events_helper.get_monitored_pids(), + gatord_pids = st->perf_capture_events_helper.get_monitored_gatord_pids()](int pid, + int tid) { + return sw || (pids.count(pid) > 0) || (pids.count(tid) > 0) + || (gatord_pids.count(pid) > 0) || (gatord_pids.count(tid) > 0); + }, + use_continuation) // + | map_error(); + }, + std::forward(token)); + } + + /** + * Poll all currently running processes/threads in /proc and write their `maps` file contents into the capture + * + * @param token The completion token for the async operation + */ + template + [[nodiscard]] auto async_read_process_maps(CompletionToken && token) + { + using namespace async::continuations; + + return async_initiate( + [st = this->shared_from_this()]() { + return async::async_read_proc_maps( + st->strand, + st->misc_apc_frame_ipc_sender, + [sw = st->configuration->perf_config.is_system_wide, + pids = st->perf_capture_events_helper.get_monitored_pids(), + gatord_pids = st->perf_capture_events_helper.get_monitored_gatord_pids()](int pid) { + 
return sw || (pids.count(pid) > 0) || (gatord_pids.count(pid) > 0); + }, + use_continuation) + | map_error(); + }, + std::forward(token)); + } + + /** + * Read the kallsyms file and write into the capture + * + * @param token The completion token for the async operation + */ + template + [[nodiscard]] auto async_read_kallsyms(CompletionToken && token) + { + using namespace async::continuations; + + return async_initiate( + [st = this->shared_from_this()]() -> polymorphic_continuation_t<> { + auto kallsyms = lib::FsEntry::create("/proc/kallsyms"); + + if ((!kallsyms.exists()) || (!kallsyms.canAccess(true, false, false))) { + return {}; + } + + auto contents = kallsyms.readFileContents(); + if (contents.empty()) { + return {}; + } + + return st->misc_apc_frame_ipc_sender->async_send_kallsyms_frame(std::move(contents), + use_continuation) + | map_error(); + }, + std::forward(token)); + } + + /** + * Send a core name apc frame + * + * @param cpu_no The cpu number of the core to send the message for + * @param token The completion token for the async operation + */ + template + [[nodiscard]] auto async_send_core_name_msg(int cpu_no, CompletionToken && token) + { + using namespace async::continuations; + + return async_initiate( + [st = this->shared_from_this(), cpu_no]() { + return start_on(st->strand) // + | then([st, cpu_no]() -> polymorphic_continuation_t<> { + // Don't send information on a cpu we know nothing about + auto const cpu_ids = st->cpu_info->getCpuIds(); + + if ((cpu_no < 0) || (std::size_t(cpu_no) >= cpu_ids.size())) { + return {}; + } + + const int cpu_id = cpu_ids[cpu_no]; + if (cpu_id == -1) { + return {}; + } + + // we use cpuid lookup here for look up rather than clusters because it maybe a cluster + // that wasn't known at start up + auto it = st->configuration->cpuid_to_core_name.find(cpu_id); + if (it != st->configuration->cpuid_to_core_name.end()) { + return st->misc_apc_frame_ipc_sender->async_send_core_name(cpu_no, + cpu_id, + it->second, + use_continuation) + | map_error(); + } + + // create the core name string + lib::printf_str_t<32> buf {"Unknown (0x%.3x)", cpu_id}; + return st->misc_apc_frame_ipc_sender->async_send_core_name(cpu_no, + cpu_id, + std::string(buf), + use_continuation) + | map_error(); + }); + }, + std::forward(token)); + } + + /** + * Send the initial summary frame + * + * @param token The completion token for the async operation + */ + template + [[nodiscard]] auto async_send_summary_frame(std::uint64_t monotonic_start, CompletionToken && token) + { + using namespace async::continuations; + + return async_initiate( + [st = this->shared_from_this(), monotonic_start]() { + return start_on(st->strand) // + | then([st, monotonic_start]() -> polymorphic_continuation_t<> { + auto state = + create_perf_driver_summary_state(st->configuration->perf_config, monotonic_start); + + if (!state) { + return start_with(boost::asio::error::make_error_code( + boost::asio::error::basic_errors::operation_aborted)) + | map_error(); + } + + return start_with() + // send the summary + | st->misc_apc_frame_ipc_sender->async_send_summary_message(std::move(*state), + use_continuation) // + | map_error() + // send core names + | iterate(std::size_t {0}, st->cpu_info->getNumberOfCores(), [st](int cpu_no) { + return st->async_send_core_name_msg(cpu_no, use_continuation); + }); + }); + }, + std::forward(token)); + } + + /** + * Rescan for any changes to the CPU info, sending the appropriate core name message + * + * @param cpu_no The core for which to enable events + * @param 
token The completion token for the async operation + */ + template + [[nodiscard]] auto async_rescan_cpu_info(int cpu_no, CompletionToken && token) + { + using namespace async::continuations; + + return async_initiate( + [st = this->shared_from_this(), cpu_no]() { + return start_on(st->strand) // + | then([st, cpu_no]() { + // rescan the ids from proc / sysfs + st->cpu_info->updateIds(true); + // and update the capture + return st->async_send_core_name_msg(cpu_no, use_continuation); + }); + }, + std::forward(token)); + } + + /** + * Output any cpu online/offline event messages as part of a state change + * + * @param monotonic_start The capture start timestamp (in CLOCK_MONOTONIC_RAW) + * @param cpu_no The core whose online state changed + * @param online True if the core came online, false if it went offline + * @param token The completion token for the async operation + */ + template + [[nodiscard]] auto async_core_state_change_msg(std::uint64_t monotonic_start, + int cpu_no, + bool online, + CompletionToken && token) + { + using namespace async::continuations; + + return async_initiate( + [st = this->shared_from_this(), monotonic_start, cpu_no, online]() { + auto monotonic_delta = monotonic_delta_now(monotonic_start); + + return + // store the entry in the capture + st->misc_apc_frame_ipc_sender->async_send_cpu_online_frame(monotonic_delta, + cpu_no, + online, + use_continuation) + | map_error() + // and tell the shell + | st->ipc_sink->async_send_message( + ipc::msg_cpu_state_change_t {{monotonic_delta, cpu_no, online}}, + use_continuation) + | map_error_and_discard(); + }, + std::forward(token)); + } + + /** + * Activate all the perf events for a given core, and start observing them in the ring buffer, + * but do not necessarily enable the events. + * + * Events will only be enabled if `enable_counters` was previously called, or we are `enable_on_exec` and + * `async_exec_child` has previously completed. + * + * @param cpu_no The core for which to enable events + * @param token The completion token for the async operation + * @return The async result is a bool: true if the core was successfully brought online, false if the core is offline. 
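As a usage illustration only (the real call sites live in perf_capture.h, which this hunk does not show), the completion token chooses how the boolean result is delivered; a blocking caller could, for example, use boost::asio::use_future:

// Hedged sketch: `helper` is a std::shared_ptr to this perf_capture_helper_t instantiation,
// and `cpu_no` identifies the core that just came online.
auto became_usable = helper->async_prepare_per_core_events(cpu_no, boost::asio::use_future);
if (!became_usable.get()) {
    // the core is offline; no ring buffer was installed for it
}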
+ */ + template + [[nodiscard]] auto async_prepare_per_core_events([[maybe_unused]] int cpu_no, CompletionToken && token) + { + using namespace async::continuations; + + return async_initiate( + [st = this->shared_from_this(), cpu_no]() { + return start_on(st->strand) // + | then([st, cpu_no]() -> polymorphic_continuation_t { + // prepare the events + auto error_or_result = + st->perf_capture_events_helper.core_online_prepare(core_no_t(cpu_no), + st->get_cluster_id(cpu_no)); + + if (auto const * error = lib::get_error(error_or_result)) { + return start_with(*error, false) // + | map_error(); + } + + auto result = lib::get_value(std::move(error_or_result)); + + // send all the mappings (asynchronously) + spawn("core key->id mapping task", + st->misc_apc_frame_ipc_sender->async_send_keys_frame(result.mappings, + use_continuation) + | map_error(), + [st](bool failed) { + if (failed) { + st->terminate(); + } + }); + + // then track buffer + return st->async_perf_ringbuffer_monitor->async_add_ringbuffer( + cpu_no, + std::move(result.event_fds), + std::move(result.supplimentary_event_fds), + std::move(result.mmap_ptr), + use_continuation) + | map_error() + // now possibly start the events + | then([st, cpu_no, paused_pids = std::move(result.paused_pids)]() mutable { + // ensure that the pids are resumed after we return + std::map pp {std::move(paused_pids)}; + // start the core + return st->perf_capture_events_helper.core_online_start(core_no_t(cpu_no)); + }) + | unpack_tuple() // + | map_error(); + }); + }, + std::forward(token)); + } + + /** + * Deactivate all the perf events for a given core and stop observing them. + * + * @param cpu_no The core for which to enable events + * @param token The completion token for the async operation + */ + template + [[nodiscard]] auto async_remove_per_core_events([[maybe_unused]] int cpu_no, CompletionToken && token) + { + using namespace async::continuations; + + return async_initiate( + [st = this->shared_from_this(), cpu_no]() { + return start_on(st->strand) // + | then([st, cpu_no]() { st->perf_capture_events_helper.core_offline(core_no_t(cpu_no)); }) // + | st->async_perf_ringbuffer_monitor->await_mmap_removed(cpu_no, use_continuation); + }, + std::forward(token)); + } + + /** + * Launch any android package and then poll for the process to start. + * Once the process is detected as running, the list of tracked pids is updated. + * + * @param token The completion token for the async operation + */ + template + [[nodiscard]] auto async_wait_for_process(CompletionToken && token) + { + using namespace async::continuations; + using namespace std::chrono_literals; + + return async_initiate( + [st = this->shared_from_this()]() mutable { + return start_on(st->strand) // + | then([=]() mutable { + st->waiter = async::make_async_wait_for_process(st->strand.context(), + st->configuration->wait_process); + }) + | st->ipc_sink->async_send_message(ipc::msg_exec_target_app_t {}, use_continuation) + | map_error_and_discard() // + | then([=]() { return st->waiter->start(1ms, use_continuation); }) + | then([=](auto ec, auto pids) mutable { + st->waiter.reset(); + + LOG_DEBUG("DETECTED APP PIDS: (ec=%s)", ec.message().c_str()); + for (auto pid : pids) { + LOG_DEBUG(" %d", pid); + } + + if (ec) { + return ec; + } + + st->perf_capture_events_helper.add_stoppable_pids(pids); + + return boost::system::error_code {}; + }) + | map_error(); + }, + std::forward(token)); + } + + /** + * Fork (but not exec) the child process. 
The process is forked so that its pid is known + * and events may be attached to it. The process is only exec'd once the capture is + * ready to start. + * + * @param token The completion token for the async operation + */ + template + [[nodiscard]] auto async_fork_process(CompletionToken && token) + { + using namespace async::continuations; + return async_initiate( + [st = this->shared_from_this()]() mutable { + return start_on(st->strand) // + | then([st]() mutable -> polymorphic_continuation_t<> { + auto config = st->configuration; + if ((!config) || (!config->command)) { + return {}; + } + auto & command = *(config->command); + LOG_INFO("Starting command: %s...", command.command.c_str()); + return async::proc::async_create_process(st->process_monitor, + st->strand.context(), + async::proc::async_exec_args_t { + command.command, + command.args, + command.cwd, + command.uid, + command.gid, + }, + async::proc::discard_ioe, + async::proc::log_oe, + async::proc::log_oe, + use_continuation) // + | map_error() // + | post_on(st->strand) // + | then([st](auto command) { + LOG_DEBUG("Successfully forked child process"); + // save it for later + st->forked_command = command; + + // add its pid to the list of monitored pids + st->perf_capture_events_helper.add_monitored_pid(command->get_pid()); + }); + }); + }, + std::forward(token)); + } + + /** + * Prepare any process that should be profiled; at the end of this operation the list + * of tracked pids will contain one or more values representing the processes to + * profile. + * When making a system-wide capture (without --app/--pid etc), or where the pids + * are already specified (with --pid), this operation is a no-op. + * + * @param token The completion token for the async operation + */ + template + [[nodiscard]] auto async_prepare_process(CompletionToken && token) + { + using namespace async::continuations; + + return async_initiate( + [st = this->shared_from_this()]() { + return start_with() + | do_if([st]() { return !st->configuration->wait_process.empty(); }, // + [st]() { return st->async_wait_for_process(use_continuation); }) + | do_if([st]() { return st->configuration->command.has_value(); }, // + [st]() { return st->async_fork_process(use_continuation); }); + }, + std::forward(token)); + } + + /** + * Exec the child process forked previously for --app + * + * @param token The completion token for the async operation + */ + template + [[nodiscard]] auto async_exec_child(CompletionToken && token) + { + using namespace async::continuations; + return async_initiate( + [st = this->shared_from_this()]() { + return start_on(st->strand) // + | then([st]() { + // clear stopped_tids which will resume any stopped pids + st->perf_capture_events_helper.clear_stopped_tids(); + + // and exec the forked process + auto fc = st->forked_command; + if (!fc) { + return; + } + + // spawn the termination observer + spawn("Waiting for process termination", + async::proc::async_run_to_completion(fc, use_continuation) + | then([st, forked_pid = fc->get_pid()](auto ec, bool by_signal, int status) { + if (ec) { + LOG_DEBUG("Exec monitor failed with error %s", ec.message().c_str()); + } + else if (by_signal) { + LOG_ERROR("Command exited with signal %d", status); + } + else if (status != 0) { + LOG_ERROR("Command exited with code %d", status); + } + else { + LOG_DEBUG("Command exited with code 0"); + } + + if ((!by_signal) + && (status == lib::forked_process_t::failure_exec_invalid)) { + LOG_ERROR( + "Failed to run command %s: Permission denied or is a 
directory", + st->configuration->command->command.c_str()); + st->on_command_exited(forked_pid, true); + } + else if ((!by_signal) + && (status == lib::forked_process_t::failure_exec_not_found)) { + LOG_ERROR("Failed to run command %s: Command not found", + st->configuration->command->command.c_str()); + st->on_command_exited(forked_pid, true); + } + else { + st->on_command_exited(forked_pid, false); + } + }), + [st](bool) { st->terminate(); }); + }); + }, + std::forward(token)); + } + + /** + * Tell shell that the agent is ready to start + * + * @param token The completion token for the async operation + */ + template + [[nodiscard]] auto async_notify_agent_ready(CompletionToken && token) + { + using namespace async::continuations; + + return async_initiate( + [st = this->shared_from_this()]() { + return start_on(st->strand) // + | then([st]() { + //let the shell know we are ready + auto const & monitored_pids = st->perf_capture_events_helper.get_monitored_pids(); + return st->ipc_sink->async_send_message( + ipc::msg_capture_ready_t {std::vector(monitored_pids.begin(), // + monitored_pids.end())}, + use_continuation) // + | map_error_and_discard(); + }); + }, + std::forward(token)); + } + + /** Cancel any outstanding asynchronous operations that need special handling. */ + void terminate() + { + boost::asio::post(strand, [st = this->shared_from_this()]() mutable { + LOG_DEBUG("Terminating"); + + auto w = st->waiter; + if (w) { + w->cancel(); + } + + st->perf_capture_events_helper.clear_stopped_tids(); + + auto fc = st->forked_command; + if (fc) { + fc->abort(); + } + + st->async_perf_ringbuffer_monitor->terminate(); + + st->terminator(); + }); + } + + private: + std::shared_ptr configuration; + boost::asio::io_context::strand strand; + process_monitor_t & process_monitor; + agent_environment_base_t::terminator terminator; + std::shared_ptr cpu_info; + std::shared_ptr ipc_sink; + std::shared_ptr misc_apc_frame_ipc_sender; + std::shared_ptr> waiter {}; + std::shared_ptr async_perf_ringbuffer_monitor; + std::shared_ptr forked_command; + perf_capture_events_helper_t perf_capture_events_helper; + bool terminate_requested {false}; + + [[nodiscard]] cpu_cluster_id_t get_cluster_id(int cpu_no) + { + runtime_assert((cpu_no >= 0) && (std::size_t(cpu_no) < cpu_info->getNumberOfCores()), "Unexpected cpu no"); + + return cpu_cluster_id_t(cpu_info->getClusterIds()[cpu_no]); + } + + void on_command_exited(pid_t pid, bool exec_failed) + { + using namespace async::continuations; + + if (exec_failed) { + spawn("command exited handler", + ipc_sink->async_send_message( + ipc::msg_capture_failed_t {ipc::capture_failed_reason_t::command_exec_failed}, + use_continuation), + [st = this->shared_from_this()](bool) { st->terminate(); }); + } + else if (perf_capture_events_helper.remove_command_pid(pid)) { + terminate(); + } + } + }; +} diff --git a/daemon/agents/perf/perf_frame_packer.cpp b/daemon/agents/perf/perf_frame_packer.cpp new file mode 100644 index 00000000..65b4f5f3 --- /dev/null +++ b/daemon/agents/perf/perf_frame_packer.cpp @@ -0,0 +1,218 @@ +/* Copyright (C) 2022 by Arm Limited. All rights reserved. 
*/ + +#include "agents/perf/perf_frame_packer.hpp" + +#include "ISender.h" +#include "agents/perf/async_buffer_builder.h" +#include "k/perf_event.h" + +namespace agents::perf { + + namespace { + using sample_word_type = std::uint64_t; + + constexpr std::size_t sample_word_size = sizeof(sample_word_type); + + constexpr std::size_t max_data_header_size = buffer_utils::MAXSIZE_PACK32 // frame type + + buffer_utils::MAXSIZE_PACK32 // cpu + + 4; // size + constexpr std::size_t max_data_payload_size = + std::min(ISender::MAX_RESPONSE_LENGTH - max_data_header_size, + 1024UL * 1024UL); // limit frame size + + constexpr std::size_t max_aux_header_size = buffer_utils::MAXSIZE_PACK32 // frame type + + buffer_utils::MAXSIZE_PACK32 // cpu + + buffer_utils::MAXSIZE_PACK64 // tail + + buffer_utils::MAXSIZE_PACK32; // size + constexpr std::size_t max_aux_payload_size = + std::min(ISender::MAX_RESPONSE_LENGTH - max_aux_header_size, + 1024UL * 1024UL); // limit frame size + + [[nodiscard]] bool append_data_record(apc_buffer_builder_t> & builder, + lib::Span data) + { + for (auto w : data) { + builder.packInt64(w); + } + + return builder.getWriteIndex() <= max_data_payload_size; + } + + template + [[nodiscard]] T const * ring_buffer_ptr(char const * base, std::size_t position_masked) + { + return reinterpret_cast(base + position_masked); + } + + template + [[nodiscard]] T const * ring_buffer_ptr(char const * base, std::size_t position, std::size_t size_mask) + { + return ring_buffer_ptr(base, position & size_mask); + } + + } + + std::pair> extract_one_perf_data_apc_frame( + int cpu, + lib::Span data_mmap, + std::uint64_t const header_head, // NOLINT(bugprone-easily-swappable-parameters) + std::uint64_t const header_tail) + { + auto const buffer_mask = data_mmap.size() - 1; // assumes the size is a power of two (which it should be) + + // don't output an empty frame + if (header_tail >= header_head) { + return {header_tail, {}}; + } + + std::vector buffer {}; + buffer.reserve(max_data_payload_size); + apc_buffer_builder_t builder {buffer}; + + // add the frame header + builder.beginFrame(FrameType::PERF_DATA); + builder.packInt(cpu); + // skip the length field for now + auto const length_index = builder.getWriteIndex(); + builder.advanceWrite(4); + + // accumulate one or more records to fit into some message + auto current_tail = header_tail; + while (current_tail < header_head) { + auto const * record_header = + ring_buffer_ptr(data_mmap.data(), current_tail, buffer_mask); + auto const record_size = + std::max(8U, (record_header->size + sample_word_size - 1) & ~(sample_word_size - 1)); + auto const record_end = current_tail + record_size; + std::size_t const base_masked = (current_tail & buffer_mask); + std::size_t const end_masked = (record_end & buffer_mask); + + // incomplete or currently written record; is it possible? lets just be defensive + if (record_end > header_head) { + break; + } + + auto const have_wrapped = end_masked < base_masked; + + std::size_t const first_size = (have_wrapped ? (data_mmap.size() - base_masked) : record_size); + std::size_t const second_size = (have_wrapped ? 
end_masked : 0); + + // encode the chunk + auto const current_offset = builder.getWriteIndex(); + + LOG_TRACE("appending record %p (%zu -> %" PRIu64 ") (%zu / %zu / %u / %zu / %zu / %zu)", + record_header, + record_size, + record_end, + base_masked, + end_masked, + have_wrapped, + first_size, + second_size, + current_offset); + + if ((!append_data_record(builder, + { + ring_buffer_ptr(data_mmap.data(), base_masked), + first_size / sample_word_size, + })) + || (!append_data_record(builder, + { + ring_buffer_ptr(data_mmap.data(), 0), + second_size / sample_word_size, + }))) { + LOG_TRACE("... aborted"); + builder.trimTo(current_offset); + break; + } + + LOG_TRACE("current tail = %" PRIu64, record_end); + + // next + current_tail = record_end; + } + + // don't output an empty frame + if (current_tail == header_tail) { + return {header_tail, {}}; + } + + // now fill in the length field + auto const bytes_written = builder.getWriteIndex() - (length_index + 4); + LOG_TRACE("setting length = %zu", bytes_written); + builder.writeLeUint32At(length_index, bytes_written); + + // commit the frame + builder.endFrame(); + + return {current_tail, std::move(buffer)}; + } + + std::pair, lib::Span> extract_one_perf_aux_apc_frame_data_span_pair( + lib::Span aux_mmap, + std::uint64_t const header_head, + std::uint64_t const header_tail) + { + // ignore invalid / empty input + if (header_head <= header_tail) { + return {{}, {}}; + } + + const std::size_t buffer_mask = aux_mmap.size() - 1; + + // will be 'length' at most otherwise somehow wrapped many times + const std::size_t total_data_size = std::min(header_head - header_tail, // + aux_mmap.size()); + const std::uint64_t head = header_head; + + // will either be the same as 'tail' or will be > if somehow wrapped multiple times + const std::uint64_t tail = (header_head - total_data_size); + + const std::size_t tail_masked = (tail & buffer_mask); + const std::size_t head_masked = (head & buffer_mask); + + const bool have_wrapped = head_masked < tail_masked; + + const std::size_t first_size = (have_wrapped ? (aux_mmap.size() - tail_masked) : total_data_size); + const std::size_t second_size = (have_wrapped ? 
head_masked : 0); + const std::size_t combined_size = first_size + second_size; + + if (first_size >= max_aux_payload_size) { + // send just the first lot + return {{aux_mmap.data() + tail_masked, max_aux_payload_size}, {}}; + } + + if (combined_size >= max_aux_payload_size) { + auto const trimmed_second_size = max_aux_payload_size - first_size; + // send both, but second is trimmed + return {{aux_mmap.data() + tail_masked, first_size}, {aux_mmap.data(), trimmed_second_size}}; + } + + // send both, will fit in one message + return {{aux_mmap.data() + tail_masked, first_size}, {aux_mmap.data(), second_size}}; + } + + std::pair> encode_one_perf_aux_apc_frame(int cpu, + lib::Span first_span, + lib::Span second_span, + std::uint64_t const header_tail) + { + auto const combined_size = first_span.size() + second_span.size(); + + // create the message data + std::vector buffer {}; + buffer.reserve(combined_size); + + apc_buffer_builder_t builder {buffer}; + + builder.beginFrame(FrameType::PERF_AUX); + builder.packInt(cpu); + builder.packInt64(header_tail); + builder.packIntSize(combined_size); + builder.writeBytes(first_span.data(), first_span.size()); + builder.writeBytes(second_span.data(), second_span.size()); + builder.endFrame(); + + return {header_tail + combined_size, std::move(buffer)}; + } +} diff --git a/daemon/agents/perf/perf_frame_packer.hpp b/daemon/agents/perf/perf_frame_packer.hpp new file mode 100644 index 00000000..a3e6e0a1 --- /dev/null +++ b/daemon/agents/perf/perf_frame_packer.hpp @@ -0,0 +1,59 @@ +/* Copyright (C) 2022 by Arm Limited. All rights reserved. */ + +#pragma once + +#include "lib/Span.h" +#include "lib/error_code_or.hpp" + +#include +#include +#include + +namespace agents::perf { + + /** + * Given the current state of the perf data section of some mmap, extract one apc data frame from it + * + * @param cpu The cpu associated with the mmap + * @param data_mmap The data area within the mmap + * @param header_head The data_head value + * @param header_tail The data_tail value + * @return A pair, being the new value for data_tail, and the encoded apc_frame message + */ + [[nodiscard]] std::pair> extract_one_perf_data_apc_frame( + int cpu, + lib::Span data_mmap, + std::uint64_t header_head, + std::uint64_t header_tail); + + /** + * Given the current state of the perf aux section of some mmap, extract a pair of spans (pair to account for ringbuffer wrapping) representing + * the chunk of raw aux data to send as part of some apc_frame message. The pair of spans will be sized such that they are no larger than the maximum + * apc_frame payload. The pair of spans should be treated as one contiguous chunk of aux data (even though the two spans themselves may not be contiguous). + * + * @param aux_mmap The aux area within the mmap + * @param header_head The aux_head value + * @param header_tail The aux_tail value + * @return A pair, being the first and second parts of the aux data chunk. 
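A worked example of the wrap handling may make the span pair clearer (the numbers are illustrative only):

// Suppose the aux buffer is 4 KiB, so buffer_mask == 0xFFF, and the caller passes
// header_tail == 0x0F80 and header_head == 0x1040 (192 bytes pending).
//   tail_masked == 0xF80 and head_masked == 0x040, so head_masked < tail_masked: the data wraps.
//   first span  -> aux_mmap[0xF80 .. 0xFFF]  (128 bytes, up to the end of the buffer)
//   second span -> aux_mmap[0x000 .. 0x03F]  ( 64 bytes, from the start of the buffer)
// Consumed back-to-back, the two spans are exactly the 192 bytes produced between tail and head.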
+ */ + [[nodiscard]] std::pair, lib::Span> extract_one_perf_aux_apc_frame_data_span_pair( + lib::Span aux_mmap, + std::uint64_t header_head, + std::uint64_t header_tail); + + /** + * Given the pair of aux spans that were previously extracted by `extract_one_perf_aux_apc_frame_data_span_pair`, + * encode them into an apc_frame message + * + * @param cpu The cpu associated with the mmap + * @param first_span The first span returned by extract_one_perf_aux_apc_frame_data_span_pair + * @param second_span The second span returned by extract_one_perf_aux_apc_frame_data_span_pair + * @param header_tail The value of header_tail that was passed to extract_one_perf_aux_apc_frame_data_span_pair + * @return A pair, being the new value for aux_tail, and the encoded apc_frame message + */ + [[nodiscard]] std::pair> encode_one_perf_aux_apc_frame( + int cpu, + lib::Span first_span, + lib::Span second_span, + std::uint64_t header_tail); +} diff --git a/daemon/agents/perf/record_types.h b/daemon/agents/perf/record_types.h index d0582df3..cf27b8cf 100644 --- a/daemon/agents/perf/record_types.h +++ b/daemon/agents/perf/record_types.h @@ -8,16 +8,12 @@ namespace agents::perf { struct buffer_config_t { - std::size_t page_size; - std::size_t data_buffer_size; - std::size_t aux_buffer_size; - }; - - struct perf_buffer_t { - void * data_buffer; - void * aux_buffer; - int fd; - int aux_fd; + /// must be power of 2 + size_t page_size; + /// must be power of 2 multiple of pageSize + size_t data_buffer_size; + /// must be power of 2 multiple of pageSize (or 0) + size_t aux_buffer_size; }; using data_word_t = std::uint64_t; diff --git a/daemon/agents/perf/source_adapter.cpp b/daemon/agents/perf/source_adapter.cpp new file mode 100644 index 00000000..dfdf2d38 --- /dev/null +++ b/daemon/agents/perf/source_adapter.cpp @@ -0,0 +1,152 @@ +/* Copyright (C) 2022 by Arm Limited. All rights reserved. 
*/ +#include "agents/perf/source_adapter.h" + +#include "ExitStatus.h" +#include "ISender.h" +#include "Logging.h" +#include "Time.h" +#include "agents/perf/perf_agent_worker.h" +#include "lib/Assert.h" + +#include + +namespace agents::perf { + perf_source_adapter_t::perf_source_adapter_t(sem_t & sender_sem, + ISender & sender, + std::function agent_started_callback, + std::function exec_target_app_callback, + std::function profiling_started_callback) + : sender_sem(sender_sem), + sender(sender), + agent_started_callback(std::move(agent_started_callback)), + exec_target_app_callback(std::move(exec_target_app_callback)), + profiling_started_callback(std::move(profiling_started_callback)), + capture_ended(false) + { + } + + std::optional perf_source_adapter_t::sendSummary() { return {getTime()}; } + + void perf_source_adapter_t::run(std::uint64_t monotonicStart, std::function endSession) + { + { + auto lock = std::unique_lock(event_mutex); + end_session = std::move(endSession); + shutdown_initiated_from_shell = false; + } + + // ask the agent to start capturing + auto started_success = capture_controller->async_start_capture(monotonicStart, boost::asio::use_future); + + // release the lock while we wait for this to happen + if (!started_success.get()) { + LOG_ERROR("Perf agent failed to start capture"); + handleException(); + } + } + + void perf_source_adapter_t::interrupt() + { + perf_capture_controller_t * capture_controller = nullptr; + + { + auto l = std::unique_lock(event_mutex); + shutdown_initiated_from_shell = true; + capture_controller = this->capture_controller.get(); + } + + if (capture_controller != nullptr) { + auto f = capture_controller->async_stop_capture(boost::asio::use_future); + f.get(); + } + } + + bool perf_source_adapter_t::write(ISender & /*sender*/) { return capture_ended.load(std::memory_order_relaxed); } + + void perf_source_adapter_t::set_controller(std::unique_ptr controller) + { + auto lock = std::unique_lock(event_mutex); + capture_controller = std::move(controller); + } + + void perf_source_adapter_t::on_capture_ready() + { + std::function f; + { + auto lock = std::unique_lock(event_mutex); + f = std::move(agent_started_callback); + } + if (f) { + f(true); + } + } + + void perf_source_adapter_t::on_capture_started() + { + std::function f; + { + auto lock = std::unique_lock(event_mutex); + f = profiling_started_callback; + + runtime_assert(!agent_started_callback, "on_capture_ready was not called"); + } + if (f) { + f(); + } + } + + void perf_source_adapter_t::on_capture_completed() + { + capture_ended.store(true, std::memory_order_relaxed); + + std::function local_agent_started; + std::function local_end_session; + + { + auto lock = std::unique_lock(event_mutex); + local_agent_started = std::move(agent_started_callback); + if (!shutdown_initiated_from_shell) { + local_end_session = std::move(end_session); + } + } + + if (local_agent_started) { + local_agent_started(false); + } + + if (local_end_session) { + local_end_session(); + } + else { + sem_post(&sender_sem); + } + } + + void perf_source_adapter_t::on_apc_frame_received(const std::vector & frame) + { + auto const length = frame.size(); + runtime_assert(length <= ISender::MAX_RESPONSE_LENGTH, "too large apc_frame msg received"); + + sender.writeData(frame.data(), static_cast(length), ResponseType::APC_DATA); + } + + // NOLINTNEXTLINE(readability-convert-member-functions-to-static) + void perf_source_adapter_t::on_capture_failed(ipc::capture_failed_reason_t reason) + { + // 
NOLINTNEXTLINE(hicpp-multiway-paths-covered) + switch (reason) { + case ipc::capture_failed_reason_t::command_exec_failed: { + LOG_DEBUG("Capture failed due to exec failure"); + handleException(); + break; + } + default: { + LOG_DEBUG("Unexpected capture failure reason"); + handleException(); + break; + } + } + } + + void perf_source_adapter_t::exec_target_app() { exec_target_app_callback(); } +} diff --git a/daemon/agents/perf/source_adapter.h b/daemon/agents/perf/source_adapter.h new file mode 100644 index 00000000..6c591589 --- /dev/null +++ b/daemon/agents/perf/source_adapter.h @@ -0,0 +1,121 @@ +/* Copyright (C) 2022 by Arm Limited. All rights reserved. */ +#pragma once + +#define BUFFER_USE_SESSION_DATA + +#include "Buffer.h" +#include "ISender.h" +#include "Source.h" +#include "agents/perf/perf_agent_worker.h" +#include "ipc/messages.h" + +#include +#include +#include +#include + +#include + +namespace agents::perf { + + class perf_source_adapter_t : public PrimarySource { + public: + explicit perf_source_adapter_t(sem_t & sender_sem, + ISender & sender, + std::function agent_started_callback, + std::function exec_target_app_callback, + std::function profiling_started_callback); + + ~perf_source_adapter_t() override = default; + + /** + * Note: this method doesn't actually send the summary frame as that is done by the new + * perf agent at the start of capture. This method is required by the legacy code as it is + * the point at which the monotonic start time is established for all sources. + */ + std::optional sendSummary() override; + + /** The main blocking body of the source which runs and waits for the capture to complete */ + void run(std::uint64_t monotonicStart, std::function endSession) override; + + /** + * Called by Child to stop the capture from the "shell" side. We need to ask the agent + * to shut down. + */ + void interrupt() override; + + /** @return True when capture ended */ + bool write(ISender & sender) override; + + /** + * Called by the agent worker to set itself as a controller for this adapter. + * + * CALLED FROM THE ASIO THREAD POOL + */ + void set_controller(std::unique_ptr controller); + + /** + * Called by the agent worker once the agent ready message has been received. + * + * CALLED FROM THE ASIO THREAD POOL + */ + void on_capture_ready(); + + /** + * Called by the agent worker once the start message has been sent successfully. + * + * CALLED FROM THE ASIO THREAD POOL + */ + void on_capture_started(); + + /** + * Called by the agent worker when the shutdown message is received. If the shutdown was initiated + * by the agent then the endSession callback needs to be invoked so that the Child process can + * terminate any other sources. + * + * CALLED FROM THE ASIO THREAD POOL + */ + void on_capture_completed(); + + /** + * Called by the worker to deliver any APC frames that get sent by the agent. 
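+ * Frames are forwarded to the shell-side ISender as APC_DATA responses; a frame larger than
+ * ISender::MAX_RESPONSE_LENGTH fails a runtime assertion in the implementation.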
+ * + * CALLED FROM THE ASIO THREAD POOL + */ + void on_apc_frame_received(const std::vector & frame); + + /** + * Called by the worker when the capture fails + * + * CALLED FROM THE ASIO THREAD POOL + * + * @param reason The failure reason + */ + void on_capture_failed(ipc::capture_failed_reason_t reason); + + /** + * Called by the worker to trigger the launch of some android apk + * + * CALLED FROM THE ASIO THREAD POOL + */ + void exec_target_app(); + + private: + sem_t & sender_sem; + ISender & sender; + + // variables that are guarded by the event_mutex + std::mutex event_mutex; + std::function agent_started_callback; + std::function exec_target_app_callback; + std::function profiling_started_callback; + std::unique_ptr capture_controller; + bool shutdown_initiated_from_shell; + std::function end_session; + + // capture_ended is an atomic var rather than being guarded by the event mutex since this + // ends up getting checked frequently when the write buffer is flushed. doing it this way + // we can avoid the overhead of stronger memory ordering imposed by the mutex. + std::atomic_bool capture_ended; + }; +} diff --git a/daemon/agents/perf/sync_generator.h b/daemon/agents/perf/sync_generator.h index c98ea73b..22c6f611 100644 --- a/daemon/agents/perf/sync_generator.h +++ b/daemon/agents/perf/sync_generator.h @@ -87,7 +87,7 @@ namespace agents::perf { std::vector buffer; SyncThread thread; - void write(pid_t pid, pid_t tid, std::uint64_t monotonic_raw, std::uint64_t vcnt, std::uint64_t freq) + void write(pid_t pid, pid_t tid, std::uint64_t freq, std::uint64_t monotonic_raw, std::uint64_t vcnt) { buffer.resize(max_sync_buffer_size); auto builder = apc_buffer_builder_t(buffer); @@ -108,7 +108,12 @@ namespace agents::perf { builder.endFrame(); - LOG_DEBUG("Committing perf sync data written: %i", builder.getWriteIndex()); + LOG_DEBUG("Committing perf sync data (freq: %" PRIu64 ", monotonic: %" PRIu64 ", vcnt: %" PRIu64 + ") written: %zu bytes", + freq, + monotonic_raw, + vcnt, + builder.getWriteIndex()); // Send frame sink->async_send_message(ipc::msg_apc_frame_data_t {std::move(buffer)}, diff --git a/daemon/agents/perf/tracepoint_formats.h b/daemon/agents/perf/tracepoint_formats.h deleted file mode 100644 index b98ef316..00000000 --- a/daemon/agents/perf/tracepoint_formats.h +++ /dev/null @@ -1,126 +0,0 @@ -/* Copyright (C) 2022 by Arm Limited. All rights reserved. 
*/ - -#pragma once - -#include "GatorException.h" -#include "Logging.h" -#include "agents/perf/async_buffer_builder.h" -#include "apc/misc_apc_frame_ipc_sender.h" -#include "async/async_buffer.hpp" -#include "async/continuations/async_initiate.h" -#include "async/continuations/operations.h" -#include "async/continuations/use_continuation.h" -#include "lib/Format.h" -#include "lib/FsEntry.h" -#include "lib/Span.h" - -#include -#include - -namespace agents::perf { - - class tracepoint_formats_t : public std::enable_shared_from_this { - - public: - tracepoint_formats_t(const TraceFsConstants & traceFsConstants, - std::shared_ptr sender) - : traceFsConstants(traceFsConstants), sender(sender) {}; - - template - auto async_send_tracepoint_formats(lib::Span tracepoint_names, CompletionToken && token) - { - using namespace async::continuations; - return async_initiate>( - [st = shared_from_this(), tracepoint_names_local = tracepoint_names]() mutable { - return start_with(tracepoint_names_local.begin(), boost::system::error_code {}) - | loop( - [st, tracepoint_names_local](auto it, auto ec) { - return start_with( - (it != tracepoint_names_local.end() && ec == boost::system::error_code {}), - it, - ec); - }, - [st](auto it, auto ec) mutable { - return st->continue_send_tracepoint_formats(*it) - | then([=](auto ec) mutable { return start_with(++it, ec); }); - }) - | then([](auto /*it*/, auto ec) { return ec; }); - }, - std::forward(token)); - } - - template - auto async_send_tracepoint_header_page(CompletionToken && token) - { - return boost::asio::async_initiate( - [st = shared_from_this()](auto && handler) { - st->do_send_tracepoint_header_page_or_event_frame(std::move(handler), HEADER_PAGE); - }, - token); - } - - template - auto async_send_tracepoint_header_event(CompletionToken && token) - { - return boost::asio::async_initiate( - [st = shared_from_this()](auto && handler) { - st->do_send_tracepoint_header_page_or_event_frame(std::move(handler), HEADER_EVENT); - }, - token); - } - - private: - static constexpr std::string_view HEADER_PAGE = "header_page"; - static constexpr std::string_view HEADER_EVENT = "header_event"; - static constexpr std::string_view FORMAT = "format"; - static constexpr std::string_view PATH_SEPARATOR = "/"; - - const TraceFsConstants & traceFsConstants; - std::shared_ptr sender; - - async::continuations::polymorphic_continuation_t continue_send_tracepoint_formats( - std::string_view tracepoint_name) - { - using namespace async::continuations; - - auto path = lib::FsEntry::create(std::string(traceFsConstants.path__events) + PATH_SEPARATOR.data() - + tracepoint_name.data() + PATH_SEPARATOR.data() + FORMAT.data()); // - - if (!path.canAccess(true, false, false)) { - LOG_DEBUG("Can't access file %s", path.path().c_str()); - return start_with(boost::asio::error::make_error_code(boost::asio::error::no_permission)); - } - - auto format_contents = path.readFileContents(); - if (format_contents.empty()) { - LOG_DEBUG("File (%s) content is empty", path.path().c_str()); - return start_with(boost::asio::error::make_error_code(boost::asio::error::misc_errors::not_found)); - } - return sender->async_send_format_frame(format_contents, use_continuation); - } - - template - void do_send_tracepoint_header_page_or_event_frame(Handler && handler, std::string_view fs_detail) - { - auto path = lib::FsEntry::create(lib::Format() << traceFsConstants.path__events << PATH_SEPARATOR.data() - << fs_detail.data()); // - if (!path.canAccess(true, false, false)) { - LOG_DEBUG("Can't access file %s", 
path.path().c_str()); - handler(boost::asio::error::make_error_code(boost::asio::error::no_permission)); - return; - } - auto format_contents = path.readFileContents(); - if (format_contents.empty()) { - LOG_DEBUG("File (%s) content is empty", path.path().c_str()); - handler(boost::asio::error::make_error_code(boost::asio::error::misc_errors::not_found)); - return; - } - if (fs_detail == HEADER_EVENT) { - sender->async_send_header_event_frame(format_contents, std::move(handler)); - } - else if (fs_detail == HEADER_PAGE) { - sender->async_send_header_page_frame(format_contents, std::move(handler)); - } - } - }; -} diff --git a/daemon/agents/spawn_agent.cpp b/daemon/agents/spawn_agent.cpp index c6912065..c1c22692 100644 --- a/daemon/agents/spawn_agent.cpp +++ b/daemon/agents/spawn_agent.cpp @@ -1,13 +1,19 @@ /* Copyright (C) 2021-2022 by Arm Limited. All rights reserved. */ #include "agents/spawn_agent.h" +#include "android/Spawn.h" #include "lib/Assert.h" #include "lib/AutoClosingFd.h" #include "lib/FsEntry.h" +#include "lib/Process.h" +#include "lib/error_code_or.hpp" +#include "lib/forked_process.h" + +#include namespace agents { /** Simple agent spawner */ - std::optional simple_agent_spawner_t::spawn_agent_process(char const * agent_name) + lib::error_code_or_t simple_agent_spawner_t::spawn_agent_process(char const * agent_name) { runtime_assert(agent_name != nullptr, "agent_name is required"); @@ -16,35 +22,107 @@ namespace agents { if (!gatord_exe) { LOG_ERROR("Could not resolve /proc/self/exe to gatord's real path. Did it get deleted?"); - return {}; + return boost::system::errc::make_error_code(boost::system::errc::no_such_file_or_directory); + } + + auto stdio_fds = lib::stdio_fds_t::create_pipes(); + if (auto const * error = lib::get_error(stdio_fds)) { + return *error; + } + + std::vector arguments {agent_name}; + + if (::logging::is_log_enable_trace()) { + arguments.emplace_back("--trace"); + } + + return lib::forked_process_t::fork_process(true, + gatord_exe->path(), + arguments, + {}, + {}, + lib::get_value(std::move(stdio_fds))); + } + + android_pkg_agent_spawner_t::~android_pkg_agent_spawner_t() noexcept + { + if (remote_exe_path) { + gator::process::system("run-as '" + package_name + "' rm -f '" + *remote_exe_path + "'"); } + } + + /** Android agent spawner */ + lib::error_code_or_t android_pkg_agent_spawner_t::spawn_agent_process( + char const * agent_name) + { +#ifdef APP_GATOR_GDB_SERVER + constexpr std::size_t extra_args = 2; +#else + constexpr std::size_t extra_args = 0; +#endif - return lib::popen(gatord_exe->path().c_str(), agent_name); + runtime_assert(agent_name != nullptr, "agent_name is required"); + + if (!remote_exe_path) { + remote_exe_path = gator::android::deploy_to_package(package_name); + if (!remote_exe_path) { + return boost::system::errc::make_error_code(boost::system::errc::permission_denied); + } + } + + auto stdio_fds = lib::stdio_fds_t::create_pipes(); + if (auto const * error = lib::get_error(stdio_fds)) { + return *error; + } + + std::array arguments {{ + package_name, +#ifdef APP_GATOR_GDB_SERVER + "./gdbserver", + ":5001", +#endif + *remote_exe_path, + agent_name, + }}; + + return lib::forked_process_t::fork_process(true, + "run-as", + arguments, + {}, + {}, + lib::get_value(std::move(stdio_fds))); } /** Spawn the agent */ - std::optional spawn_agent(boost::asio::io_context & io_context, - i_agent_spawner_t & spawner, - char const * agent_name, - logging::agent_log_reader_t::consumer_fn_t log_consumer) + lib::error_code_or_t 
spawn_agent(boost::asio::io_context & io_context, + i_agent_spawner_t & spawner, + char const * agent_name, + logging::agent_log_reader_t::consumer_fn_t log_consumer) { - auto process = spawner.spawn_agent_process(agent_name); - if (!process) { - return {}; + auto result = spawner.spawn_agent_process(agent_name); + if (auto const * error = lib::get_error(result)) { + return *error; } - return agent_process_t { - ipc::raw_ipc_channel_source_t::create(io_context, lib::AutoClosingFd {process->out}), - ipc::raw_ipc_channel_sink_t::create(io_context, lib::AutoClosingFd {process->in}), - logging::agent_log_reader_t::create(io_context, lib::AutoClosingFd {process->err}, std::move(log_consumer)), - process->pid, + auto process = lib::get_value(std::move(result)); + auto ipc_source = ipc::raw_ipc_channel_source_t::create(io_context, std::move(process.get_stdout_read())); + auto ipc_sink = ipc::raw_ipc_channel_sink_t::create(io_context, std::move(process.get_stdin_write())); + auto log_reader = logging::agent_log_reader_t::create(io_context, + std::move(process.get_stderr_read()), + std::move(log_consumer)); + + return spawn_agent_result_t { + std::move(ipc_source), + std::move(ipc_sink), + std::move(log_reader), + std::move(process), }; } /** Spawn the agent with the default logger */ - std::optional spawn_agent(boost::asio::io_context & io_context, - i_agent_spawner_t & spawner, - char const * agent_name) + lib::error_code_or_t spawn_agent(boost::asio::io_context & io_context, + i_agent_spawner_t & spawner, + char const * agent_name) { return spawn_agent(io_context, spawner, diff --git a/daemon/agents/spawn_agent.h b/daemon/agents/spawn_agent.h index 8651ab75..27bba058 100644 --- a/daemon/agents/spawn_agent.h +++ b/daemon/agents/spawn_agent.h @@ -3,9 +3,12 @@ #include "Logging.h" #include "agents/agent_worker.h" +#include "async/continuations/async_initiate.h" +#include "async/proc/process_monitor.hpp" #include "ipc/raw_ipc_channel_sink.h" #include "ipc/raw_ipc_channel_source.h" -#include "lib/Popen.h" +#include "lib/error_code_or.hpp" +#include "lib/forked_process.h" #include "logging/agent_log.h" #include @@ -18,6 +21,9 @@ namespace agents { /** ID string used to identify the external annotation agent */ constexpr std::string_view agent_id_ext_source {"agent-external"}; + /** ID string used to identify the external annotation agent */ + constexpr std::string_view agent_id_perf {"agent-perf"}; + /** * An interface for some class that will spawn a gatord agent process */ @@ -30,15 +36,48 @@ namespace agents { * @param agent_name The agent ID string * @return The process popen result */ - virtual std::optional spawn_agent_process(char const * agent_name) = 0; + virtual lib::error_code_or_t spawn_agent_process(char const * agent_name) = 0; }; /** * Default, simple implementation of i_agent_spawner_t that just forks/exec the current process binary */ - class simple_agent_spawner_t : public i_agent_spawner_t { + class simple_agent_spawner_t final : public i_agent_spawner_t { + public: + lib::error_code_or_t spawn_agent_process(char const * agent_name) override; + }; + + /** + * Android implementation of i_agent_spawner_t that runs the agent using `run-as` within some package + */ + class android_pkg_agent_spawner_t final : public i_agent_spawner_t { public: - std::optional spawn_agent_process(char const * agent_name) override; + explicit android_pkg_agent_spawner_t(std::string package_name) : package_name(std::move(package_name)) {} + android_pkg_agent_spawner_t(android_pkg_agent_spawner_t const &) = 
delete; + android_pkg_agent_spawner_t & operator=(android_pkg_agent_spawner_t const &) = delete; + android_pkg_agent_spawner_t(android_pkg_agent_spawner_t &&) noexcept = default; + android_pkg_agent_spawner_t & operator=(android_pkg_agent_spawner_t &&) noexcept = default; + ~android_pkg_agent_spawner_t() noexcept override; + + lib::error_code_or_t spawn_agent_process(char const * agent_name) override; + + private: + std::string package_name; + std::optional remote_exe_path {}; + }; + + /** + * The spawned agent result object + */ + struct spawn_agent_result_t { + /** The IPC source, for reading messages from the agent */ + std::shared_ptr ipc_source; + /** The IPC sink, for sending message to the agent */ + std::shared_ptr ipc_sink; + /** The agent log reader and consumer */ + std::shared_ptr agent_log_reader; + /** The forked process object */ + lib::forked_process_t forked_process; }; /** @@ -51,8 +90,8 @@ namespace agents { std::shared_ptr ipc_sink; /** The agent log reader and consumer */ std::shared_ptr agent_log_reader; - /** The agent process pid */ - pid_t pid; + /** The forked process object */ + lib::forked_process_t forked_process; }; /** @@ -62,12 +101,12 @@ namespace agents { * @param spawner The process spawner * @param agent_name The agent ID * @param log_consumer An agent log consumer function - * @return The spawned process properties (or empty if popen failed) + * @return The spawned process properties (or error if failed) */ - std::optional spawn_agent(boost::asio::io_context & io_context, - i_agent_spawner_t & spawner, - char const * agent_name, - logging::agent_log_reader_t::consumer_fn_t log_consumer); + lib::error_code_or_t spawn_agent(boost::asio::io_context & io_context, + i_agent_spawner_t & spawner, + char const * agent_name, + logging::agent_log_reader_t::consumer_fn_t log_consumer); /** * Spawn an agent process using the default log consumer * * @param io_context The io_context for async operations * @param spawner The process spawner * @param agent_name The agent ID - * @return The spawned process properties (or empty if popen failed) + * @return The spawned process properties (or error if failed) */ - std::optional spawn_agent(boost::asio::io_context & io_context, - i_agent_spawner_t & spawner, - char const * agent_name); + lib::error_code_or_t spawn_agent(boost::asio::io_context & io_context, + i_agent_spawner_t & spawner, + char const * agent_name); /** * Spawn an agent process and construct the associated worker class that owns the IPC objects and interacts with the agent * * @tparam T The worker wrapper class type + * * @param io_context The io_context for async operations * @param spawner The process spawner - * @return A pair containing the process pid and a shared_ptr to T (or `{0, nullptr}` if an error occured) + * @param observer The state change observer callback + * + * Async operation produces a pair containing the process pid and a shared_ptr to T (or `{0, nullptr}` if an error occurred) */ - template - static std::pair> spawn_agent_worker( - boost::asio::io_context & io_context, - i_agent_spawner_t & spawner, - i_agent_worker_t::state_change_observer_t && observer, - Args &&... args) + template + inline auto async_spawn_agent_worker(boost::asio::io_context & io_context, + i_agent_spawner_t & spawner, + i_agent_worker_t::state_change_observer_t && observer, + CompletionToken && token, + Args &&...
args) { - // spawn the process - auto agent = spawn_agent(io_context, spawner, T::get_agent_process_id()); - if (!agent) { - return {0, nullptr}; - } + using namespace async::continuations; + + return async_initiate( + [&io_context, &spawner, observer = std::move(observer)](auto &&... args) mutable { + return start_with(std::forward(args)...) // + | then([&io_context, &spawner, observer = std::move(observer)]( + auto &&... args) mutable -> std::pair> { + // spawn the process + auto spawn_result = spawn_agent(io_context, spawner, T::get_agent_process_id()); + if (auto const * error = lib::get_error(spawn_result)) { + return {0, nullptr}; + } + + // get the value from the spawn result + auto spawn_properties = lib::get_value(std::move(spawn_result)); + auto const pid = spawn_properties.forked_process.get_pid(); - // construct the worker class - auto ptr = std::make_shared(io_context, *agent, std::move(observer), std::forward(args)...); + // construct the worker class + auto ptr = std::make_shared(io_context, + agent_process_t { + spawn_properties.ipc_source, + spawn_properties.ipc_sink, + spawn_properties.agent_log_reader, + std::move(spawn_properties.forked_process), + }, + std::move(observer), + std::forward(args)...); - // start it (after construction since it is assumed to be enable_shared_from_this) - ptr->start(); + // start it - this should exec the agent, returning the result of the exec command + if (!ptr->start()) { + LOG_ERROR("Agent process created, but exec failed"); + } - return {agent->pid, std::move(ptr)}; + return {pid, std::move(ptr)}; + }); + }, + std::forward(token), + std::forward(args)...); } } diff --git a/daemon/android/GatorAndroidSetupHandler.cpp b/daemon/android/GatorAndroidSetupHandler.cpp index 11b7fa3a..4457adae 100644 --- a/daemon/android/GatorAndroidSetupHandler.cpp +++ b/daemon/android/GatorAndroidSetupHandler.cpp @@ -1,4 +1,4 @@ -/* Copyright (C) 2021 by Arm Limited. All rights reserved. */ +/* Copyright (C) 2021-2022 by Arm Limited. All rights reserved. */ #include "GatorAndroidSetupHandler.h" @@ -20,13 +20,12 @@ namespace { constexpr std::string_view SECURITY_PERF_HIDDEN_PROP = "security.perf_harden"; constexpr int ONE_KB = 1024; - constexpr int LARGE_BUFFER_CORE_MULTIPLIER = 64; + constexpr int LARGE_BUFFER_CORE_MULTIPLIER = 512; constexpr int SMALL_BUFFER_MULTIPLIER = 129; //128 +1 constexpr int DEBUG_PERF_EVENT_MLOCK_KB = 8196; } -GatorAndroidSetupHandler::GatorAndroidSetupHandler(SessionData & sessionData, UserClassification userClassification) - : LinuxEnvironmentConfig(sessionData) +GatorAndroidSetupHandler::GatorAndroidSetupHandler(UserClassification userClassification) { auto propSecurityperfHarden = readProperty(SECURITY_PERF_HIDDEN_PROP); if (propSecurityperfHarden) { diff --git a/daemon/android/GatorAndroidSetupHandler.h b/daemon/android/GatorAndroidSetupHandler.h index 16c7de65..633572aa 100644 --- a/daemon/android/GatorAndroidSetupHandler.h +++ b/daemon/android/GatorAndroidSetupHandler.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2021 by Arm Limited. All rights reserved. */ +/* Copyright (C) 2021-2022 by Arm Limited. All rights reserved. 
*/ #ifndef ANDROID_GATORANDROIDSETUPHANDLER_H_ #define ANDROID_GATORANDROIDSETUPHANDLER_H_ @@ -26,7 +26,7 @@ namespace gator::android { * debug.perf_event_mlock_kb 8192 * security.perf_harden 0 */ - GatorAndroidSetupHandler(SessionData & sessionData, UserClassification userClassification); + explicit GatorAndroidSetupHandler(UserClassification userClassification); /** * Will restore the android security properties @@ -34,7 +34,7 @@ namespace gator::android { * were configured before profiling. * The initial values are saved during configureAndroidSecurityProperties, and will be used for restore. */ - virtual ~GatorAndroidSetupHandler() noexcept; + ~GatorAndroidSetupHandler() noexcept override; private: std::map initialPropertyMap {}; diff --git a/daemon/android/Spawn.h b/daemon/android/Spawn.h index 9feb6709..646938f5 100644 --- a/daemon/android/Spawn.h +++ b/daemon/android/Spawn.h @@ -1,21 +1,20 @@ -/* Copyright (C) 2021 by Arm Limited. All rights reserved. */ +/* Copyright (C) 2021-2022 by Arm Limited. All rights reserved. */ #pragma once #include #include -namespace gator { namespace android { +namespace gator::android { /** * @brief Copies the gator executable the Android app's home folder and * returns its full path. - * + * * @param package_name The package to copy into. * @return std::optional The path to the copied gator binary * or nullopt if the copy failed. */ std::optional deploy_to_package(const std::string & package_name); -} // namespace android -} // namespace gator +} diff --git a/daemon/android/ThermalDriver.cpp b/daemon/android/ThermalDriver.cpp index 4bb08bf0..a57ecd0c 100644 --- a/daemon/android/ThermalDriver.cpp +++ b/daemon/android/ThermalDriver.cpp @@ -103,7 +103,7 @@ namespace gator::android { mxmlElementSetAttr(node, "class", "activity"); mxmlElementSetAttr(node, "units", ""); mxmlElementSetAttr(node, "average_selection", "yes"); - mxmlElementSetAttr(node, "series_composition", "stack"); + mxmlElementSetAttr(node, "series_composition", "stacked"); mxmlElementSetAttr(node, "rendering_type", "bar"); mxmlElementSetAttr(node, "proc", "no"); mxmlElementSetAttr(node, "per_core", "no"); diff --git a/daemon/apc/misc_apc_frame_ipc_sender.h b/daemon/apc/misc_apc_frame_ipc_sender.h index 8f7caba9..519b50b3 100644 --- a/daemon/apc/misc_apc_frame_ipc_sender.h +++ b/daemon/apc/misc_apc_frame_ipc_sender.h @@ -4,9 +4,12 @@ #include "Protocol.h" #include "Time.h" +#include "agents/perf/events/types.hpp" #include "agents/perf/perf_driver_summary.h" #include "apc/perf_apc_frame_utils.h" #include "apc/summary_apc_frame_utils.h" +#include "async/continuations/async_initiate.h" +#include "async/continuations/stored_continuation.h" #include "ipc/messages.h" #include "ipc/raw_ipc_channel_sink.h" #include "k/perf_event.h" @@ -30,67 +33,63 @@ namespace apc { template auto async_send_perf_events_attributes_frame(perf_event_attr const & pea, int key, CompletionToken && token) { - return boost::asio::async_initiate( - [ipc_sink = ipc_sink, - bytes = apc::make_perf_events_attributes_frame(pea, key)](auto && handler) mutable { - ipc_sink->async_send_message( - ipc::msg_apc_frame_data_t {std::move(bytes)}, - [h = std::forward(handler)](auto const & ec, - auto const & /* msg */) mutable { h(ec); }); + using namespace async::continuations; + + return async_initiate_explicit( + [ipc_sink = ipc_sink, bytes = apc::make_perf_events_attributes_frame(pea, key)](auto && sc) mutable { + submit(ipc_sink->async_send_message(ipc::msg_apc_frame_data_t {std::move(bytes)}, + use_continuation) // + | 
then([](auto const & ec, auto const &) { return ec; }), + std::forward(sc)); }, - token); + std::forward(token)); } template - auto async_send_keys_frame(lib::Span ids, lib::Span keys, CompletionToken && token) + auto async_send_keys_frame( + lib::Span const> mappings, + CompletionToken && token) { - return boost::asio::async_initiate( - [ipc_sink = ipc_sink, bytes = apc::make_keys_frame(ids, keys)](auto && handler) mutable { - ipc_sink->async_send_message( - ipc::msg_apc_frame_data_t {std::move(bytes)}, - [h = std::forward(handler)](auto const & ec, - auto const & /* msg */) mutable { h(ec); }); + using namespace async::continuations; + + return async_initiate_explicit( + [ipc_sink = ipc_sink, bytes = apc::make_keys_frame(mappings)](auto && sc) mutable { + submit(ipc_sink->async_send_message(ipc::msg_apc_frame_data_t {std::move(bytes)}, + use_continuation) // + | then([](auto const & ec, auto const &) { return ec; }), + std::forward(sc)); }, - token); - } - - template - auto async_send_old_keys_frame(lib::Span keys, lib::Span bytes, CompletionToken && token) - { - return boost::asio::async_initiate( - [ipc_sink = ipc_sink, bytes = apc::make_old_keys_frame(keys, bytes)](auto && handler) mutable { - ipc_sink->async_send_message( - ipc::msg_apc_frame_data_t {std::move(bytes)}, - [h = std::forward(handler)](auto const & ec, - auto const & /* msg */) mutable { h(ec); }); - }, - token); + std::forward(token)); } template auto async_send_format_frame(std::string_view format, CompletionToken && token) { - return boost::asio::async_initiate( - [ipc_sink = ipc_sink, bytes = apc::make_format_frame(format)](auto && handler) mutable { - ipc_sink->async_send_message( - ipc::msg_apc_frame_data_t {std::move(bytes)}, - [h = std::forward(handler)](auto const & ec, - auto const & /* msg */) mutable { h(ec); }); + using namespace async::continuations; + + return async_initiate_explicit( + [ipc_sink = ipc_sink, bytes = apc::make_format_frame(format)](auto && sc) mutable { + submit(ipc_sink->async_send_message(ipc::msg_apc_frame_data_t {std::move(bytes)}, + use_continuation) // + | then([](auto const & ec, auto const &) { return ec; }), + std::forward(sc)); }, - token); + std::forward(token)); } template auto async_send_maps_frame(int pid, int tid, std::string_view maps, CompletionToken && token) { - return boost::asio::async_initiate( - [ipc_sink = ipc_sink, bytes = apc::make_maps_frame(pid, tid, maps)](auto && handler) mutable { - ipc_sink->async_send_message( - ipc::msg_apc_frame_data_t {std::move(bytes)}, - [h = std::forward(handler)](auto const & ec, - auto const & /* msg */) mutable { h(ec); }); + using namespace async::continuations; + + return async_initiate_explicit( + [ipc_sink = ipc_sink, bytes = apc::make_maps_frame(pid, tid, maps)](auto && sc) mutable { + submit(ipc_sink->async_send_message(ipc::msg_apc_frame_data_t {std::move(bytes)}, + use_continuation) // + | then([](auto const & ec, auto const &) { return ec; }), + std::forward(sc)); }, - token); + std::forward(token)); } template @@ -100,53 +99,47 @@ namespace apc { std::string_view comm, CompletionToken && token) { - return boost::asio::async_initiate( - [ipc_sink = ipc_sink, bytes = apc::make_comm_frame(pid, tid, image, comm)](auto && handler) mutable { - ipc_sink->async_send_message( - ipc::msg_apc_frame_data_t {std::move(bytes)}, - [h = std::forward(handler)](auto const & ec, - auto const & /* msg */) mutable { h(ec); }); + using namespace async::continuations; + + return async_initiate_explicit( + [ipc_sink = ipc_sink, bytes = 
apc::make_comm_frame(pid, tid, image, comm)](auto && sc) mutable { + submit(ipc_sink->async_send_message(ipc::msg_apc_frame_data_t {std::move(bytes)}, + use_continuation) // + | then([](auto const & ec, auto const &) { return ec; }), + std::forward(sc)); }, - token); + std::forward(token)); } template - auto async_send_cpu_online_frame(monotonic_delta_t timestamp, int cpu, CompletionToken && token) + auto async_send_cpu_online_frame(monotonic_delta_t timestamp, int cpu, bool online, CompletionToken && token) { - return boost::asio::async_initiate( - [ipc_sink = ipc_sink, bytes = apc::make_cpu_online_frame(timestamp, cpu)](auto && handler) mutable { - ipc_sink->async_send_message( - ipc::msg_apc_frame_data_t {std::move(bytes)}, - [h = std::forward(handler)](auto const & ec, - auto const & /* msg */) mutable { h(ec); }); - }, - token); - } + using namespace async::continuations; - template - auto async_send_cpu_offine_frame(monotonic_delta_t timestamp, int cpu, CompletionToken && token) - { - return boost::asio::async_initiate( - [ipc_sink = ipc_sink, bytes = apc::make_cpu_offline_frame(timestamp, cpu)](auto && handler) mutable { - ipc_sink->async_send_message( - ipc::msg_apc_frame_data_t {std::move(bytes)}, - [h = std::forward(handler)](auto const & ec, - auto const & /* msg */) mutable { h(ec); }); + return async_initiate_explicit( + [ipc_sink = ipc_sink, + bytes = (online ? apc::make_cpu_online_frame(timestamp, cpu) // + : apc::make_cpu_offline_frame(timestamp, cpu))](auto && sc) mutable { + submit(ipc_sink->async_send_message(ipc::msg_apc_frame_data_t {std::move(bytes)}, + use_continuation) // + | then([](auto const & ec, auto const &) { return ec; }), + std::forward(sc)); }, - token); + std::forward(token)); } - template auto async_send_kallsyms_frame(std::string_view kallsyms, CompletionToken && token) { - return boost::asio::async_initiate( - [ipc_sink = ipc_sink, bytes = apc::make_kallsyms_frame(kallsyms)](auto && handler) mutable { - ipc_sink->async_send_message( - ipc::msg_apc_frame_data_t {std::move(bytes)}, - [h = std::forward(handler)](auto const & ec, - auto const & /* msg */) mutable { h(ec); }); + using namespace async::continuations; + + return async_initiate_explicit( + [ipc_sink = ipc_sink, bytes = apc::make_kallsyms_frame(kallsyms)](auto && sc) mutable { + submit(ipc_sink->async_send_message(ipc::msg_apc_frame_data_t {std::move(bytes)}, + use_continuation) // + | then([](auto const & ec, auto const &) { return ec; }), + std::forward(sc)); }, - token); + std::forward(token)); } template @@ -154,68 +147,77 @@ namespace apc { lib::Span counters, CompletionToken && token) { - return boost::asio::async_initiate( - [ipc_sink = ipc_sink, - bytes = apc::make_perf_counters_frame(timestamp, counters)](auto && handler) mutable { - ipc_sink->async_send_message( - ipc::msg_apc_frame_data_t {std::move(bytes)}, - [h = std::forward(handler)](auto const & ec, - auto const & /* msg */) mutable { h(ec); }); + using namespace async::continuations; + + return async_initiate_explicit( + [ipc_sink = ipc_sink, bytes = apc::make_perf_counters_frame(timestamp, counters)](auto && sc) mutable { + submit(ipc_sink->async_send_message(ipc::msg_apc_frame_data_t {std::move(bytes)}, + use_continuation) // + | then([](auto const & ec, auto const &) { return ec; }), + std::forward(sc)); }, - token); + std::forward(token)); } template auto async_send_header_page_frame(std::string_view header_page, CompletionToken && token) { - return boost::asio::async_initiate( - [ipc_sink = ipc_sink, bytes = 
apc::make_header_page_frame(header_page)](auto && handler) mutable { - ipc_sink->async_send_message( - ipc::msg_apc_frame_data_t {std::move(bytes)}, - [h = std::forward(handler)](auto const & ec, - auto const & /* msg */) mutable { h(ec); }); + using namespace async::continuations; + + return async_initiate_explicit( + [ipc_sink = ipc_sink, bytes = apc::make_header_page_frame(header_page)](auto && sc) mutable { + submit(ipc_sink->async_send_message(ipc::msg_apc_frame_data_t {std::move(bytes)}, + use_continuation) // + | then([](auto const & ec, auto const &) { return ec; }), + std::forward(sc)); }, - token); + std::forward(token)); } template auto async_send_header_event_frame(std::string_view header_event, CompletionToken && token) { - return boost::asio::async_initiate( - [ipc_sink = ipc_sink, bytes = apc::make_header_event_frame(header_event)](auto && handler) mutable { - ipc_sink->async_send_message( - ipc::msg_apc_frame_data_t {std::move(bytes)}, - [h = std::forward(handler)](auto const & ec, - auto const & /* msg */) mutable { h(ec); }); + using namespace async::continuations; + + return async_initiate_explicit( + [ipc_sink = ipc_sink, bytes = apc::make_header_event_frame(header_event)](auto && sc) mutable { + submit(ipc_sink->async_send_message(ipc::msg_apc_frame_data_t {std::move(bytes)}, + use_continuation) // + | then([](auto const & ec, auto const &) { return ec; }), + std::forward(sc)); }, - token); + std::forward(token)); } template auto async_send_summary_message(agents::perf::perf_driver_summary_state_t const & state, CompletionToken && token) { - return boost::asio::async_initiate( - [ipc_sink = ipc_sink, bytes = apc::make_summary_message(state)](auto && handler) mutable { - ipc_sink->async_send_message( - ipc::msg_apc_frame_data_t {std::move(bytes)}, - [h = std::forward(handler)](auto const & ec, - auto const & /* msg */) mutable { h(ec); }); + using namespace async::continuations; + + return async_initiate_explicit( + [ipc_sink = ipc_sink, bytes = apc::make_summary_message(state)](auto && sc) mutable { + submit(ipc_sink->async_send_message(ipc::msg_apc_frame_data_t {std::move(bytes)}, + use_continuation) // + | then([](auto const & ec, auto const &) { return ec; }), + std::forward(sc)); }, - token); + std::forward(token)); } template auto async_send_core_name(int core, int cpuid, std::string_view name, CompletionToken && token) { - return boost::asio::async_initiate( - [ipc_sink = ipc_sink, bytes = apc::make_core_name_message(core, cpuid, name)](auto && handler) mutable { - ipc_sink->async_send_message( - ipc::msg_apc_frame_data_t {std::move(bytes)}, - [h = std::forward(handler)](auto const & ec, - auto const & /* msg */) mutable { h(ec); }); + using namespace async::continuations; + + return async_initiate_explicit( + [ipc_sink = ipc_sink, bytes = apc::make_core_name_message(core, cpuid, name)](auto && sc) mutable { + submit(ipc_sink->async_send_message(ipc::msg_apc_frame_data_t {std::move(bytes)}, + use_continuation) // + | then([](auto const & ec, auto const &) { return ec; }), + std::forward(sc)); }, - token); + std::forward(token)); } private: diff --git a/daemon/apc/perf_apc_frame_utils.h b/daemon/apc/perf_apc_frame_utils.h index c6088cbc..741967b9 100644 --- a/daemon/apc/perf_apc_frame_utils.h +++ b/daemon/apc/perf_apc_frame_utils.h @@ -6,6 +6,7 @@ #include "Protocol.h" #include "Time.h" #include "agents/perf/async_buffer_builder.h" +#include "agents/perf/events/types.hpp" #include "k/perf_event.h" #include "lib/Assert.h" #include "lib/Span.h" @@ -60,39 +61,25 @@ 
namespace apc { return frame; } - [[nodiscard]] inline std::vector make_keys_frame(lib::Span ids, lib::Span keys) + [[nodiscard]] inline std::vector make_keys_frame( + lib::Span const> mappings) { - runtime_assert(ids.size() == keys.size(), "expected equal numbers of ids and keys"); std::vector frame {}; agents::perf::apc_buffer_builder_t> buffer(frame); detail::make_perf_attr_frame_header(CodeType::KEYS, buffer); - int count = static_cast(ids.size()); + auto const count = static_cast(mappings.size()); + runtime_assert((count >= 0) && (mappings.size() == std::size_t(count)), "too many mappings !"); buffer.packInt(count); - for (int i = 0; i < count; ++i) { - buffer.packInt64(static_cast(ids[i])); - buffer.packInt(keys[i]); + for (auto const & mapping : mappings) { + buffer.packInt64(static_cast(mapping.first)); + buffer.packInt(static_cast(mapping.second)); } buffer.endFrame(); return frame; } - [[nodiscard]] inline std::vector make_old_keys_frame(lib::Span keys, lib::Span bytes) - { - std::vector frame {}; - agents::perf::apc_buffer_builder_t> buffer(frame); - detail::make_perf_attr_frame_header(CodeType::KEYS_OLD, buffer); - - buffer.packInt(static_cast(keys.size())); - for (int const key : keys) { - buffer.packInt(key); - } - buffer.writeBytes(bytes.data(), bytes.size()); - buffer.endFrame(); - return frame; - } - [[nodiscard]] inline std::vector make_format_frame(std::string_view format) { std::vector frame {}; diff --git a/daemon/apc/perf_counter.h b/daemon/apc/perf_counter.h index 16d1eee9..01db1022 100644 --- a/daemon/apc/perf_counter.h +++ b/daemon/apc/perf_counter.h @@ -1,5 +1,7 @@ /* Copyright (C) 2022 by Arm Limited. All rights reserved. */ +#pragma once + #include namespace apc { diff --git a/daemon/async/async_buffer.hpp b/daemon/async/async_buffer.hpp deleted file mode 100644 index 50a86bb4..00000000 --- a/daemon/async/async_buffer.hpp +++ /dev/null @@ -1,591 +0,0 @@ -/* Copyright (C) 2021-2022 by Arm Limited. All rights reserved. */ - -#pragma once - -#include "ISender.h" -#include "async/completion_handler.h" -#include "lib/memory_pool.h" - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -namespace async { - - // default the buffer size to fit the biggest possible frame - constexpr std::size_t default_memory_pool_size = ISender::MAX_RESPONSE_LENGTH; - - /** - * An asynchronous producer/consumer buffer with fixed (but configurable) size. - * - * Producers may request some space within the buffer, which may be fulfilled asynchronously: - * - if space is available, the request may complete directly - * - otherwise, the request is added to a queue and completes as the space is made free by some consumer operation - * - * A single consumer will asynchronously wait for data to be available in the buffer. When it becomes available the - * consumer is called with one of the buffers to send. Once the send is complete, the consumer should reregister in order - * to receiver another (by async_consume). - * - * The producer is passed a 'commit_action_t' object which it will use to notify the buffer that it has completed writing - * to its allocated space. It can also use this object to discard the buffer (should it need to). The object will discard - * on destruction if not previously commited. The commit method can take an optional consumer token of the form 'void(bool)' - * which allows the producer to register for notification that the data was sent. 
The bool argument will be true on - * successful send, and false otherwise. - * - * Likewise, the consumer is passed a 'consume_action_t' object which it must invoke only once the data has been fully - * consumed. If the object goes out of scope it will automatically mark the space as consumed, so the consumer must ensure - * the proper life of said object. The 'consume_action_t::consume()' method takes an optional bool argument (which defaults - * to true). This value is passed to the producer's notification token (if one was passed) to notify that the send - * was successful (or not). If the object goes out of scope before consume is called it will be notified that the send - * was not successful. - * - * Both commit_action_t and consume_action_t are move-only types. - * - */ - class async_buffer_t { - public: - class commit_action_t; - class consume_action_t; - - using const_buffer_type = lib::Span; - using mutable_buffer_type = lib::Span; - using sent_completion_handler_type = completion_handler_ref_t; - using wait_for_space_handler_type = completion_handler_ref_t; - - private: - struct reclaim_entry_t { - reclaim_entry_t(lib::alloc::memory_pool_t::pointer_type allocation, bool ready) - : allocation(std::move(allocation)), ready(ready) - { - } - - lib::alloc::memory_pool_t::pointer_type allocation; - bool ready; - }; - - struct wait_for_space_entry_t { - wait_for_space_entry_t(std::size_t n, wait_for_space_handler_type handler) - : n(n), handler(std::move(handler)) - { - } - - std::size_t n; - wait_for_space_handler_type handler; - }; - - struct wait_for_commit_entry_t { - wait_for_commit_entry_t(std::size_t n, reclaim_entry_t & reclaim_entry, const_buffer_type buffers) - : n(n), reclaim_entry(reclaim_entry), buffers(buffers) - { - } - - std::size_t n; - reclaim_entry_t & reclaim_entry; - const_buffer_type buffers; - }; - - struct ready_to_send_entry_t { - ready_to_send_entry_t(std::size_t n, - reclaim_entry_t & reclaim_entry, - const_buffer_type buffers, - sent_completion_handler_type handler) - : n(n), reclaim_entry(reclaim_entry), buffers(buffers), handler(std::move(handler)) - { - } - - std::size_t n; - reclaim_entry_t & reclaim_entry; - const_buffer_type buffers; - sent_completion_handler_type handler; - }; - - struct sending_entry_t { - sending_entry_t(std::size_t n, reclaim_entry_t & reclaim_entry, sent_completion_handler_type handler) - : n(n), reclaim_entry(reclaim_entry), handler(std::move(handler)) - { - } - - std::size_t n; - reclaim_entry_t & reclaim_entry; - sent_completion_handler_type handler; - }; - - struct one_shot_t { - template - explicit one_shot_t(Handler && h) : running_total {0}, handler {std::forward(h)} - { - } - - std::size_t running_total; - completion_handler_ref_t handler; - }; - - using wait_for_commit_iter_t = std::list::iterator; - using wait_for_consume_iter_t = std::list::iterator; - - public: - /** - * Passed to the 'request space'' completion handler, provides a call back to mark the region as being commited or discarded. 
- */ - class commit_action_t { - public: - constexpr commit_action_t() = default; - - // move only - commit_action_t(commit_action_t const &) = delete; - commit_action_t & operator=(commit_action_t const &) = delete; - - commit_action_t(commit_action_t && that) noexcept - : parent(std::exchange(that.parent, nullptr)), entry(std::exchange(that.entry, {})) - { - } - - commit_action_t & operator=(commit_action_t && that) noexcept - { - if (this != &that) { - commit_action_t temp {std::move(that)}; - std::swap(parent, temp.parent); - std::swap(entry, temp.entry); - } - return *this; - } - - ~commit_action_t() noexcept - { - // discard the entry (if commit was not called) - discard(); - } - - void commit(sent_completion_handler_type && handler_ref = {}) - { - boost::system::error_code ec; - if (!commit(ec, entry->n, std::move(handler_ref))) { - throw std::runtime_error("Buffer commit failed: " + ec.message()); - } - } - - /** Mark the buffer region as commited and ready to send */ - [[nodiscard]] bool commit(boost::system::error_code & ec, - std::size_t size, - sent_completion_handler_type && handler_ref = {}) - { - if (size > entry->n) { - ec = boost::system::errc::make_error_code(boost::system::errc::value_too_large); - return false; - } - auto * p = std::exchange(parent, nullptr); - auto e = std::exchange(entry, {}); - - if ((p != nullptr) && (e != wait_for_commit_iter_t {})) { - p->commit_entry(e, size, std::move(handler_ref)); - } - - return true; - } - - /** Mark the buffer region as discarded */ - void discard() - { - auto * p = std::exchange(parent, nullptr); - auto e = std::exchange(entry, {}); - - if ((p != nullptr) && (e != wait_for_commit_iter_t {})) { - p->discard_entry(e); - } - } - - private: - friend class async_buffer_t; - - constexpr commit_action_t(async_buffer_t & parent, wait_for_commit_iter_t const & entry) - : parent(&parent), entry(entry) - { - } - - async_buffer_t * parent = nullptr; - wait_for_commit_iter_t entry {}; - }; - - /** - * Passed to the 'consume' completion handler, provides a call back to mark the region as being consumed. 
- */ - class consume_action_t { - public: - constexpr consume_action_t() = default; - - // move only - consume_action_t(consume_action_t const &) = delete; - consume_action_t & operator=(consume_action_t const &) = delete; - - consume_action_t(consume_action_t && that) noexcept - : parent(std::exchange(that.parent, nullptr)), entry(std::exchange(that.entry, {})) - { - } - - consume_action_t & operator=(consume_action_t && that) noexcept - { - if (this != &that) { - consume_action_t temp {std::move(that)}; - std::swap(parent, temp.parent); - std::swap(entry, temp.entry); - } - return *this; - } - - ~consume_action_t() noexcept - { - // consume the entry (if not already done so) - consume(false); - } - - /** Mark the buffer region as consumed */ - void consume(bool success = true) - { - auto * p = std::exchange(parent, nullptr); - auto e = std::exchange(entry, {}); - - if ((p != nullptr) && (e != wait_for_consume_iter_t {})) { - p->consume_entry(e, success); - } - } - - private: - friend class async_buffer_t; - - constexpr consume_action_t(async_buffer_t & parent, wait_for_consume_iter_t const & entry) - : parent(&parent), entry(entry) - { - } - - async_buffer_t * parent = nullptr; - wait_for_consume_iter_t entry = {}; - }; - - /** Constructor */ - explicit async_buffer_t(boost::asio::io_context & io_context, - std::size_t maximum_size = default_memory_pool_size) - : strand(io_context), mem_pool(maximum_size) - { - } - - /** Destructor */ - ~async_buffer_t() - { - // If there's an outstanding one-shot mode handler, then invoke it - // as cancelled so we don't make any client async process 'stuck' - boost::asio::post(strand, [osm = std::move(one_shot_mode)]() mutable { - if (osm) { - osm->handler(boost::asio::error::operation_aborted); - } - }); - } - - /** Enable one-shot mode, the completion handler will be invoked when - * the total committed bytes is equal to or greater than the pool size. 
- */ - template - auto async_buffer_full_oneshot(CompletionToken && token) - { - return boost::asio::async_initiate( - [this](auto && handler) mutable { - using Handler = decltype(handler); - - boost::asio::dispatch(strand, [this, h = std::forward(handler)]() mutable { - one_shot_mode = one_shot_t {std::move(h)}; - }); - }, - token); - } - - /** Request some data to send */ - template - auto async_consume(CompletionToken && token) - { - return boost::asio::async_initiate( - [this](auto && handler) mutable { - using Handler = decltype(handler); - do_async_consume(std::forward(handler)); - }, - token); - } - - /** Request some fixed space in the buffer */ - template - auto async_request_space(std::size_t n, CompletionToken && token) - { - return boost::asio::async_initiate( - [this](auto && handler, std::size_t n) mutable { - using Handler = decltype(handler); - this->do_async_request_space(std::forward(handler), n); - }, - token, - n); - } - - private: - friend class commit_action_t; - friend class consume_action_t; - - using pending_send_action_t = completion_handler_ref_t; - - /** @return a const version of the mutable buffer */ - static const_buffer_type as_const_buffer(mutable_buffer_type const & b) - { - return const_buffer_type(b.data(), b.size()); - } - - boost::asio::io_context::strand strand; - lib::alloc::memory_pool_t mem_pool; - std::optional one_shot_mode; - pending_send_action_t pending_send_action {}; - - std::list reclaim_queue {}; - std::list waiting_for_space_queue {}; - std::list waiting_for_commit_queue {}; - std::list ready_to_send_queue {}; - std::list sending_queue {}; - - /** Perform the async_consume action */ - template - void do_async_consume(Handler && handler) - { - static_assert(!std::is_reference_v); - static_assert(!std::is_const_v); - - // ok, execute on strand to serialize access to the buffer and queue - boost::asio::dispatch(strand, [this, handler = std::forward(handler)]() mutable { - // not allowed to have multiple senders... 
- if (pending_send_action) { - return handler(false, const_buffer_type {}, consume_action_t {}); - } - - // save the handler - pending_send_action = std::forward(handler); - - // check for stuff in the queue - check_for_sendable_items(); - }); - } - - /** Perform the async_request_space action */ - template - void do_async_request_space(Handler && handler, std::size_t n) - { - static_assert(!std::is_reference_v); - static_assert(!std::is_const_v); - - // fail if n == 0 - if (n == 0) { - return handler(false, mutable_buffer_type {}, commit_action_t {}); - } - - // ok, execute on strand to serialize access to the buffer and queue - boost::asio::dispatch(strand, [this, handler = std::forward(handler), n]() mutable { - // if the request is greater than the total pool size it'll never be fulfilled - if (n > mem_pool.size()) { - return handler(false, mutable_buffer_type {}, commit_action_t {}); - } - - auto data = mem_pool.alloc(n); - - if (data) { - auto mutable_buffer = mutable_buffer_type {data->data(), data->size()}; - auto & reclaim_entry = reclaim_queue.emplace_back(std::move(data), false); - auto iter = waiting_for_commit_queue.insert( - waiting_for_commit_queue.end(), - wait_for_commit_entry_t {n, reclaim_entry, as_const_buffer(mutable_buffer)}); - - return handler(true, std::move(mutable_buffer), commit_action_t(*this, iter)); - } - else { - waiting_for_space_queue.emplace_back(n, std::forward(handler)); - return; - } - }); - } - - /** Handle commit of some entry in the queue */ - void commit_entry(wait_for_commit_iter_t const & iter, - std::size_t commit_size, - sent_completion_handler_type && handler_ref) - { - // run on the strand, to serialize access - boost::asio::dispatch(strand, [this, iter, commit_size, handler_ref = std::move(handler_ref)]() mutable { - // convert the entry to ready - ready_to_send_queue.emplace_back(commit_size, - iter->reclaim_entry, - iter->buffers, - std::move(handler_ref)); - - // remove from wait queue - waiting_for_commit_queue.erase(iter); - - // and notify the send wait object if there is one - check_for_sendable_items(); - - // check if one-shot mode is enabled and if we have hit the buffer size - check_one_shot_mode(commit_size); - }); - } - - /** Handle discard of some entry in the queue */ - void discard_entry(wait_for_commit_iter_t const & iter) - { - // run on the strand, to serialize access - boost::asio::dispatch(strand, [this, iter]() { - // mark the reclaim entry as ready - iter->reclaim_entry.ready = true; - // remove from wait queue - waiting_for_commit_queue.erase(iter); - // check the reclaim queue - check_for_reclaim_items(); - }); - } - - /** Handle consume of some sent entry in the queue */ - void consume_entry(wait_for_consume_iter_t const & iter, bool success) - { - // run on the strand, to serialize access - boost::asio::dispatch(strand, [this, iter, success]() { - // enqueue to notify the handler - if (iter->handler) { - boost::asio::post(strand, - [handler = std::move(iter->handler), success]() mutable { handler(success); }); - } - - // mark the reclaim entry as ready - iter->reclaim_entry.ready = true; - // remove from wait queue - sending_queue.erase(iter); - // check the reclaim queue - check_for_reclaim_items(); - }); - } - - /** Check the ready-to-send queue for any items */ - void check_for_sendable_items() - { - // NB: must be called from the strand - - // nothing to do if the consumer is missing or the queue is empty - if ((!pending_send_action) || ready_to_send_queue.empty()) { - return; - } - - // remove the item from 
the RTS queue and insert in the SEND queue - ready_to_send_entry_t queue_entry = std::move(ready_to_send_queue.front()); - ready_to_send_queue.pop_front(); - auto iter = sending_queue.insert( - sending_queue.end(), - sending_entry_t {queue_entry.n, queue_entry.reclaim_entry, std::move(queue_entry.handler)}); - - // just send the first item in the queue - boost::asio::post( - strand, - [this, buffer = const_buffer_type(queue_entry.buffers.data(), queue_entry.n), iter]() mutable { - if (pending_send_action) { - pending_send_action(true, buffer, consume_action_t {*this, iter}); - } - }); - } - - /** Check the reclaim list for ready items and reclaim the space back into the buffer for reuse */ - void check_for_reclaim_items() - { - // NB: must be called from the strand - bool reclaimed_space = false; - - // first reclaim the space - for (auto iter = reclaim_queue.begin(); iter != reclaim_queue.end();) { - // the reclaimed memory must be returned to the buffer in order (since we do not track the spaces individually) - if (!iter->ready) { - break; - } - - // remove from list. the allocation will be freed in the destructor - iter = reclaim_queue.erase(iter); - - // so we know to check the wait for space list - reclaimed_space = true; - } - - if (!reclaimed_space) { - return; - } - - // arbitrary limit to 2g - std::size_t smallest_failed_alloc_attempt = std::numeric_limits::max(); - - // now check for anything waiting for space - for (auto iter = waiting_for_space_queue.begin(); iter != waiting_for_space_queue.end();) { - // if we've already made an allocation attempt that was smaller than this and it - // failed then there's no point trying this one - if (iter->n >= smallest_failed_alloc_attempt) { - iter++; - continue; - } - - // try to allocate a contiguous region - auto data = mem_pool.alloc(iter->n); - - if (data) { - // we got some memory so queue up the wait_for_commit entry and invoke the handler - auto mutable_buffer = mutable_buffer_type {data->data(), data->size()}; - auto handler = std::move(iter->handler); - auto & reclaim_entry = reclaim_queue.emplace_back(reclaim_entry_t {std::move(data), false}); - auto commit_iter = waiting_for_commit_queue.insert( - waiting_for_commit_queue.end(), - wait_for_commit_entry_t {iter->n, reclaim_entry, as_const_buffer(mutable_buffer)}); - - // now remove the old item - iter = waiting_for_space_queue.erase(iter); - - // invoke the handler asynchronously - boost::asio::post(strand, - [handler = std::move(handler), - mutable_buffer, - action = commit_action_t(*this, commit_iter)]() mutable { - handler(true, mutable_buffer, std::move(action)); - }); - } - else { - // not enough space for this allocation. record this as a failed attempt - // and carry on - smallest_failed_alloc_attempt = iter->n; - ++iter; - } - } - } - - void check_one_shot_mode(std::size_t commit_bytes) - { - // NB: must be called from the strand - - if (one_shot_mode) { - one_shot_mode->running_total += commit_bytes; - if (one_shot_mode->running_total >= mem_pool.size()) { - boost::asio::post(strand, [osm = std::move(one_shot_mode)]() mutable { osm->handler({}); }); - - // Lovely C++ quirk here, the optional has to be manually reset because moving from it doesn't clear - // the value, it does a move-from _on_ the value. In our case that just moves the handler but - // doesn't reset the optional, which leads to a segfault in the lambda... 
- one_shot_mode.reset(); - } - } - } - }; -} diff --git a/daemon/async/async_byte_reader.hpp b/daemon/async/async_byte_reader.hpp new file mode 100644 index 00000000..9ce0243b --- /dev/null +++ b/daemon/async/async_byte_reader.hpp @@ -0,0 +1,159 @@ +/* Copyright (C) 2022 by Arm Limited. All rights reserved. */ + +#pragma once + +#include "async/continuations/async_initiate.h" +#include "async/continuations/continuation.h" +#include "async/continuations/continuation_of.h" +#include "async/continuations/operations.h" +#include "async/continuations/use_continuation.h" + +#include +#include +#include + +#include +#include +#include + +namespace async { + + /** + * Helper class for reading chunks of byte data from some stream descriptor, repeatedly until eof + */ + class async_byte_reader_t : public std::enable_shared_from_this { + public: + static constexpr std::size_t default_read_chunk_size = 65536; + + // assumes that data returns a single item + static_assert(std::is_same_v); + + /** Constructor */ + explicit async_byte_reader_t(boost::asio::posix::stream_descriptor && sd, + std::size_t read_chunk_size = default_read_chunk_size) + : stream_descriptor(std::move(sd)), read_chunk_size(read_chunk_size) + { + } + + /** + * Read one chunk from the stream. Completion handler takes (boost::system::error_code, std::string_view). + * Async completes once per chunk of bytes so should be called in a loop. + */ + template + auto async_read_some(CompletionToken && token) + { + using namespace async::continuations; + + return async_initiate( + [st = shared_from_this()]() { + // consume the bytes from the buffer, ready for the next loop + st->buffer.consume(std::exchange(st->n_to_consume, 0)); + + return st->stream_descriptor.async_read_some(st->buffer.prepare(st->read_chunk_size), + use_continuation) // + | then([st](boost::system::error_code const & ec, std::size_t n) { + auto const is_eof = (ec == boost::asio::error::eof); + + // handle errors + if ((!is_eof) && ec) { + LOG_DEBUG("Read failed with %s", ec.message().c_str()); + return std::pair {ec, std::string_view()}; + } + + // commit output->input + st->buffer.commit(n); + + // extract the string view from the buffer + auto const input_area = st->buffer.data(); + auto const message = std::string_view(reinterpret_cast(input_area.data()), + input_area.size()); + + st->n_to_consume = message.size(); + + // only report the EOF once the buffer is empty + if (is_eof && message.empty()) { + return std::pair {boost::system::error_code {boost::asio::error::eof}, + std::string_view {}}; + } + + // report the line + return std::pair {boost::system::error_code {}, message}; + }) // + | unpack_tuple(); + }, + std::forward(token)); + } + + private: + boost::asio::posix::stream_descriptor stream_descriptor; + boost::asio::streambuf buffer {}; + std::size_t read_chunk_size; + std::size_t n_to_consume {0}; + }; + + template + auto async_consume_all_bytes(std::shared_ptr pipe_reader, + Handler && handler, + CompletionToken && token) + { + using namespace async::continuations; + + return async_initiate( + [pipe_reader = std::move(pipe_reader), h = std::forward(handler)]() mutable { + return start_with(boost::system::error_code {}) // + | loop([](boost::system::error_code ec) { return start_with(!ec, ec); }, // + [pipe_reader = std::move(pipe_reader), + h = std::move(h)](boost::system::error_code const & /*ec*/) mutable { + return pipe_reader->async_read_some(use_continuation) // + | then([&h](boost::system::error_code ec, std::string_view message) + -> 
polymorphic_continuation_t { + // exit loop early on error + if (ec) { + return start_with(ec); + } + + // pass message to handler and consume result + return start_with(message) // + | then(h) // + | then([](auto... args) { + using args_type = + continuation_of_t...>; + + if constexpr (std::is_same_v, args_type>) { + return boost::system::error_code {}; + } + else { + static_assert( + std::is_same_v< + continuation_of_t, + args_type>, + "Pipe consume must return void, error-code or a " + "continuation thereof"); + + return boost::system::error_code {args...}; + } + }); + }); + }) + // filter EOF + | then([](boost::system::error_code ec) { + if (ec != boost::asio::error::eof) { + return ec; + } + return boost::system::error_code {}; + }) // + | map_error(); + }, + std::forward(token)); + } + + template + auto async_consume_all_bytes(boost::asio::posix::stream_descriptor && sd, + Handler && handler, + CompletionToken && token) + { + return async_consume_all_bytes(std::make_shared(std::move(sd)), + std::forward(handler), + std::forward(token)); + } +} diff --git a/daemon/async/async_line_reader.hpp b/daemon/async/async_line_reader.hpp new file mode 100644 index 00000000..83109133 --- /dev/null +++ b/daemon/async/async_line_reader.hpp @@ -0,0 +1,190 @@ +/* Copyright (C) 2022 by Arm Limited. All rights reserved. */ + +#pragma once + +#include "async/continuations/async_initiate.h" +#include "async/continuations/continuation.h" +#include "async/continuations/continuation_of.h" +#include "async/continuations/operations.h" +#include "async/continuations/use_continuation.h" + +#include +#include +#include + +#include +#include +#include +#include + +namespace async { + /** + * Helper class for reading lines, one by one from some stream descriptor + */ + class async_line_reader_t : public std::enable_shared_from_this { + public: + /** Constructor */ + explicit async_line_reader_t(boost::asio::posix::stream_descriptor && sd) : stream_descriptor(std::move(sd)) {} + + /** + * Read one line from the stream. Completion handler takes (boost::system::error_code, std::string_view). + * Async completes once per line of text so should be called in a loop. + * Line of text is delimited by '\n'. 
+ */ + template + auto async_read_line(CompletionToken && token) + { + using namespace async::continuations; + + return async_initiate( + [st = shared_from_this()]() { + // consume the bytes from the buffer, ready for the next loop + st->buffer.consume(std::exchange(st->n_to_consume, 0)); + + return boost::asio::async_read_until(st->stream_descriptor, + st->buffer, + '\n', + use_continuation) // + | then([st](boost::system::error_code const & ec, std::size_t n) { + // assumes that data returns a single item + static_assert(std::is_same_v); + + auto const is_eof = (ec == boost::asio::error::eof); + + // handle errors + if ((!is_eof) && ec) { + LOG_DEBUG("Read failed with %s", ec.message().c_str()); + return std::pair {ec, std::string_view()}; + } + + // process line of text + + // find the modified buffer chunk + auto const input_area = st->buffer.data(); + auto const read_area_length = std::min(n, input_area.size()); + + // first find the substr containing up-to the first '\n' marker + auto const read_area = + std::string_view(reinterpret_cast(input_area.data()), + read_area_length); + + auto const message = find_end_of_line(read_area); + st->n_to_consume = message.size(); + + // only report EOF once buffer is drained of complete lines + if (is_eof && message.empty()) { + // if there is some trailing, unterminated (by EOL) text, send it + if (input_area.size() > 0) { + st->n_to_consume = input_area.size(); + return std::pair { + boost::system::error_code {}, + std::string_view(reinterpret_cast(input_area.data()), + input_area.size())}; + } + + // otherwise report the EOF + return std::pair {boost::system::error_code {boost::asio::error::eof}, + std::string_view {}}; + } + + // report the line + return std::pair {boost::system::error_code {}, message}; + }) // + | unpack_tuple(); + }, + std::forward(token)); + } + + private: + static constexpr std::string_view find_end_of_line(std::string_view chars) + { + auto const n = chars.find_first_of('\n'); + + return (n != std::string_view::npos ? chars.substr(0, n + 1) // + : std::string_view {}); + } + + boost::asio::posix::stream_descriptor stream_descriptor; + boost::asio::streambuf buffer {}; + std::size_t n_to_consume {0}; + }; + + /** + * Consume all lines, one by one, from the stream, for each one pass it to the handler + * + * @param line_reader The line reader to read from + * @param handler The handler function of the form `(std::string_view)`, where return may be void or boost::system::error_code, or a continuation thereof + */ + template + auto async_consume_all_lines(std::shared_ptr line_reader, + Handler && handler, + CompletionToken && token) + { + using namespace async::continuations; + + return async_initiate( + [line_reader = std::move(line_reader), h = std::forward(handler)]() mutable { + return start_with(boost::system::error_code {}) // + | loop([](boost::system::error_code ec) { return start_with(!ec, ec); }, // + [line_reader = std::move(line_reader), + h = std::move(h)](boost::system::error_code const & /*ec*/) mutable { + return line_reader->async_read_line(use_continuation) // + | then([&h](boost::system::error_code ec, std::string_view message) + -> polymorphic_continuation_t { + // exit loop early on error + if (ec) { + return start_with(ec); + } + + // pass message to handler and consume result + return start_with(message) // + | then(h) // + | then([](auto... 
args) { + using args_type = + continuation_of_t...>; + + if constexpr (std::is_same_v, args_type>) { + return boost::system::error_code {}; + } + else { + static_assert( + std::is_same_v< + continuation_of_t, + args_type>, + "line consume must return void, error-code or a " + "continuation thereof"); + + return boost::system::error_code {args...}; + } + }); + }); + }) + // filter EOF + | then([](boost::system::error_code ec) { + if (ec != boost::asio::error::eof) { + return ec; + } + return boost::system::error_code {}; + }) // + | map_error(); + }, + std::forward(token)); + } + + /** + * Consume all lines, one by one, from the stream, for each one pass it to the handler + * + * @param sd The stream to read from + * @param handler The handler function of the form `(std::string_view)`, where return may be void or boost::system::error_code, or a continuation thereof + */ + template + auto async_consume_all_lines(boost::asio::posix::stream_descriptor && sd, + Handler && handler, + CompletionToken && token) + { + return async_consume_all_lines(std::make_shared(std::move(sd)), + std::forward(handler), + std::forward(token)); + } +} diff --git a/daemon/async/continuations/async_initiate.h b/daemon/async/continuations/async_initiate.h index 2748d531..8149374e 100644 --- a/daemon/async/continuations/async_initiate.h +++ b/daemon/async/continuations/async_initiate.h @@ -5,13 +5,16 @@ #include "async/continuations/continuation.h" #include "async/continuations/detail/continuation_factory.h" #include "async/continuations/operations.h" +#include "async/continuations/stored_continuation.h" #include "async/continuations/use_continuation.h" #include "lib/exception.h" +#include "lib/source_location.h" #include #include #include +#include namespace async::continuations { namespace detail { @@ -48,6 +51,32 @@ namespace async::continuations { } }; + /** Converts a boost::asio::async_initiate handler to a continuations receiver */ + template + struct async_initiate_receiver_to_handler_adaptor_t { + using handler_type = std::decay_t; + + handler_type handler; + + template + void operator()(Exceptionally && exceptionally, Args... args) + { + try { + handler(std::move(args)...); + } + catch (...) { + LOG_DEBUG("async_initiate_explicit caught exception from receiver"); + exceptionally(std::current_exception()); + } + } + }; + + /** Exception handler*/ + struct async_initiate_exceptionally_t { + void operator()(boost::system::error_code e) const { error_swallower_t::consume("async_init_explicit", e); } + void operator()(std::exception_ptr e) const { error_swallower_t::consume("async_init_explicit", e); } + }; + /** Helper for async_initiate_explicit; specialized for different token types */ template struct async_initiate_explicit_t; @@ -68,26 +97,6 @@ namespace async::continuations { using state_type = state_t; using next_type = std::decay_t; - /** Handler type passed to the boost initiator, initiates the next step in the chain */ - template - struct receiver_adaptor_t { - using exceptionally_type = std::decay_t; - - next_type next; - exceptionally_type exceptionally; - - void operator()(SigArgs... args) - { - try { - next(exceptionally, std::move(args)...); - } - catch (...) { - LOG_DEBUG("async_initiate_explicit caught exception from receiver"); - exceptionally(std::current_exception()); - } - } - }; - state_type state; next_type next; @@ -103,8 +112,12 @@ namespace async::continuations { try { std::apply( [this, &exceptionally](auto &&... 
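For reference, a minimal usage sketch of the new line reader (illustrative only, not part of the patch). It assumes a caller-owned boost::asio::io_context `ioc` and a readable file descriptor `fd`, and uses the spawn() overload added later in this change to detach the read loop; async_byte_reader_t / async_consume_all_bytes follow the same shape but deliver raw chunks instead of '\n'-terminated lines.

    // Sketch: log every line read from `fd` until EOF.
    #include "Logging.h"
    #include "async/async_line_reader.hpp"
    #include "async/continuations/operations.h"
    #include "async/continuations/use_continuation.h"

    void consume_lines_example(boost::asio::io_context & ioc, int fd)
    {
        using namespace async;
        using namespace async::continuations;

        spawn("line reader example",
              async_consume_all_lines(
                  boost::asio::posix::stream_descriptor {ioc, fd},
                  // called once per '\n'-terminated line (plus any unterminated tail at EOF)
                  [](std::string_view line) {
                      LOG_DEBUG("read line: %.*s", static_cast<int>(line.size()), line.data());
                  },
                  use_continuation),
              [](bool failed) { LOG_DEBUG("line reader finished (failed=%d)", failed); });
    }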
args) mutable { - state.initiator(receiver_adaptor_t {std::move(next), exceptionally}, - exceptionally, + using stored_continuation_type = + raw_stored_continuation_t, + std::decay_t...>; + + state.initiator(stored_continuation_type(std::move(next), exceptionally), std::forward(args)...); }, std::move(state.init_args)); @@ -117,17 +130,22 @@ namespace async::continuations { }; boost_initiator_type initiator; + lib::source_loc_t sloc; init_args_tuple init_args; + + [[nodiscard]] constexpr name_and_loc_t trace() const { return {"async_initiate_explicit", sloc}; } }; template static auto initiate(use_continuation_t const & /*token*/, Initiator && initiator, + lib::source_loc_t sloc, InitArgs &&... args) { return detail::continuation_factory_t...>::make_continuation( state_t { std::forward(initiator), + sloc, std::make_tuple...>(std::forward(args)...)}); } }; @@ -136,17 +154,24 @@ namespace async::continuations { template struct async_initiate_explicit_t { template - static auto initiate(CompletionToken && token, Initiator && initiator, InitArgs &&... args) + static auto initiate(CompletionToken && token, + Initiator && initiator, + [[maybe_unused]] lib::source_loc_t sloc, + InitArgs &&... args) { return boost::asio::async_initiate( [initiator = std::forward(initiator)](auto && handler, InitArgs &&... args) mutable { - initiator( - std::forward(handler), - [](auto exception_ptr) { - LOG_DEBUG("Unexpected exception: %s", - lib::get_exception_ptr_str(std::move(exception_ptr))); - }, - std::forward(args)...); + using handler_type = decltype(handler); + using handler_wrapper_type = + async_initiate_receiver_to_handler_adaptor_t, + std::decay_t...>; + using stored_continuation_type = raw_stored_continuation_t...>; + + initiator(stored_continuation_type(handler_wrapper_type {std::forward(handler)}, + async_initiate_exceptionally_t {}), + std::forward(args)...); }, token, std::forward(args)...); @@ -209,14 +234,33 @@ namespace async::continuations { * @tparam Signature The receiver's signature (`void(Args...)`) as per boost::asio::async_initiate * @param initiator A callable that initiates the operation, taking `receiver, exceptionally, args...` as input * @param token The completion token + * @param sloc Source location + */ + template + auto async_initiate_explicit(Initiator && initiator, CompletionToken && token, SLOC_DEFAULT_ARGUMENT) + { + return detail::async_initiate_explicit_t, Signature>::initiate( + std::forward(token), + std::forward(initiator), + sloc); + } + + /** + * @param initiator A callable that initiates the operation, taking `receiver, exceptionally, args...` as input + * @param token The completion token + * @param sloc Source location * @param args Any args to pass to the initiator */ template - auto async_initiate_explicit(Initiator && initiator, CompletionToken && token, InitArgs &&... args) + auto async_initiate_explicit(Initiator && initiator, + CompletionToken && token, + ::lib::source_loc_t sloc, + InitArgs &&... args) { return detail::async_initiate_explicit_t, Signature>::initiate( std::forward(token), std::forward(initiator), + sloc, std::forward(args)...); } } diff --git a/daemon/async/continuations/detail/loop_state.h b/daemon/async/continuations/detail/loop_state.h index c13314bc..517db111 100644 --- a/daemon/async/continuations/detail/loop_state.h +++ b/daemon/async/continuations/detail/loop_state.h @@ -46,13 +46,15 @@ namespace async::continuations::detail { template void operator()(Exceptionally const & exceptionally, InputArgs &&... 
args) { + auto const & sloc = iteration_state->state.sloc; + predicate_type & predicate = iteration_state->state.predicate; - TRACE_CONTINUATION(iteration_state->state.sloc, + TRACE_CONTINUATION(sloc, "loop... calling predicate (iteration=%zu)", ++(iteration_state->loop_count)); - predicate_then_helper_type::initiate(iteration_state->state.sloc, + predicate_then_helper_type::initiate(sloc, predicate, predicate_result_initiator_t {std::move(iteration_state)}, exceptionally, @@ -67,23 +69,23 @@ namespace async::continuations::detail { template void operator()(Exceptionally const & exceptionally, bool condition, InputArgs &&... args) { + auto const & sloc = iteration_state->state.sloc; + if (condition) { generator_type & generator = iteration_state->state.generator; - TRACE_CONTINUATION(iteration_state->state.sloc, + TRACE_CONTINUATION(sloc, "loop... calling generator (iteration=%zu)", iteration_state->loop_count); - generator_then_helper_type::initiate(iteration_state->state.sloc, + generator_then_helper_type::initiate(sloc, generator, generator_result_initiator_t {std::move(iteration_state)}, exceptionally, std::move(args)...); } else { - TRACE_CONTINUATION(iteration_state->state.sloc, - "loop... complete (iteration=%zu)", - iteration_state->loop_count); + TRACE_CONTINUATION(sloc, "loop... complete (iteration=%zu)", iteration_state->loop_count); iteration_state->next(exceptionally, std::move(args)...); } @@ -102,13 +104,13 @@ namespace async::continuations::detail { template void operator()(Exceptionally const & exceptionally, InputArgs &&... args) { - predicate_type & predicate = iteration_state->state.predicate; + auto const & sloc = iteration_state->state.sloc; + + TRACE_CONTINUATION(sloc, "loop... calling predicate (iteration=%zu)", iteration_state->loop_count); - TRACE_CONTINUATION(iteration_state->state.sloc, - "loop... 
calling predicate (iteration=%zu)", - iteration_state->loop_count); + predicate_type & predicate = iteration_state->state.predicate; - predicate_then_helper_type::initiate(iteration_state->state.sloc, + predicate_then_helper_type::initiate(sloc, predicate, predicate_result_initiator_t {std::move(iteration_state)}, exceptionally, diff --git a/daemon/async/continuations/detail/polymorphic_state.h b/daemon/async/continuations/detail/polymorphic_state.h index b7dc5455..b9d5291f 100644 --- a/daemon/async/continuations/detail/polymorphic_state.h +++ b/daemon/async/continuations/detail/polymorphic_state.h @@ -12,7 +12,7 @@ #include #include -#include +#include #include namespace async::continuations::detail { @@ -76,6 +76,13 @@ namespace async::continuations::detail { return {std::make_shared(exceptionally)}; } + static polymorphic_exceptionally_t wrap_exceptionally(polymorphic_exceptionally_t const & exceptionally) + { + return exceptionally; + } + + constexpr polymorphic_exceptionally_t() = default; + // NOLINTNEXTLINE(hicpp-explicit-conversions) polymorphic_exceptionally_t(std::shared_ptr && exceptionally) : exceptionally(exceptionally) @@ -136,14 +143,26 @@ namespace async::continuations::detail { return {std::make_unique(std::forward(next_initiator))}; } + static polymorphic_next_initiator_t wrap_next_initiator(polymorphic_next_initiator_t next_initiator) + { + return next_initiator; + } + + constexpr polymorphic_next_initiator_t() = default; + // NOLINTNEXTLINE(hicpp-explicit-conversions) polymorphic_next_initiator_t(std::unique_ptr> && next_initiator) : next_initiator(std::move(next_initiator)) { } + [[nodiscard]] explicit operator bool() const { return !!next_initiator; } + void operator()(polymorphic_exceptionally_t const & exceptionally, InputArgs &&... args) { + std::unique_ptr> next_initiator { + std::move(this->next_initiator)}; + (*next_initiator)(exceptionally, std::move(args)...); } diff --git a/daemon/async/continuations/detail/trace.h b/daemon/async/continuations/detail/trace.h index de9a195d..97e357f8 100644 --- a/daemon/async/continuations/detail/trace.h +++ b/daemon/async/continuations/detail/trace.h @@ -2,6 +2,7 @@ #pragma once +#include "Config.h" #include "Logging.h" #include "lib/source_location.h" @@ -11,7 +12,9 @@ } while (false) // continuation tracing is expensive and only available on debug builds (and only outputs when --trace is set) -#ifndef NDEBUG +#if (!defined(NDEBUG) \ + && (!defined(CONFIG_DISABLE_CONTINUATION_TRACING) || (CONFIG_DISABLE_CONTINUATION_TRACING == 0))) \ + || (defined(GATOR_UNIT_TESTS) && (GATOR_UNIT_TESTS != 0)) #include "lib/Span.h" diff --git a/daemon/async/continuations/detail/unpack_tuple.h b/daemon/async/continuations/detail/unpack_tuple.h new file mode 100644 index 00000000..c9498cf3 --- /dev/null +++ b/daemon/async/continuations/detail/unpack_tuple.h @@ -0,0 +1,42 @@ +/* Copyright (C) 2021-2022 by Arm Limited. All rights reserved. 
*/ + +#pragma once + +#include "async/continuations/detail/continuation_factory.h" +#include "async/continuations/detail/unpack_tuple_state.h" +#include "lib/source_location.h" + +#include +#include +#include + +namespace async::continuations::detail { + + /** + * Factory for constructing a continuation_t for the 'unpack tuple' operation + */ + struct unpack_tuple_factory_t { + + /** Create the continuation */ + template + static constexpr auto make_continuation(continuation_t> && from, + lib::source_loc_t const & sloc) + { + using next_factory = continuation_factory_t; + using state_type = unpack_tuple_state_t>; + + return next_factory::make_continuation(std::move(from), state_type {sloc}); + } + + /** Create the continuation */ + template + static constexpr auto make_continuation(continuation_t> && from, + lib::source_loc_t const & sloc) + { + using next_factory = continuation_factory_t; + using state_type = unpack_tuple_state_t>; + + return next_factory::make_continuation(std::move(from), state_type {sloc}); + } + }; +} diff --git a/daemon/async/continuations/detail/unpack_tuple_state.h b/daemon/async/continuations/detail/unpack_tuple_state.h new file mode 100644 index 00000000..8ecd7314 --- /dev/null +++ b/daemon/async/continuations/detail/unpack_tuple_state.h @@ -0,0 +1,44 @@ +/* Copyright (C) 2021-2022 by Arm Limited. All rights reserved. */ + +#pragma once + +#include "async/continuations/continuation_traits.h" +#include "async/continuations/detail/trace.h" +#include "lib/source_location.h" + +#include +#include + +namespace async::continuations::detail { + /** The continuation chain state object for unpack_tuple */ + template + struct unpack_tuple_state_t { + /** Initiator object for unpack_variant */ + template + struct initiator_type { + using state_type = unpack_tuple_state_t; + using next_type = std::decay_t; + + next_type next; + + template + explicit constexpr initiator_type(state_type && /*state*/, Args &&... args) + : next(std::forward(args)...) + { + } + + template + void operator()(Exceptionally const & exceptionally, TupleType && tuple) + { + std::apply([this, &exceptionally]( + auto &&... 
values) { next(exceptionally, std::forward(values)...); }, + std::move(tuple)); + } + }; + + lib::source_loc_t sloc; + + [[nodiscard]] constexpr name_and_loc_t trace() const { return {"unpack_tuple", sloc}; } + }; + +} diff --git a/daemon/async/continuations/operations.h b/daemon/async/continuations/operations.h index 4453e435..9d923bee 100644 --- a/daemon/async/continuations/operations.h +++ b/daemon/async/continuations/operations.h @@ -15,6 +15,7 @@ #include "async/continuations/detail/predicate.h" #include "async/continuations/detail/start_state.h" #include "async/continuations/detail/then.h" +#include "async/continuations/detail/unpack_tuple.h" #include "async/continuations/detail/unpack_variant.h" #include "lib/Assert.h" #include "lib/exception.h" @@ -27,7 +28,7 @@ #include #include -#include +#include /** A helper that provides the 'detach' like operation, but with debug logging for terminal exceptions */ #define DETACH_LOG_ERROR(name) async::continuations::finally(async::continuations::error_swallower_t {(name)}); @@ -89,6 +90,10 @@ namespace async::continuations { using type = unpack_variant_detected_co_op_t; }; + struct unpack_tuple_co_op_t { + lib::source_loc_t sloc; + }; + template struct predicate_co_op_t { lib::source_loc_t sloc; @@ -329,7 +334,7 @@ namespace async::continuations { } /** - * Constructs a 'unpackt std::variant using std::visit' operation that can be chained to some continuation using | so that subsequent operations execute on the supplied executor. + * Constructs a 'unpacked std::variant using std::visit' operation that can be chained to some continuation using | so that subsequent operations execute on the supplied executor. * @tparam Args A single type being the expected common return type for the next link in the chain. For continuations with multiple arguments pass `continuation_of_t` * @param op Some callable operation that receives the values produced by the preceeding continuation and will produce some next continuation * @return The op wrapper object @@ -341,6 +346,12 @@ namespace async::continuations { return typename detail::unpack_variant_co_op_from_t::type {sloc, std::move(op)}; } + /** + * Constructs a 'unpacked tuple' operation that can be chained to some continuation using | so that subsequent operations execute on the supplied executor. + * @return The op wrapper object + */ + constexpr auto unpack_tuple(SLOC_DEFAULT_ARGUMENT) { return detail::unpack_tuple_co_op_t {sloc}; } + /** Bring the config option in */ using detail::on_executor_mode_t; @@ -520,6 +531,13 @@ namespace async::continuations { return factory_type::make_continuation(std::move(continuation), op.sloc, std::move(op.op)); } + template + constexpr auto operator|(continuation_t && continuation, + detail::unpack_tuple_co_op_t && op) + { + return detail::unpack_tuple_factory_t::make_continuation(std::move(continuation), op.sloc); + } + /** Chain a continuation that produces no value, with another continuation */ template constexpr auto operator|(continuation_t && prev_continuation, @@ -805,4 +823,65 @@ namespace async::continuations { { continuation([](Args... /*args*/) {}, exceptionally, sloc); } + + /** Spawn a continuation as a virtual thread, calling the handler with a boolean flag to indicate (true) that an error occured */ + template, bool> = false> + void spawn(char const * name, + continuation_t && continuation, + Handler && handler, + SLOC_DEFAULT_ARGUMENT) + { + continuation( + [sloc, h = handler](Args &&... 
/*args*/) mutable { + TRACE_CONTINUATION(sloc, "spawn completing without error"); + h(false); + }, + [h = handler, name](auto const & e) { h(error_swallower_t::consume(name, e)); }, + sloc); + } + + /** Spawn a continuation as a virtual thread, calling the handler with a boolean flag to indicate (true) that an error occured, with the boost::system::error code extracted as the second argument if the error can be converted to it */ + template, bool> = false> + void spawn(char const * name, + continuation_t && continuation, + Handler && handler, + SLOC_DEFAULT_ARGUMENT) + { + continuation( + [sloc, h = handler](Args &&... /*args*/) mutable { + TRACE_CONTINUATION(sloc, "spawn completing without error"); + h(false, boost::system::error_code {}); + }, + [h = handler, name](auto const & e) { + if (error_swallower_t::consume(name, e)) { + // store the failure + if constexpr (std::is_same_v>) { + h(true, e); + } + else { + h(true, boost::system::error_code {}); + } + } + else { + h(false, boost::system::error_code {}); + } + }, + sloc); + } + + /** Spawn a continuation as a virtual thread */ + template + void spawn(char const * name, continuation_t && continuation, SLOC_DEFAULT_ARGUMENT) + { + continuation( + [sloc](Args &&... /*args*/) mutable { TRACE_CONTINUATION(sloc, "spawn completing without error"); }, + [name](auto e) { error_swallower_t::consume(name, e); }, + sloc); + } } diff --git a/daemon/async/continuations/stored_continuation.h b/daemon/async/continuations/stored_continuation.h new file mode 100644 index 00000000..37fb3e0c --- /dev/null +++ b/daemon/async/continuations/stored_continuation.h @@ -0,0 +1,186 @@ +/* Copyright (C) 2022 by Arm Limited. All rights reserved. */ + +#pragma once + +#include "async/continuations/continuation_traits.h" +#include "async/continuations/detail/polymorphic_state.h" + +#include +#include +#include + +#include + +namespace async::continuations { + + /** + * A raw (typed) stored continuation, that may be resumed some time later, as per async_initiate_explicit. + * + * Intended for use where the polymorphic variant (stored_continuation_t) is not appropriate. 
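The new unpack_tuple() operation and spawn() overloads above compose with the existing start_with()/then() primitives; a small illustrative sketch follows (not part of the patch, and assuming std::pair is accepted in the same way the patch's own then()-returns-pair call sites rely on):

    // Sketch: a step that produces a pair/tuple is split back into separate
    // arguments for the next step by unpack_tuple().
    void unpack_tuple_example()
    {
        using namespace async::continuations;

        spawn("unpack_tuple example",
              start_with(std::pair {std::string {"answer"}, 42}) //
                  | unpack_tuple()                               // pair -> (std::string, int)
                  | then([](std::string const & name, int value) {
                        LOG_DEBUG("%s = %d", name.c_str(), value);
                    }),
              // the bool-handler spawn() overload reports whether the chain failed
              [](bool failed) { LOG_DEBUG("unpack_tuple example finished (failed=%d)", failed); });
    }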
+ * + * @tparam Receiver The raw receiver type + * @tparam Exceptionally The exceptionally type + * @tparam Args The continuation arguments + */ + template + class raw_stored_continuation_t { + public: + static_assert(std::is_same_v>); + static_assert(std::is_same_v>); + + using receiver_type = Receiver; + using exceptionally_type = Exceptionally; + + constexpr raw_stored_continuation_t() = default; + + constexpr raw_stored_continuation_t(receiver_type receiver, exceptionally_type exceptionally) + : receiver(std::move(receiver)), exceptionally(std::move(exceptionally)) + { + } + + // move only as is required for the receiver + raw_stored_continuation_t(raw_stored_continuation_t const &) = delete; + raw_stored_continuation_t & operator=(raw_stored_continuation_t const &) = delete; + raw_stored_continuation_t(raw_stored_continuation_t &&) noexcept = default; + raw_stored_continuation_t & operator=(raw_stored_continuation_t &&) noexcept = default; + + // move the receiver (but copy exceptionally so that it remains valid for any subsequent call to get_exceptionally) + [[nodiscard]] raw_stored_continuation_t move() + { + return {std::move(receiver), exceptionally}; + } + + [[nodiscard]] exceptionally_type const & get_exceptionally() const { return exceptionally; } + + /** + * Resume a stored continuation by posting it + * + * @param ex_or_ctx The context or executor to run on + * @param sc The stored continuation to resume + * -param args The continuation arguments + */ + template + friend void resume_continuation(ExOrCtx && ex_or_ctx, raw_stored_continuation_t && sc, Args... args) + { + boost::asio::post(std::forward(ex_or_ctx), + [r = std::move(sc.receiver), + e = std::move(sc.exceptionally), + args = std::make_tuple(std::move(args)...)]() mutable { + try { + std::apply([&r, &e](Args &&... a) { r(e, std::move(a)...); }, std::move(args)); + } + catch (...) { + e(std::current_exception()); + } + }); + } + + /** + * Chain a continuation with a stored one such that the continuation's output is passed to the stored one + * + * @param ex_or_ctx The context or executor to run on + * @param continuation The continuation the prepend to the stored continuation + * @param sc The stored continuation to resume + */ + template + friend void submit(ExOrCtx && ex_or_ctx, + continuation_t && continuation, + raw_stored_continuation_t && sc) + { + boost::asio::post( + std::forward(ex_or_ctx), + [r = std::move(sc.receiver), e = std::move(sc.exceptionally), c = std::move(continuation)]() mutable { + c([r = std::move(r), e](Args... args) mutable { r(e, std::move(args)...); }, e); + }); + } + + /** + * Chain a continuation with a stored one such that the continuation's output is passed to the stored one + * + * @param continuation The continuation the prepend to the stored continuation + * @param sc The stored continuation to resume + */ + template + friend void submit(continuation_t && continuation, raw_stored_continuation_t && sc) + { + continuation( + [r = std::move(sc.receiver), e = sc.exceptionally](Args... 
args) mutable { r(e, std::move(args)...); }, + sc.exceptionally); + } + + /** to support std::swap */ + void swap(raw_stored_continuation_t & that) + { + receiver_type tmp_r {std::move(receiver)}; + exceptionally_type tmp_e {std::move(exceptionally)}; + + receiver = std::move(that.receiver); + exceptionally = std::move(that.exceptionally); + + that.receiver = std::move(tmp_r); + that.exceptionally = std::move(tmp_e); + } + + protected: + template + friend class stored_continuation_t; + + receiver_type receiver; + exceptionally_type exceptionally; + }; + + /** + * A stored continuation, that may be resumed some time later, as per async_initiate_explicit + * + * @tparam Args The continuation arguments + */ + template + class stored_continuation_t : public raw_stored_continuation_t, + detail::polymorphic_exceptionally_t, + Args...> { + public: + using receiver_type = detail::polymorphic_next_initiator_t; + using exceptionally_type = detail::polymorphic_exceptionally_t; + using parent_type = raw_stored_continuation_t; + + constexpr stored_continuation_t() = default; + + // NOLINTNEXTLINE(hicpp-explicit-conversions) - allowed + constexpr stored_continuation_t(raw_stored_continuation_t && raw) + : parent_type(std::move(raw.receiver), std::move(raw.exceptionally)) + { + } + + constexpr stored_continuation_t(receiver_type receiver, exceptionally_type const & exceptionally) + : parent_type(std::move(receiver), exceptionally) + { + } + + template + // NOLINTNEXTLINE(hicpp-explicit-conversions) - allowed + constexpr stored_continuation_t(raw_stored_continuation_t && raw) + : parent_type(receiver_type::wrap_next_initiator(std::move(raw.receiver)), + detail::polymorphic_exceptionally_t::wrap_exceptionally(std::move(raw.exceptionally))) + { + } + + template + constexpr stored_continuation_t(Receiver && receiver, Exceptionally const & exceptionally) + : parent_type(receiver_type::wrap_next_initiator(std::forward(receiver)), + exceptionally_type::wrap_exceptionally(exceptionally)) + { + } + + [[nodiscard]] explicit operator bool() const { return !!parent_type::receiver; } + }; +} + +// NOLINTNEXTLINE(cert-dcl58-cpp) +namespace std { + template + inline void swap(async::continuations::raw_stored_continuation_t & a, + async::continuations::raw_stored_continuation_t & b) + { + a.swap(b); + } +} diff --git a/daemon/async/netlink/uevents.h b/daemon/async/netlink/uevents.h index 6e7fc40d..16f42981 100644 --- a/daemon/async/netlink/uevents.h +++ b/daemon/async/netlink/uevents.h @@ -3,6 +3,9 @@ #pragma once #include "Logging.h" +#include "async/continuations/async_initiate.h" +#include "async/continuations/stored_continuation.h" +#include "async/continuations/use_continuation.h" #include "async/netlink/nl_protocol.h" #include "lib/String.h" @@ -70,16 +73,18 @@ namespace async::netlink { template auto async_receive_one(CompletionToken && token) { - return boost::asio::async_initiate( - [this](auto && handler) { - socket.async_receive( - boost::asio::buffer(buffer), - [this, h = std::forward(handler)](auto const & ec, auto n) mutable { - h(ec, std::string_view(buffer.data(), !ec ? n : 0)); - }); + using namespace async::continuations; + + return async_initiate_explicit( + [this](auto && sc) { + submit(socket.async_receive(boost::asio::buffer(buffer), + use_continuation) // + | then([this](auto const & ec, auto n) { + return start_with(ec, std::string_view(buffer.data(), !ec ? 
n : 0)); + }), + std::forward(sc)); }, - token); + std::forward(token)); } private: @@ -108,10 +113,13 @@ namespace async::netlink { }; /** Constructor, using the provided context */ - explicit nl_kobject_uevent_monitor_t(boost::asio::io_context & context) : socket(context) {} + explicit nl_kobject_uevent_monitor_t(boost::asio::io_context & context) : context(context), socket(context) {} /** Constructor, using the provided socket (for testing) */ - explicit nl_kobject_uevent_monitor_t(socket_type && socket) : socket(std::forward(socket)) {} + nl_kobject_uevent_monitor_t(boost::asio::io_context & context, socket_type && socket) + : context(context), socket(std::forward(socket)) + { + } /** @return True if the socket is open, false otherwise */ [[nodiscard]] bool is_open() const { return socket.is_open(); } @@ -126,9 +134,11 @@ namespace async::netlink { template auto async_receive_one(CompletionToken && token) { - return boost::asio::async_initiate( - [this](auto && handler) { this->do_receive_one(std::forward(handler)); }, - token); + using namespace async::continuations; + + return async_initiate_explicit( + [this](auto && sc) { this->do_receive_one(std::forward(sc)); }, + std::forward(token)); } private: @@ -136,21 +146,25 @@ namespace async::netlink { static constexpr std::string_view devpath_prefix {"DEVPATH="}; static constexpr std::string_view subsystem_prefix {"SUBSYSTEM="}; + boost::asio::io_context & context; socket_type socket; - template - void do_receive_one(Handler && handler) + template + void do_receive_one( + async::continuations::raw_stored_continuation_t && sc) { LOG_TRACE("Waiting for uevent data"); - socket.async_receive_one([this, h = std::forward(handler)](auto const & ec, - auto sv) mutable { + socket.async_receive_one([this, sc = std::forward(sc)](auto const & ec, auto sv) mutable { if (!ec) { - this->parse(std::move(h), sv); + this->parse(std::move(sc), sv); } else { - LOG_ERROR_IF_NOT_EOF(ec, "Unexpected NETLINK_KOBJECT_UEVENT socket error %s", ec.message().c_str()); - h(ec, event_t {}); + LOG_ERROR_IF_NOT_EOF_OR_CANCELLED(ec, + "Unexpected NETLINK_KOBJECT_UEVENT socket error %s", + ec.message().c_str()); + + resume_continuation(context, std::move(sc), ec, event_t {}); } }); } @@ -160,11 +174,12 @@ namespace async::netlink { * This method extracts the relevent strings from that sequence and * passes them to the handler as an `event_t` object. * - * @param handler The async initiator that receives the event + * @param sc The stored continuation that receives the event * @param sv The string view containing the raw event blob */ - template - void parse(Handler && handler, std::string_view sv) + template + void parse(async::continuations::raw_stored_continuation_t && sc, + std::string_view sv) { bool has_action = false; bool has_devpath = false; @@ -208,11 +223,11 @@ namespace async::netlink { event.action.data(), event.devpath.data(), event.subsystem.data()); - return handler(boost::system::error_code {}, event); + return resume_continuation(context, std::move(sc), boost::system::error_code {}, event); } // ignore the event if the string_view is not parsed correctly - return do_receive_one(std::forward(handler)); + return do_receive_one(std::move(sc)); } }; } diff --git a/daemon/async/proc/async_exec.cpp b/daemon/async/proc/async_exec.cpp new file mode 100644 index 00000000..9e823740 --- /dev/null +++ b/daemon/async/proc/async_exec.cpp @@ -0,0 +1,46 @@ +/* Copyright (C) 2022 by Arm Limited. All rights reserved. 
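The uevents change above is the first caller of the reworked async_initiate_explicit: the initiator now receives a stored continuation, which can either be chained directly with submit() or parked and completed later with resume_continuation(). A condensed restatement of that pattern (illustrative only; the operation name and values are placeholders):

    // Sketch: expose a callback-style result as an async operation whose
    // completion signature is void(boost::system::error_code, int).
    template<typename CompletionToken>
    auto async_get_answer(boost::asio::io_context & ioc, CompletionToken && token)
    {
        using namespace async::continuations;

        return async_initiate_explicit<void(boost::system::error_code, int)>(
            [&ioc](auto && sc) {
                // `sc` is a (raw_)stored_continuation_t; it could be stashed in a
                // member and resumed later, here it is resumed immediately on `ioc`
                resume_continuation(ioc, std::move(sc), boost::system::error_code {}, 42);
            },
            std::forward<CompletionToken>(token));
    }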
*/ + +#include "async/proc/async_exec.hpp" + +namespace async::proc { + + lib::AutoClosingFd from_stdout(async_process_t & p) + { + lib::AutoClosingFd result {std::move(p.get_stdout_read())}; + p.on_output_complete({}, false); + return result; + } + + lib::AutoClosingFd from_stderr(async_process_t & p) + { + lib::AutoClosingFd result {std::move(p.get_stderr_read())}; + p.on_output_complete({}, true); + return result; + } + + boost::system::error_code configure_stdin(std::shared_ptr const & /*process*/, + detail::discard_tag_t const & /*tag */, + lib::AutoClosingFd & fd) + { + fd.close(); + return {}; + } + + boost::system::error_code configure_stdin(std::shared_ptr const & /*process*/, + detail::ignore_tag_t const & /*tag */, + lib::AutoClosingFd & /*fd*/) + { + return {}; + } + + boost::system::error_code configure_stdout_err(std::shared_ptr const & process, + detail::ignore_tag_t const & /*tag */, + bool is_stderr, + lib::AutoClosingFd & fd) + { + if (!fd) { + process->on_output_complete({}, is_stderr); + } + return {}; + } +} diff --git a/daemon/async/proc/async_exec.hpp b/daemon/async/proc/async_exec.hpp new file mode 100644 index 00000000..9169f598 --- /dev/null +++ b/daemon/async/proc/async_exec.hpp @@ -0,0 +1,744 @@ +/* Copyright (C) 2022 by Arm Limited. All rights reserved. */ + +#pragma once + +#include "Logging.h" +#include "async/async_byte_reader.hpp" +#include "async/async_line_reader.hpp" +#include "async/continuations/continuation.h" +#include "async/continuations/use_continuation.h" +#include "async/proc/async_process.hpp" +#include "async/proc/process_monitor.hpp" + +#include + +#include +#include + +#include + +namespace async::proc { + + namespace detail { + struct discard_tag_t { + }; + struct pipe_tag_t { + }; + struct log_tag_t { + }; + struct ignore_tag_t { + }; + + struct from_file_t { + char const * filename; + }; + + struct to_file_t { + char const * filename; + bool truncate; + }; + + // can pass a file name as stdin + [[nodiscard]] inline lib::error_code_or_t create_stdin(from_file_t && ff) + { + return lib::pipe_pair_t::from_file(ff.filename); + } + + // can pass a read fd as stdin + [[nodiscard]] inline lib::error_code_or_t create_stdin(lib::AutoClosingFd && read_from) + { + return lib::pipe_pair_t {std::move(read_from), {}}; + } + + // can pass an existing pipe pair as stdin; is treated like pipe_tag_t + [[nodiscard]] inline lib::error_code_or_t create_stdin( + lib::error_code_or_t && pipes) + { + return {std::move(pipes)}; + } + + // can pass an existing pipe pair as stdin; is treated like pipe_tag_t + [[nodiscard]] inline lib::error_code_or_t create_stdin(lib::pipe_pair_t && pipes) + { + return {std::move(pipes)}; + } + + // can discard stdin + [[nodiscard]] inline lib::error_code_or_t create_stdin(discard_tag_t const & /*tag*/) + { + return lib::pipe_pair_t::create(0); + } + + // can create a pipe for stdin for manual processing + [[nodiscard]] inline lib::error_code_or_t create_stdin(pipe_tag_t const & /*tag*/) + { + return lib::pipe_pair_t::create(0); + } + + // no log_tag_t for stdin as does not make sense + + // can pass a file name as stdout/stderr + [[nodiscard]] inline lib::error_code_or_t create_stdout_err(to_file_t && to_file) + { + return lib::pipe_pair_t::to_file(to_file.filename, to_file.truncate); + } + + // can pass a read fd as stdout/stderr + [[nodiscard]] inline lib::error_code_or_t create_stdout_err(lib::AutoClosingFd && write_to) + { + return lib::pipe_pair_t {{}, std::move(write_to)}; + } + + // can pass an existing pipe pair as 
stdout/stderr; is treated like pipe_tag_t + [[nodiscard]] inline lib::error_code_or_t create_stdout_err(lib::pipe_pair_t pipes) + { + return {std::move(pipes)}; + } + + // discard just writes to /dev/null + [[nodiscard]] inline lib::error_code_or_t create_stdout_err(discard_tag_t const & /*tag*/) + { + return lib::pipe_pair_t::to_file("/dev/null"); + } + + // can create a pipe for stdout/stderr for manual processing + [[nodiscard]] inline lib::error_code_or_t create_stdout_err(pipe_tag_t const & /*tag*/) + { + return lib::pipe_pair_t::create(0); + } + + // can log the output from stdout/stderr to the log + [[nodiscard]] inline lib::error_code_or_t create_stdout_err(log_tag_t const & /*tag*/) + { + return lib::pipe_pair_t::create(0); + } + + template + [[nodiscard]] boost::system::error_code configure_stdout_err(std::shared_ptr const & process, + Consumer && consumer, + bool is_stderr, + lib::AutoClosingFd & fd) + { + using namespace async::continuations; + + // log it + if (fd) { + spawn("async_process_t log loop", + async_consume_all_lines({process->context(), fd.release()}, + std::forward(consumer), + use_continuation), + [is_stderr, process](bool failed, boost::system::error_code ec) { + if (failed) { + // store the failure + if (ec) { + process->on_output_complete(ec, is_stderr); + } + else { + process->on_output_complete( + boost::system::errc::make_error_code(boost::system::errc::io_error), + is_stderr); + } + } + else { + process->on_output_complete({}, is_stderr); + } + }); + } + // otherwise flag as complete + else { + process->on_output_complete({}, is_stderr); + } + + return {}; + } + + } + + /** A simple type tag, that wraps a consumer object and indicates that the stdout or stderr + * should be a line consumer, where each line of output is passed to the handler as it is read */ + template + struct line_consumer_t { + static_assert(std::is_invocable_v); + T handler; + }; + + /** A line consumer, that writes to the log */ + struct log_line_consumer_t { + void operator()(pid_t pid, std::string_view line, bool is_stderr) + { + if (!line.empty()) { + if ((line.back() == '\n') || (line.back() == '\0')) { + line.remove_suffix(1); + } + if (is_stderr) { + LOG_STDERR(pid, line); + } + else { + LOG_STDOUT(pid, line); + } + } + } + }; + + /** A simple type tag, that wraps a consumer object and indicates that the stdout or stderr + * should be a pipe consumer, where chunks of bytes of output is passed to the handler as it is read */ + template + struct pipe_consumer_t { + static_assert(std::is_invocable_v); + T handler; + }; + + /** A pipe consumer, that writes to the log */ + struct log_pipe_consumer_t { + void operator()(pid_t pid, std::string_view blob, bool is_stderr) + { + if (!blob.empty()) { + if (is_stderr) { + LOG_STDERR(pid, blob); + } + else { + LOG_STDOUT(pid, blob); + } + } + } + }; + + /** Convert a line consumer that receives lines into a line_consumer_t.handler compatible consumer */ + template + auto wrap_line_consumer(pid_t pid, line_consumer_t && consumer, bool is_stderr) + { + return [pid, c = std::move(consumer.handler), is_stderr](std::string_view line) mutable { + c(pid, line, is_stderr); + }; + } + + /** Convert a pipe consumer that receives lines into a line_consumer_t.handler compatible consumer */ + template + auto wrap_pipe_consumer(pid_t pid, pipe_consumer_t && consumer, bool is_stderr) + { + return [pid, c = std::move(consumer.handler), is_stderr](std::string_view blob) mutable { + c(pid, blob, is_stderr); + }; + } + + /** Used to indicate that stdin is not 
used by process and should be closed, or that for stdout/stderr, it should be routed to /dev/null */ + static constexpr detail::discard_tag_t discard_ioe; + /** Used to indicate that stdin/stdout/stderr should be a pipe, which will be used externally */ + static constexpr detail::pipe_tag_t pipe_ioe; + /** Used to indicate that stdout/stderr should be a pipe, which will be logged to */ + static constexpr detail::log_tag_t log_oe; + /** Used to indicate that stdin should read from a file */ + [[nodiscard]] constexpr detail::from_file_t read_from(char const * filename) { return {filename}; } + /** Used to indicate that stdout/stderr should write from a file (overwriting it) */ + [[nodiscard]] constexpr detail::to_file_t write_to(char const * filename) { return {filename, true}; } + /** Used to indicate that stdout/stderr should write from a file (appending it) */ + [[nodiscard]] constexpr detail::to_file_t append_to(char const * filename) { return {filename, false}; } + /** Use to extract a pipe for stdin of a new process, using stdout of the prev process */ + [[nodiscard]] lib::AutoClosingFd from_stdout(async_process_t & p); + /** Use to extract a pipe for stdin of a new process, using stderr of the prev process */ + [[nodiscard]] lib::AutoClosingFd from_stderr(async_process_t & p); + + /** + * Map a stdin 'mode' value to a handler type tag. + * In this case, the discard tag is returned, indicating that stdin should be closed as it will not be used. + */ + [[nodiscard]] constexpr detail::discard_tag_t stdin_mode_type(detail::discard_tag_t const & /*tag*/) { return {}; } + + /** + * Map a stdin 'mode' value to a handler type tag. + * In this case, the ignore tag is returned, indicating that stdin should not be modified as it is already closed, or will + * be used as-is. + */ + template + [[nodiscard]] constexpr detail::ignore_tag_t stdin_mode_type(T const & /*tag*/) + { + return {}; + } + + /** + * Map a stdout/stderr 'mode' value to a handler type tag. + * In this case, the log tag is mapped to a pipe consumer object, which consumes bytes from the pipe into the log + */ + [[nodiscard]] constexpr pipe_consumer_t stdout_err_mode_type(detail::log_tag_t const & /*tag*/) + { + return {}; + } + + /** + * Map a stdout/stderr 'mode' value to a handler type tag. + * In this case, the tag is mapped to an ignore tag meaning that stdout/stderr should not be modified as already closed, or used as-is. + */ + template + [[nodiscard]] constexpr detail::ignore_tag_t stdout_err_mode_type(T const & /*tag*/) + { + return {}; + } + + /** + * Configure stdin for some process. + * In this case, because the type is the discard tag, stdin will be closed. + */ + [[nodiscard]] boost::system::error_code configure_stdin(std::shared_ptr const & /*process*/, + detail::discard_tag_t const & /*tag */, + lib::AutoClosingFd & fd); + + /** + * Configure stdin for some process. + * In this case, because the type is the ignore tag, nothing will happen. + */ + [[nodiscard]] boost::system::error_code configure_stdin(std::shared_ptr const & /*process*/, + detail::ignore_tag_t const & /*tag */, + lib::AutoClosingFd & /*fd*/); + + /** + * Configure stdout/stderr for some process. + * In this case, because the type is the ignore tag, nothing will happen, but the complete state will be update if the fd was already closed. 
+ */ + [[nodiscard]] boost::system::error_code configure_stdout_err(std::shared_ptr const & process, + detail::ignore_tag_t const & /*tag */, + bool is_stderr, + lib::AutoClosingFd & fd); + + /** + * Configure stdout/stderr for some process. + * In this case, because the type is the line consumer tag, the stream will be configured to asynchronously read all lines and pass each one to the consumer. + */ + template + [[nodiscard]] boost::system::error_code configure_stdout_err(std::shared_ptr const & process, + line_consumer_t && consumer, + bool is_stderr, + lib::AutoClosingFd & fd) + { + return detail::configure_stdout_err(process, + wrap_line_consumer(process->get_pid(), std::move(consumer), is_stderr), + is_stderr, + fd); + } + + /** + * Configure stdout/stderr for some process. + * In this case, because the type is the pipe consumer tag, the stream will be configured to asynchronously read all lines and pass each one to the consumer. + */ + template + [[nodiscard]] boost::system::error_code configure_stdout_err(std::shared_ptr const & process, + pipe_consumer_t && consumer, + bool is_stderr, + lib::AutoClosingFd & fd) + { + return detail::configure_stdout_err(process, + wrap_pipe_consumer(process->get_pid(), std::move(consumer), is_stderr), + is_stderr, + fd); + } + + /** + * The arguments required to exec a process + */ + struct async_exec_args_t { + /** The process exe to run */ + std::string command; + /** The vector of args to pass to exec */ + std::vector args; + /** The working directory, empty means current */ + boost::filesystem::path working_dir; + /** Optional uid/gid pair to change to */ + std::optional> uid_gid; + /** When true, means args[0] is not the name of the command, and command will be inserted as args[0] to exec */ + bool prepend_command; + + /** Constructor, for a command with no arguments */ + explicit async_exec_args_t(std::string command) : command(std::move(command)), prepend_command(true) {} + + /** Constructor, for a command and its arguments. + * If args is empty, it will be set to the command, otherwise the first argument + * must be the command name repeated as per argv[0], as per exec. */ + async_exec_args_t(std::string command, std::vector args, bool prepend_command = true) + : command(std::move(command)), args(std::move(args)), prepend_command(prepend_command || this->args.empty()) + { + } + + /** Constructor, for when working directory is also required. + * If args is empty, it will be set to the command, otherwise the first argument + * must be the command name repeated as per argv[0], as per exec. */ + async_exec_args_t(std::string command, + std::vector args, + boost::filesystem::path working_dir, + bool prepend_command = true) + : command(std::move(command)), + args(std::move(args)), + working_dir(std::move(working_dir)), + prepend_command(prepend_command || this->args.empty()) + { + } + + /** Constructor, for all components. + * If args is empty, it will be set to the command, otherwise the first argument + * must be the command name repeated as per argv[0], as per exec. */ + async_exec_args_t(std::string command, + std::vector args, + boost::filesystem::path working_dir, + std::optional> uid_gid, + bool prepend_command = true) + : command(std::move(command)), + args(std::move(args)), + working_dir(std::move(working_dir)), + uid_gid(uid_gid), + prepend_command(prepend_command || this->args.empty()) + { + } + + /** Constructor, for all components. 
+ * If args is empty, it will be set to the command, otherwise the first argument + * must be the command name repeated as per argv[0], as per exec. */ + async_exec_args_t(std::string command, + std::vector args, + boost::filesystem::path working_dir, + uid_t uid, + gid_t gid, + bool prepend_command = true) + : command(std::move(command)), + args(std::move(args)), + working_dir(std::move(working_dir)), + uid_gid({uid, gid}), + prepend_command(prepend_command || this->args.empty()) + { + } + }; + + /** + * Create an async_process_t. The process will be created in an unconfigured state. + * The completion handler must configure, then start, then exec the process. + */ + template + static auto async_create_process(process_monitor_t & process_monitor, + async_exec_args_t exec_args, + lib::stdio_fds_t stdio_fds, + CompletionToken && token) + { + using namespace async::continuations; + + LOG_DEBUG("Creating process %s", exec_args.command.c_str()); + + return async_initiate( + [&process_monitor, exec_args = std::move(exec_args), stdio_fds = std::move(stdio_fds)]() mutable { + return process_monitor.async_fork_exec(exec_args.prepend_command, + std::move(exec_args.command), + std::move(exec_args.args), + std::move(exec_args.working_dir), + exec_args.uid_gid, + std::move(stdio_fds), + use_continuation); + }, + std::forward(token)); + } + + /** + * Create an async_process_t. The process will be created in a configured and started state. + * The completion handler must, then exec the process. + * + * @param process_monitor The process monitor + * @param context The context for which io and other processing should happen on + * @param exec_args The set of configuration options defining the process to run + * @param stdin_mode Indicates how stdin should be handled + * @param stdout_mode Indicates how stdout should be handled + * @param stderr_mode Indicates how stderr should be handled + * @param token The async completion token + */ + template + static auto async_create_process(process_monitor_t & process_monitor, + boost::asio::io_context & context, + async_exec_args_t exec_args, + InputMode && stdin_mode, + OutputMode && stdout_mode, + ErrorMode && stderr_mode, + CompletionToken && token) + { + using namespace async::continuations; + + return async_initiate( + [&process_monitor, + &context, + exec_args = std::move(exec_args), + stdin_mode = std::forward(stdin_mode), + stdout_mode = std::forward(stdout_mode), + stderr_mode = std::forward(stderr_mode)]() mutable + -> polymorphic_continuation_t> { + // map to input/output mode to some handler type or tag + auto stdin_type = stdin_mode_type(stdin_mode); + auto stdout_type = stdout_err_mode_type(stdout_mode); + auto stderr_type = stdout_err_mode_type(stderr_mode); + + // create the fds + auto stdio_fds = lib::stdio_fds_t::create_from(detail::create_stdin(std::move(stdin_mode)), + detail::create_stdout_err(std::move(stdout_mode)), + detail::create_stdout_err(std::move(stderr_mode))); + + auto const * error = lib::get_error(stdio_fds); + if (error != nullptr) { + LOG_DEBUG("Failed to create some io"); + return start_with(*error, std::shared_ptr()); + } + + // fork the process + return async_create_process(process_monitor, + std::move(exec_args), + lib::get_value(std::move(stdio_fds)), + use_continuation) // + | then([&process_monitor, + &context, + stdin_type = std::move(stdin_type), + stdout_type = std::move(stdout_type), + stderr_type = + std::move(stderr_type)](boost::system::error_code ec, + process_monitor_t::fork_result_t fork_result) mutable { + 
LOG_DEBUG("Forked process %s, %d", ec.message().c_str(), fork_result.process.get_pid()); + + // forward error + if (ec) { + return std::pair {ec, std::shared_ptr {}}; + } + + auto result = std::make_shared( + async_process_t {process_monitor, context, std::move(fork_result)}); + + // configure stdin + ec = configure_stdin(result, std::move(stdin_type), result->get_stdin_write()); + if (ec) { + return std::pair {ec, std::shared_ptr {}}; + } + + // configure stdout + ec = configure_stdout_err(result, std::move(stdout_type), false, result->get_stdout_read()); + if (ec) { + return std::pair {ec, std::shared_ptr {}}; + } + + // configure stderr + ec = configure_stdout_err(result, std::move(stderr_type), true, result->get_stderr_read()); + if (ec) { + return std::pair {ec, std::shared_ptr {}}; + } + + // start observing events + result->start(); + + return std::pair {ec, result}; + }) // + | unpack_tuple(); + }, + std::forward(token)); + } + + /** Helper that takes async_process_t and waits for it to complete. */ + template + auto async_wait_for_completion(std::shared_ptr const & process, CompletionToken && token) + { + using namespace async::continuations; + + return async_initiate>( + [process]() mutable -> polymorphic_continuation_t { + // read off events until it terminates + return repeatedly([process]() { return !process->is_terminated(); }, + [process]() { + LOG_DEBUG("Waiting for event %d", process->get_pid()); + + return process->async_wait_complete(use_continuation) // + | then([process](auto ec, auto by_signal, auto status) { + if (ec) { + LOG_DEBUG("unexpected error reported for process %d (%s)", + process->get_pid(), + ec.message().c_str()); + } + else { + LOG_DEBUG("process %d terminated due to %s with status=%d", + process->get_pid(), + (by_signal ? "signal" : "exit"), + status); + } + }); + }) + // reading one last time just gets the final exit state + | process->async_wait_complete(use_continuation); + }, + std::forward(token)); + } + + /** Helper that takes async_process_t and runs it to completion. */ + template + auto async_run_to_completion(std::shared_ptr const & process, CompletionToken && token) + { + using namespace async::continuations; + + return async_initiate>( + [process]() mutable -> polymorphic_continuation_t { + // exec the process + if (!process->exec()) { + LOG_DEBUG("Exec failed for %d", process->get_pid()); + + return start_with(boost::system::errc::make_error_code(boost::system::errc::no_such_process), + false, + 0); + } + // wait for it to finish + return async_wait_for_completion(process, use_continuation); + }, + std::forward(token)); + } + + /** Helper that takes a continuation returned by `async_process_t::async_create_process(..., use_continuation)` and runs it to completion. 
*/ + template + auto async_run_to_completion( + async::continuations::continuation_t> && + continuation, + CompletionToken && token) + { + using namespace async::continuations; + + return async_initiate>( + [c = std::move(continuation)]() mutable { + return std::move(c) // + | map_error() // + | then([](std::shared_ptr const & ap) + -> polymorphic_continuation_t { + LOG_DEBUG("Successfully started process %d", ap->get_pid()); + return async_run_to_completion(ap, use_continuation); + }); + }, + std::forward(token)); + } + + /** + * Run a process to completion and asynchronously wait for that completion + * + * @param process_monitor The process monitor + * @param context The context for which io and other processing should happen on + * @param exec_args The exec configuration + * @param stdin_mode Indicates how stdin should be handled + * @param stdout_mode Indicates how stdout should be handled + * @param stderr_mode Indicates how stderr should be handled + * @param token The async completion token + */ + template + inline auto async_exec(process_monitor_t & process_monitor, + boost::asio::io_context & context, + async_exec_args_t exec_args, + InputMode && stdin_mode, + OutputMode && stdout_mode, + ErrorMode && stderr_mode, + CompletionToken && token) + { + using namespace async::continuations; + + return async_run_to_completion(async_create_process(process_monitor, + context, + std::move(exec_args), + std::forward(stdin_mode), + std::forward(stdout_mode), + std::forward(stderr_mode), + use_continuation), + std::forward(token)); + } + + /** + * Run a process to completion and asynchronously wait for that completion. + * + * Any output is written to the log. + * + * @param process_monitor The process monitor + * @param context The context for which io and other processing should happen on + * @param exec_args The exec configuration + * @param token The async completion token + */ + template + inline auto async_exec(process_monitor_t & process_monitor, + boost::asio::io_context & context, + async_exec_args_t exec_args, + CompletionToken && token) + { + using namespace async::continuations; + + return async_exec(process_monitor, + context, + std::move(exec_args), + discard_ioe, + log_oe, + log_oe, + std::forward(token)); + } + + /** + * Run a process to completion and asynchronously wait for that completion. + * + * Allows configuring the stdout/stderr (e.g. for redirection to a file) + * + * @param process_monitor The process monitor + * @param context The context for which io and other processing should happen on + * @param exec_args The exec configuration + * @param stdout_mode Indicates how stdout should be handled + * @param stderr_mode Indicates how stderr should be handled + * @param token The async completion token + */ + template + inline auto async_exec(process_monitor_t & process_monitor, + boost::asio::io_context & context, + async_exec_args_t exec_args, + OutputMode && stdout_mode, + ErrorMode && stderr_mode, + CompletionToken && token) + { + using namespace async::continuations; + + return async_run_to_completion(async_create_process(process_monitor, + context, + std::move(exec_args), + discard_ioe, + std::forward(stdout_mode), + std::forward(stderr_mode), + use_continuation), + std::forward(token)); + } + + /** + * Run a process to completion and asynchronously wait for that completion. + * + * Allows configuring the stdout/stderr (e.g. 
for redirection to a file) + * + * @param process_monitor The process monitor + * @param context The context for which io and other processing should happen on + * @param from_process The process to pipe from + * @param use_stderr True to pipe from stderr, false to pipe from stdout (the previous process must have been correctly setup with pipe on that fd) + * @param exec_args The exec configuration + * @param stdout_mode Indicates how stdout should be handled + * @param stderr_mode Indicates how stderr should be handled + * @param token The async completion token + */ + template + inline auto async_exec_piped(process_monitor_t & process_monitor, + boost::asio::io_context & context, + std::shared_ptr const & from_process, + bool use_stderr, + async_exec_args_t exec_args, + OutputMode && stdout_mode, + ErrorMode && stderr_mode, + CompletionToken && token) + { + using namespace async::continuations; + + return async_run_to_completion( + start_by([from_process]() { + return (from_process->exec() + ? boost::system::error_code {} + : boost::system::errc::make_error_code(boost::system::errc::no_such_process)); + }) // + | map_error() // + | async_create_process( + process_monitor, + context, + std::move(exec_args), + (use_stderr ? async::proc::from_stderr(*from_process) : async::proc::from_stdout(*from_process)), + std::forward(stdout_mode), + std::forward(stderr_mode), + use_continuation), + std::forward(token)); + } +} diff --git a/daemon/async/proc/async_proc_poller.cpp b/daemon/async/proc/async_proc_poller.cpp new file mode 100644 index 00000000..756df634 --- /dev/null +++ b/daemon/async/proc/async_proc_poller.cpp @@ -0,0 +1,121 @@ +/* Copyright (C) 2022 by Arm Limited. All rights reserved. */ + +#include "async/proc/async_proc_poller.h" + +namespace async::detail { + namespace { + + /** + * Get the exe path for a process by reading /proc/[PID]/cmdline + */ + std::optional get_process_cmdline_exe_path(const lib::FsEntry & entry) + { + const lib::FsEntry cmdline_file = lib::FsEntry::create(entry, "cmdline"); + const std::string cmdline_contents = lib::readFileContents(cmdline_file); + // need to extract just the first part of cmdline_contents (as it is an packed sequence of c-strings) + // so use .c_str() to extract the first string (which is the exe path) and create a new string from it + const std::string cmdline_exe = cmdline_contents.c_str(); // NOLINT(readability-redundant-string-cstr) + if ((!cmdline_exe.empty()) && (cmdline_exe.at(0) != '\n') && (cmdline_exe.at(0) != '\r')) { + return lib::FsEntry::create(cmdline_exe); + } + return std::optional {}; + } + + } + + /** + * Checks the name of the FsEntry to see if it is a number, and checks the type + * to see if it is a directory. + * + * @return True if criteria are met, false otherwise + */ + bool is_pid_directory(const lib::FsEntry & entry) + { + // type must be directory + const lib::FsEntry::Stats stats = entry.read_stats(); + if (stats.type() != lib::FsEntry::Type::DIR) { + return false; + } + + // name must be only digits + const std::string name = entry.name(); + for (char chr : name) { + if (std::isdigit(chr) == 0) { + return false; + } + } + + return true; + } + + /** @return The process exe path (or some estimation of it). 
Empty if the thread is a kernel thread, otherwise + * contains 'something' + */ + std::optional get_process_exe_path(const lib::FsEntry & entry) + { + auto proc_pid_exe = lib::FsEntry::create(entry, "exe"); + + { + auto exe_realpath = proc_pid_exe.realpath(); + + if (exe_realpath) { + // check android paths + auto name = exe_realpath->name(); + if ((name == "app_process") || (name == "app_process32") || (name == "app_process64")) { + // use the command line instead + auto cmdline_exe = get_process_cmdline_exe_path(entry); + if (cmdline_exe) { + return cmdline_exe; + } + } + + // use realpath(/proc/pid/exe) + return exe_realpath; + } + } + + // exe was linked to nothing, try getting from cmdline (but it must be for a real file) + auto cmdline_exe = get_process_cmdline_exe_path(entry); + if (!cmdline_exe) { + // no cmdline, must be a kernel thread + return {}; + } + + // resolve the cmdline string to a real path + if (cmdline_exe->path().front() == '/') { + // already an absolute path, so just resolve it to its realpath + auto cmdline_exe_realpath = cmdline_exe->realpath(); + if (cmdline_exe_realpath) { + return cmdline_exe_realpath; + } + } + else { + // try relative to process cwd first + auto cwd_file = lib::FsEntry::create(entry, "cwd"); + auto rel_exe_file = lib::FsEntry::create(cwd_file, cmdline_exe->path()); + auto abs_exe_file = rel_exe_file.realpath(); + + if (abs_exe_file) { + // great, use that + return abs_exe_file; + } + } + + // we could not resolve exe or the command to a real path. + // Since the exe_path value *must* contain something for any non-kernel PID, + // then prefer to send 'comm' (so long as it is not an empty string) + auto comm_file = lib::FsEntry::create(entry, "comm"); + auto comm_file_contents = lib::readFileContents(comm_file); + if (!comm_file_contents.empty()) { + return lib::FsEntry::create(comm_file_contents); + } + + // comm was empty, so fall back to whatever the commandline was + if (cmdline_exe) { + return cmdline_exe; + } + + // worst case just send /proc//exe + return proc_pid_exe; + } +} diff --git a/daemon/async/proc/async_proc_poller.h b/daemon/async/proc/async_proc_poller.h index b295d51f..e6af535c 100644 --- a/daemon/async/proc/async_proc_poller.h +++ b/daemon/async/proc/async_proc_poller.h @@ -2,13 +2,17 @@ #pragma once +#include "async/asio_traits.h" #include "async/continuations/async_initiate.h" +#include "async/continuations/continuation.h" #include "async/continuations/operations.h" #include "async/continuations/use_continuation.h" #include "lib/FsEntry.h" #include "lib/Utils.h" #include "linux/proc/ProcessPollerBase.h" +#include + #include #include #include @@ -23,110 +27,71 @@ namespace async { * * @return True if criteria are met, false otherwise */ - bool is_pid_directory(const lib::FsEntry & entry) - { - // type must be directory - const lib::FsEntry::Stats stats = entry.read_stats(); - if (stats.type() != lib::FsEntry::Type::DIR) { - return false; - } - - // name must be only digits - const std::string name = entry.name(); - for (char chr : name) { - if (std::isdigit(chr) == 0) { - return false; - } - } - - return true; - } - - /** - * Get the exe path for a process by reading /proc/[PID]/cmdline - */ - std::optional get_process_cmdline_exe_path(const lib::FsEntry & entry) - { - const lib::FsEntry cmdline_file = lib::FsEntry::create(entry, "cmdline"); - const std::string cmdline_contents = lib::readFileContents(cmdline_file); - // need to extract just the first part of cmdline_contents (as it is an packed sequence of c-strings) - // 
so use .c_str() to extract the first string (which is the exe path) and create a new string from it - const std::string cmdline_exe = cmdline_contents.c_str(); // NOLINT(readability-redundant-string-cstr) - if ((!cmdline_exe.empty()) && (cmdline_exe.at(0) != '\n') && (cmdline_exe.at(0) != '\r')) { - return lib::FsEntry::create(cmdline_exe); - } - return std::optional {}; - } + bool is_pid_directory(const lib::FsEntry & entry); /** @return The process exe path (or some estimation of it). Empty if the thread is a kernel thread, otherwise * contains 'something' */ - std::optional get_process_exe_path(const lib::FsEntry & entry) - { - auto proc_pid_exe = lib::FsEntry::create(entry, "exe"); - + std::optional get_process_exe_path(const lib::FsEntry & entry); + + /** Helper for iterating some directory asynchronously */ + template + class async_dir_iterator_t : public std::enable_shared_from_this> { + public: + async_dir_iterator_t(Executor const & executor, lib::FsEntry const & dir, Op && op) + : executor(executor), dir(dir), iterator(dir.children()), op(std::forward(op)) { - auto exe_realpath = proc_pid_exe.realpath(); - - if (exe_realpath) { - // check android paths - auto name = exe_realpath->name(); - if ((name == "app_process") || (name == "app_process32") || (name == "app_process64")) { - // use the command line instead - auto cmdline_exe = get_process_cmdline_exe_path(entry); - if (cmdline_exe) { - return cmdline_exe; - } - } - - // use realpath(/proc/pid/exe) - return exe_realpath; - } - } - - // exe was linked to nothing, try getting from cmdline (but it must be for a real file) - auto cmdline_exe = get_process_cmdline_exe_path(entry); - if (!cmdline_exe) { - // no cmdline, must be a kernel thread - return {}; - } - - // resolve the cmdline string to a real path - if (cmdline_exe->path().front() == '/') { - // already an absolute path, so just resolve it to its realpath - auto cmdline_exe_realpath = cmdline_exe->realpath(); - if (cmdline_exe_realpath) { - return cmdline_exe_realpath; - } - } - else { - // try relative to process cwd first - auto cwd_file = lib::FsEntry::create(entry, "cwd"); - auto rel_exe_file = lib::FsEntry::create(cwd_file, cmdline_exe->path()); - auto abs_exe_file = rel_exe_file.realpath(); - - if (abs_exe_file) { - // great, use that - return abs_exe_file; - } } - // we could not resolve exe or the command to a real path. - // Since the exe_path value *must* contain something for any non-kernel PID, - // then prefer to send 'comm' (so long as it is not an empty string) - auto comm_file = lib::FsEntry::create(entry, "comm"); - auto comm_file_contents = lib::readFileContents(comm_file); - if (!comm_file_contents.empty()) { - return lib::FsEntry::create(comm_file_contents); + [[nodiscard]] async::continuations::polymorphic_continuation_t async_run() + { + using namespace async::continuations; + + LOG_TRACE("SCAN DIR: %s", dir.path().c_str()); + + auto self = this->shared_from_this(); + + return start_with(iterator.next(), boost::system::error_code {}) // + | loop( + // iterate while no error and has another value + [self](std::optional entry, boost::system::error_code const & ec) { + auto const valid = entry.has_value() && !ec; + LOG_TRACE("LOOP DIR: '%s' = '%s' == %d", + self->dir.path().c_str(), + (entry ? 
entry->path().c_str() : ""), + valid); + return start_with(valid, std::move(entry), ec); + }, + [self](std::optional entry, boost::system::error_code const & /*ec*/) { + LOG_TRACE("EXEC DIR: '%s' = '%s'", self->dir.path().c_str(), entry->path().c_str()); + return start_on(self->executor) // + | self->op(std::move(*entry)) // + | post_on(self->executor) // + | then([self](boost::system::error_code const & ec) { + LOG_TRACE("... ec=%s", ec.message().c_str()); + return start_with(self->iterator.next(), ec); + }); + }) // + | then( + [self](std::optional const & /*entry*/, boost::system::error_code const & ec) { + LOG_TRACE("FINISHED DIR: '%s' = %s", self->dir.path().c_str(), ec.message().c_str()); + return ec; + }); } - // comm was empty, so fall back to whatever the commandline was - if (cmdline_exe) { - return cmdline_exe; - } + private: + Executor executor; + lib::FsEntry dir; + lib::FsEntryDirectoryIterator iterator; + Op op; + }; - // worst case just send /proc//exe - return proc_pid_exe; + template + auto make_async_dir_iterator(Executor && executor, lib::FsEntry const & dir, Op && op) + { + return std::make_shared>(std::forward(executor), + dir, + std::forward(op)); } } @@ -139,16 +104,20 @@ namespace async { template class async_proc_poller_t : public std::enable_shared_from_this> { public: + using error_code_continuation_t = async::continuations::polymorphic_continuation_t; + /* Callback signatures. */ - using on_process_directory_type = std::function; - using on_thread_directory_type = std::function; - using on_thread_details_type = std::function &, - const std::optional &)>; - - explicit async_proc_poller_t(Executor & executor) : executor {executor}, procDir {lib::FsEntry::create("/proc")} + using on_process_directory_type = std::function; + using on_thread_directory_type = std::function; + using on_thread_details_type = + std::function &, + const std::optional &)>; + + explicit async_proc_poller_t(Executor const & executor) + : executor {executor}, procDir {lib::FsEntry::create("/proc")} { } @@ -180,7 +149,7 @@ namespace async { boost::mp11::mp_bind>::value; // Iterate over the callbacks and assign to the right var - boost::mp11::tuple_for_each(std::tuple {std::forward(callbacks)...}, [&](auto && callback) { + boost::mp11::tuple_for_each(std::tuple {std::forward(callbacks)...}, [&](auto callback) { using callback_type = std::decay_t; if constexpr (std::is_convertible_v) { @@ -208,26 +177,30 @@ namespace async { boost::mp11::tuple_for_each(callback_ptrs, [&](auto callback_ptr) { auto & callback = *callback_ptr; if (!callback) { - callback = [](auto &&...) {}; + callback = [](auto &&...) 
{ + return error_code_continuation_t { + start_with(boost::system::error_code {}), + }; + }; } }); } return async_initiate>( - [self = this->shared_from_this(), callbacks = std::move(callbacks_wrapper)]() mutable { - return start_on(self->executor) | // - then([self, callbacks = std::move(callbacks)]() mutable { - auto iterator = lib::FsEntryDirectoryIterator(self->procDir.children()); - while (std::optional entry = iterator.next()) { - if (async::detail::is_pid_directory(*entry)) { - self->template process_pid_directory(*entry, - callbacks); - } - } - - // Currently we always succeed - return boost::system::error_code {}; - }); + [self = this->shared_from_this(), + callbacks = std::make_shared(std::move(callbacks_wrapper))]() mutable { + auto iterator = async::detail::make_async_dir_iterator( + self->executor, + self->procDir, + [self, callbacks](lib::FsEntry entry) { + return async_proc_poller_t::template process_pid_directory( + self, + std::move(entry), + callbacks); + }); + + return start_on(self->executor) // + | iterator->async_run(); }, token); } @@ -240,89 +213,155 @@ namespace async { }; template - static void process_pid_directory(const lib::FsEntry & entry, callbacks_t & callbacks) + static error_code_continuation_t process_pid_directory(std::shared_ptr self, + lib::FsEntry entry, + std::shared_ptr callbacks) { - const auto name = entry.name(); - auto exe_path = detail::get_process_exe_path(entry); + using namespace async::continuations; - // read the pid - const auto pid = std::strtol(name.c_str(), nullptr, 0); + // ignore non-pid directories + if (!async::detail::is_pid_directory(entry)) { + return start_with(boost::system::error_code {}); + } - // call the receiver object - callbacks.on_process_directory(pid, entry); + auto name = entry.name(); + auto exe_path = async::detail::get_process_exe_path(entry); + // read the pid + auto pid = std::strtol(name.c_str(), nullptr, 0); // process threads? if constexpr (WantThreads || WantStats) { - // the /proc/[PID]/task directory - const auto task_directory = lib::FsEntry::create(entry, "task"); - - // the /proc/[PID]/task/[PID]/ directory - const auto task_pid_directory = lib::FsEntry::create(task_directory, name); - const auto task_pid_directory_stats = task_pid_directory.read_stats(); - - // if for some reason taskPidDirectory does not exist, then use stat and statm in the procPid directory instead - if ((!task_pid_directory_stats.exists()) - || (task_pid_directory_stats.type() != lib::FsEntry::Type::DIR)) { - process_tid_directory(pid, entry, exe_path, callbacks); - } - - // scan all the TIDs in the task directory - auto task_iterator = task_directory.children(); - while (auto task_entry = task_iterator.next()) { - if (detail::is_pid_directory(*task_entry)) { - process_tid_directory(pid, *task_entry, exe_path, callbacks); - } - } + // call the receiver object + return callbacks->on_process_directory(pid, entry) + // then process the threads + | then([entry = std::move(entry), + name = std::move(name), + exe_path = std::move(exe_path), + pid, + self = std::move(self), + callbacks](boost::system::error_code const & ec) mutable -> error_code_continuation_t { + // forward error? 
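+                               // (errors from on_process_directory are returned unchanged; otherwise the
+                               // /proc/[PID]/task directory is walked with the async directory iterator,
+                               // falling back to the top-level /proc/[PID] entry when task/[PID] is absent)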
+ if (ec) { + return start_with(ec); + } + + // the /proc/[PID]/task directory + const auto task_directory = lib::FsEntry::create(entry, "task"); + + // the /proc/[PID]/task/[PID]/ directory + const auto task_pid_directory = lib::FsEntry::create(task_directory, name); + const auto task_pid_directory_stats = task_pid_directory.read_stats(); + + // if for some reason taskPidDirectory does not exist, then use stat and statm in the procPid directory instead + if ((!task_pid_directory_stats.exists()) + || (task_pid_directory_stats.type() != lib::FsEntry::Type::DIR)) { + return process_tid_directory(pid, + std::move(entry), + std::move(exe_path), + std::move(callbacks)); + } + + // scan all the TIDs in the task directory + auto task_iterator = async::detail::make_async_dir_iterator( + self->executor, + task_directory, + [exe_path, callbacks, pid](lib::FsEntry entry) { + return async_proc_poller_t::template process_tid_directory( + pid, + std::move(entry), + exe_path, + callbacks); + }); + + return task_iterator->async_run(); + }); + } + else { + // just call the receiver object + return callbacks->on_process_directory(pid, entry); } } template - static void process_tid_directory(int pid, - const lib::FsEntry & entry, - const std::optional & exe, - callbacks_t & callbacks) + static error_code_continuation_t process_tid_directory(int pid, + lib::FsEntry entry, + std::optional exe, + std::shared_ptr callbacks) { - const long tid = std::strtol(entry.name().c_str(), nullptr, 0); + using namespace async::continuations; - // call the receiver object - callbacks.on_thread_directory(pid, tid, entry); + const long tid = std::strtol(entry.name().c_str(), nullptr, 0); // process stats? if constexpr (WantStats) { - std::optional statm_file_record {lnx::ProcPidStatmFileRecord()}; - - // open /proc/[PID]/statm - { - const lib::FsEntry statm_file = lib::FsEntry::create(entry, "statm"); - const lib::FsEntry::Stats statm_file_stats = statm_file.read_stats(); - - if (statm_file_stats.exists() && statm_file_stats.type() == lib::FsEntry::Type::FILE) { - const std::string statm_file_contents = lib::readFileContents(statm_file); - - if (!lnx::ProcPidStatmFileRecord::parseStatmFile(*statm_file_record, - statm_file_contents.c_str())) { - statm_file_record.reset(); - } - } - } + // call the receiver object + return callbacks->on_thread_directory(pid, tid, entry) + // then call the stats handler + | then([entry, callbacks, pid, tid, exe = std::move(exe)]( + boost::system::error_code const & ec) mutable -> error_code_continuation_t { + // forward error? 
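+                        // (errors from on_thread_directory are returned unchanged; otherwise
+                        // /proc/[PID]/statm and /proc/[PID]/stat are read below and on_thread_details
+                        // is invoked when the stat record parses successfully)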
+ if (ec) { + return start_with(ec); + } + + std::optional statm_file_record {lnx::ProcPidStatmFileRecord()}; + + // open /proc/[PID]/statm + { + const lib::FsEntry statm_file = lib::FsEntry::create(entry, "statm"); + const lib::FsEntry::Stats statm_file_stats = statm_file.read_stats(); + + if (statm_file_stats.exists() && statm_file_stats.type() == lib::FsEntry::Type::FILE) { + const std::string statm_file_contents = lib::readFileContents(statm_file); + + if (!lnx::ProcPidStatmFileRecord::parseStatmFile(*statm_file_record, + statm_file_contents.c_str())) { + statm_file_record.reset(); + } + } + } + + // open /proc/[PID]/stat + { + const lib::FsEntry stat_file = lib::FsEntry::create(entry, "stat"); + const lib::FsEntry::Stats stat_file_stats = stat_file.read_stats(); + + if (stat_file_stats.exists() && stat_file_stats.type() == lib::FsEntry::Type::FILE) { + const std::string stat_file_contents = lib::readFileContents(stat_file); + lnx::ProcPidStatFileRecord stat_file_record; + if (lnx::ProcPidStatFileRecord::parseStatFile(stat_file_record, + stat_file_contents.c_str())) { + return callbacks->on_thread_details(pid, + tid, + stat_file_record, + statm_file_record, + exe); + } + } + } - // open /proc/[PID]/stat - { - const lib::FsEntry stat_file = lib::FsEntry::create(entry, "stat"); - const lib::FsEntry::Stats stat_file_stats = stat_file.read_stats(); - - if (stat_file_stats.exists() && stat_file_stats.type() == lib::FsEntry::Type::FILE) { - const std::string stat_file_contents = lib::readFileContents(stat_file); - lnx::ProcPidStatFileRecord stat_file_record; - if (lnx::ProcPidStatFileRecord::parseStatFile(stat_file_record, stat_file_contents.c_str())) { - callbacks.on_thread_details(pid, tid, stat_file_record, statm_file_record, exe); - } - } - } + return start_with(boost::system::error_code {}); + }); + } + else { + // call the receiver object + return callbacks->on_thread_directory(pid, tid, entry); } } - Executor & executor; + Executor executor; lib::FsEntry procDir; }; + + template, bool> = false> + auto make_async_proc_poller(Executor const & ex) + { + return std::make_shared>(ex); + } + + template, bool> = false> + auto make_async_proc_poller(ExecutionContext & context) + { + return make_async_proc_poller(context.get_executor()); + } } diff --git a/daemon/async/proc/async_process.cpp b/daemon/async/proc/async_process.cpp new file mode 100644 index 00000000..d013a41c --- /dev/null +++ b/daemon/async/proc/async_process.cpp @@ -0,0 +1,172 @@ +/* Copyright (C) 2022 by Arm Limited. All rights reserved. 
*/ + +#include "async/proc/async_process.hpp" + +#include "async/continuations/async_initiate.h" +#include "async/continuations/operations.h" +#include "async/continuations/use_continuation.h" + +#include +#include +#include + +#include + +namespace async::proc { + void async_process_t::on_output_complete(boost::system::error_code const & ec, bool is_stderr) + { + using namespace async::continuations; + + spawn("async process set complete", + start_on(strand) // + | then([ec, is_stderr, st = shared_from_this()]() { + // log the error + if (ec) { + st->notify(ec); + } + + // update the state + if (is_stderr) { + st->stderr_complete = true; + } + else { + st->stdout_complete = true; + } + + // flush any events + st->flush(); + })); + } + + void async_process_t::start() + { + using namespace async::continuations; + + auto st = shared_from_this(); + + // observe events + spawn("async_process_t event loop", + repeatedly( + [st]() { + return start_on(st->strand) // + | then([st]() { return !st->already_terminated; }); + }, // + [st]() { + return st->process_monitor.async_wait_event(st->uid, use_continuation) // + | post_on(st->strand) // + | then([st](boost::system::error_code const & ec, + async::proc::process_monitor_event_t const & event) { + st->process_event(ec, event); + }); + }), + [st](bool failed, boost::system::error_code ec) { + if (failed) { + // store the failure + spawn("failure notifier", + start_on(st->strand) // + | then([st, ec]() { + if (ec) { + st->notify(ec); + } + else { + st->notify(boost::system::errc::make_error_code( + boost::system::errc::state_not_recoverable)); + } + })); + } + }); + } + + void async_process_t::do_async_wait_complete(completion_handler_t sc) + { + // cancel any already pending handler - only one may be queued at a time + completion_handler_t existing {std::move(completion_handler)}; + if (existing) { + resume_continuation(strand.context(), + std::move(existing), + boost::system::errc::make_error_code(boost::system::errc::operation_canceled), + false, + 0); + } + + // has it already terminated? 
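+        // (if so, and both output streams are drained, complete immediately with the stored exit state;
+        // otherwise the handler is stored and later resumed by flush())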
+ if (already_terminated && stdout_complete && stderr_complete) { + resume_continuation(strand.context(), std::move(sc), {}, terminated_by_signal, exit_status); + + return; + } + + // ok, store it for later + completion_handler = std::move(sc); + } + + void async_process_t::process_event(boost::system::error_code const & ec, + async::proc::process_monitor_event_t const & event) + { + if (ec) { + notify(ec); + } + + switch (event.state) { + case async::proc::ptrace_process_state_t::terminated_exit: { + terminate(false, event.status); + return; + } + case async::proc::ptrace_process_state_t::terminated_signal: { + terminate(true, event.status); + return; + } + case async::proc::ptrace_process_state_t::attached: + case async::proc::ptrace_process_state_t::attaching: + case async::proc::ptrace_process_state_t::no_such_process: + default: { + LOG_TRACE("ignoring unexpected event state %s::%s", to_cstring(event.type), to_cstring(event.state)); + return; + } + } + } + + void async_process_t::notify(boost::system::error_code const & ec) + { + if (!already_terminated) { + pending_errors.emplace_back(ec); + flush(); + } + } + + void async_process_t::terminate(bool by_signal, int status) + { + if (!already_terminated) { + terminated_by_signal = by_signal; + exit_status = status; + already_terminated = true; + flush(); + } + } + + void async_process_t::flush() + { + auto const really_terminated = (already_terminated && stdout_complete && stderr_complete); + + if (pending_errors.empty() && !really_terminated) { + return; + } + + completion_handler_t existing {std::move(completion_handler)}; + if (!existing) { + return; + } + + if (!pending_errors.empty()) { + auto error = pending_errors.front(); + pending_errors.pop_front(); + + resume_continuation(strand.context(), std::move(existing), error, false, 0); + + return; + } + + resume_continuation(strand.context(), std::move(existing), {}, terminated_by_signal, exit_status); + } + +} diff --git a/daemon/async/proc/async_process.hpp b/daemon/async/proc/async_process.hpp new file mode 100644 index 00000000..f31f85eb --- /dev/null +++ b/daemon/async/proc/async_process.hpp @@ -0,0 +1,113 @@ +/* Copyright (C) 2022 by Arm Limited. All rights reserved. 
*/ + +#pragma once + +#include "Logging.h" +#include "async/async_line_reader.hpp" +#include "async/continuations/async_initiate.h" +#include "async/continuations/continuation.h" +#include "async/continuations/operations.h" +#include "async/continuations/stored_continuation.h" +#include "async/continuations/use_continuation.h" +#include "async/proc/process_monitor.hpp" +#include "async/proc/process_state.hpp" +#include "lib/AutoClosingFd.h" +#include "lib/error_code_or.hpp" +#include "lib/forked_process.h" + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +namespace async::proc { + /** + * Represents some process with asynchronously observable termination state + */ + class async_process_t : public std::enable_shared_from_this { + public: + async_process_t(process_monitor_t & process_monitor, + boost::asio::io_context & context, + process_monitor_t::fork_result_t fork_result) + : process_monitor(process_monitor), + strand(context), + uid(fork_result.uid), + process(std::move(fork_result.process)) + { + } + + /** Start observing events; must be called once after successful configuration */ + void start(); + + [[nodiscard]] bool is_terminated() const + { + return (!process) || (already_terminated && stdout_complete && stderr_complete); + } + + [[nodiscard]] boost::asio::io_context & context() { return strand.context(); } + [[nodiscard]] pid_t get_pid() const { return process.get_pid(); } + + // these may be unset if the input/output was redirected to a file / the log / discarded + [[nodiscard]] lib::AutoClosingFd & get_stdin_write() { return process.get_stdin_write(); } + [[nodiscard]] lib::AutoClosingFd & get_stdout_read() { return process.get_stdout_read(); } + [[nodiscard]] lib::AutoClosingFd & get_stderr_read() { return process.get_stderr_read(); } + + /** Abort the process */ + void abort() { process.abort(); } + + /** Exec the process *iff* it has not already exec'd or aborted */ + [[nodiscard]] bool exec() { return process.exec(); } + + /** Mark stdout/stderr as completely read */ + void on_output_complete(boost::system::error_code const & ec, bool is_stderr); + + /** + * Asynchronously wait for termination (or some error event). + * + * Can be rewaited multiple times after each error, once terminated (which requires both stdout and stderr to be marked as complete) only the final termination event is notified. 
+ */ + template + auto async_wait_complete(CompletionToken && token) + { + using namespace async::continuations; + + return async_initiate_explicit( + [st = shared_from_this()](auto && stored_continuation) { + submit(start_on(st->strand) // + | then([st, sc = stored_continuation.move()]() mutable { + st->do_async_wait_complete(std::move(sc)); + }), + stored_continuation.get_exceptionally()); + }, + std::forward(token)); + } + + private: + using completion_handler_t = async::continuations::stored_continuation_t; + + process_monitor_t & process_monitor; + boost::asio::io_context::strand strand; + process_uid_t uid; + lib::forked_process_t process; + completion_handler_t completion_handler {}; + std::deque pending_errors {}; + bool already_terminated {false}; + bool terminated_by_signal {false}; + int exit_status {0}; + bool stdout_complete {false}; + bool stderr_complete {false}; + + void do_async_wait_complete(completion_handler_t sc); + void process_event(boost::system::error_code const & ec, async::proc::process_monitor_event_t const & event); + void notify(boost::system::error_code const & ec); + void terminate(bool by_signal, int status); + void flush(); + }; +} diff --git a/daemon/async/proc/async_read_proc_maps.h b/daemon/async/proc/async_read_proc_maps.h index e2f30f04..4ba4178f 100644 --- a/daemon/async/proc/async_read_proc_maps.h +++ b/daemon/async/proc/async_read_proc_maps.h @@ -3,12 +3,15 @@ #pragma once #include "async/continuations/async_initiate.h" +#include "async/continuations/continuation.h" #include "async/continuations/operations.h" #include "async/continuations/use_continuation.h" #include "async/proc/async_proc_poller.h" +#include + #include -#include +#include namespace async { @@ -20,37 +23,58 @@ namespace async { * @tparam CompletionToken CompletionToken type * @param executor Executor instance, typically the one used inside @a sender * @param sender Sends the data + * @param filter filter callable that decides whether or not to send a specific process's details * @param token Called upon completion with an error_code * @return Nothing or a continuation, depending on @a CompletionToken */ - template - auto async_read_proc_maps(Executor & executor, Sender & sender, CompletionToken && token) + template + auto async_read_proc_maps(Executor && executor, + std::shared_ptr sender, + Filter && filter, + CompletionToken && token) { using namespace async::continuations; - using boost_error_type = boost::system::error_code; return async_initiate>( - [&, ec_record = std::make_shared()]() mutable { - auto poller = std::make_shared>(executor); - - return poller->async_poll(use_continuation, [&sender, ec_record](int pid, const lib::FsEntry & entry) { - const lib::FsEntry mapsFile = lib::FsEntry::create(entry, "maps"); - if (!mapsFile.exists()) { - *ec_record = boost::asio::error::not_found; - return; - } - else if (!mapsFile.canAccess(true, false, false)) { - *ec_record = boost::asio::error::no_permission; - return; - } - return sender.async_send_maps_frame(pid, - pid, - lib::readFileContents(mapsFile), - [ec_record](boost::system::error_code new_ec) mutable { - *ec_record = (!!new_ec) ? new_ec : *ec_record; - }); - }) | then([ec_record](auto ec) { return ec ? 
ec : *ec_record; }); + [poller = make_async_proc_poller(std::forward(executor)), + sender = std::move(sender), + filter = std::forward(filter)]() mutable { + return poller->async_poll(use_continuation, + [sender, filter = std::move(filter)](int pid, const lib::FsEntry & entry) + -> polymorphic_continuation_t { + // check filter + if (!filter(pid)) { + return start_with(boost::system::error_code {}); + } + + // missing or inaccessible file is not an error + const lib::FsEntry mapsFile = lib::FsEntry::create(entry, "maps"); + + if (!mapsFile.exists()) { + return start_with(boost::system::error_code {}); + } + + if (!mapsFile.canAccess(true, false, false)) { + return start_with(boost::system::error_code {}); + } + + // send the contents + return sender->async_send_maps_frame(pid, + pid, + lib::readFileContents(mapsFile), + use_continuation); + }); }, token); } + + template + auto async_read_proc_maps(Executor & executor, std::shared_ptr sender, CompletionToken && token) + { + return async_read_proc_maps( + executor, + std::move(sender), + [](int) { return true; }, + std::forward(token)); + } } diff --git a/daemon/async/proc/async_read_proc_sys_dependencies.h b/daemon/async/proc/async_read_proc_sys_dependencies.h index 7953ce2c..cb0ffdbe 100644 --- a/daemon/async/proc/async_read_proc_sys_dependencies.h +++ b/daemon/async/proc/async_read_proc_sys_dependencies.h @@ -7,6 +7,8 @@ #include "async/continuations/use_continuation.h" #include "async/proc/async_proc_poller.h" +#include + namespace async { /** * Reads the /proc sys dependencies synchronously, and sends the results via @a sender @@ -18,66 +20,50 @@ namespace async { * @param executor Executor instance, typically the one used inside @a sender * @param sender Sends the data * @param token Called upon completion with an error_code + * @param filter filter callable that decides whether or not to send a specific process's details * @return Nothing or a continuation, depending on @a CompletionToken */ - template - auto async_read_proc_sys_dependencies(Executor & executor, Sender & sender, CompletionToken && token) + template + auto async_read_proc_sys_dependencies(Executor && executor, + std::shared_ptr sender, + Filter && filter, + CompletionToken && token) { using namespace async::continuations; using poly_return_type = polymorphic_continuation_t; - struct result_t { - int pid; - int tid; - std::string comm; - std::string exe; - }; - return async_initiate>( - [&]() mutable { - auto results = std::make_shared>(); - auto poller = std::make_shared>(executor); - - return poller->async_poll( - use_continuation, - [results](int pid, - int tid, - const lnx::ProcPidStatFileRecord & statRecord, - const std::optional & /*statmRecord*/, - const std::optional & exe) { - results->push_back({pid, tid, statRecord.getComm(), exe ? 
exe->path() : ""}); - }) - | then([&sender, results](auto ec) mutable -> poly_return_type { - return start_with() | // - do_if_else( - [ec]() { return !!ec; }, - [ec]() { return start_with(ec); }, // Exit early if async_poll returned an error - [&sender, results]() mutable { - return start_with(results->begin(), - results->end(), - boost::system::error_code {}) - | loop([](auto it, - auto end, - auto ec) { return start_with(it != end, it, end, ec); }, - [&sender, results](auto it, auto end, auto ec) mutable { - return sender.async_send_comms_frame(it->pid, - it->tid, - it->exe, - it->comm, - use_continuation) - | then([=](auto new_ec) mutable { - // Don't stop on the first failure - if (new_ec) { - ec = new_ec; - } - - return start_with(++it, end, ec); - }); - }) // - | then([](auto /*it*/, auto /*end*/, auto ec) { return ec; }); - }); - }); + [poller = make_async_proc_poller(std::forward(executor)), + sender = std::move(sender), + filter = std::forward(filter)]() mutable { + return poller->async_poll(use_continuation, + [sender = std::move(sender), filter = std::move(filter)]( + int pid, + int tid, + const lnx::ProcPidStatFileRecord & statRecord, + const std::optional & /*statmRecord*/, + const std::optional & exe) -> poly_return_type { + // filter the pid/tid + if (!filter(pid, tid)) { + return start_with(boost::system::error_code {}); + } + return sender->async_send_comm_frame(pid, + tid, + exe ? exe->path() : "", + statRecord.getComm(), + use_continuation); + }); }, token); } + + template + auto async_read_proc_sys_dependencies(Executor & executor, std::shared_ptr sender, CompletionToken && token) + { + return async_read_proc_sys_dependencies( + executor, + std::move(sender), + [](int, int) { return true; }, + std::forward(token)); + } } diff --git a/daemon/async/proc/async_wait_for_process.h b/daemon/async/proc/async_wait_for_process.h index ec4456f3..84f1bb7e 100644 --- a/daemon/async/proc/async_wait_for_process.h +++ b/daemon/async/proc/async_wait_for_process.h @@ -9,12 +9,14 @@ #include +#include #include #include namespace async { + /** Asynchronously polls /proc to find the given command, and returns the PIDs associated with it. - * + * * @tparam Executor Executor type */ template @@ -25,7 +27,7 @@ namespace async { * @param executor Executor instance to run the timer on * @param command Command to poll for */ - async_wait_for_process_t(Executor & executor, std::string_view command) + async_wait_for_process_t(Executor const & executor, std::string_view command) : state {std::make_shared(executor, command)} { } @@ -34,11 +36,20 @@ namespace async { async_wait_for_process_t(const async_wait_for_process_t &) = delete; async_wait_for_process_t & operator=(const async_wait_for_process_t &) = delete; + async_wait_for_process_t(async_wait_for_process_t &&) noexcept = default; + async_wait_for_process_t & operator=(async_wait_for_process_t &&) noexcept = default; + /** Destructor. * * Cancels the polling, if still running. */ - ~async_wait_for_process_t() { cancel(); } + ~async_wait_for_process_t() + { + // Check state so we don't try to cancel on a moved-from instance + if (state) { + cancel(); + } + } /** Start the asynchronous polling. 
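     *
     * A minimal usage sketch (hedged: the element type of the returned pid set and the exact completion
     * token handling are not shown in this extract, so a generic handler is used):
     *
     *   auto waiter = make_async_wait_for_process(io_context, "my_app");
     *   waiter->start(std::chrono::milliseconds {100},
     *                 [](boost::system::error_code ec, auto pids) { /. pids that matched the command ./ });
     *   // ... later, to stop polling:
     *   waiter->cancel();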
* @@ -53,7 +64,6 @@ namespace async { auto start(std::chrono::duration interval, CompletionToken && token) { using namespace async::continuations; - using poly_return_type = polymorphic_continuation_t>; state->cancel = false; return async_initiate>>( @@ -72,45 +82,9 @@ namespace async { const auto found_pids = !pids.empty(); return start_with(!ec && !found_pids, ec, std::move(pids)); }, - [=](auto /*ec*/, auto /*pids*/) mutable -> poly_return_type { - auto pids = std::make_shared>(); - auto poller = std::make_shared>(self->strand); - - return poller->async_poll(use_continuation, - [pids, self](int pid, const lib::FsEntry & path) mutable { - if (check_path(self->command, self->real_path, path)) { - pids->insert(pid); - } - }) - | then([self, pids, interval](auto ec) -> poly_return_type { - return start_on(self->strand) - | do_if_else([ec]() { return !!ec; }, - [ec]() -> poly_return_type { - return start_with(ec, std::set {}); - }, // Exit early if async_poll returned an error - [self, pids, interval]() mutable -> poly_return_type { - return start_with() - | do_if_else( - [pids]() { return !pids->empty(); }, - [pids]() mutable { - // We've found some pids! Return the result - return start_with( - boost::system::error_code {}, - std::move(*pids)); - }, - [self, interval]() mutable { - // Otherwise, queue up the next read - self->timer.expires_after(interval); - return self->timer.async_wait( - use_continuation) - | then([self](auto ec) { - return start_with( - ec, - std::set {}); - }); - }); - }); - }); + [=](auto const & /*ec*/, auto const & /*pids*/) mutable -> poly_return_type { + // poll proc for all pids/tids that match our requirements + return poll_once(interval, self); }); }, token); @@ -123,10 +97,11 @@ namespace async { void cancel() { // We use a flag AND cancel the timer, in case the user has decided to cancel before the timer has started - state->cancel = true; - boost::asio::dispatch(state->strand, [=]() { + boost::asio::post(state->strand, [self = state]() { try { - state->timer.cancel(); + LOG_DEBUG("Cancelling wait-process polling"); + self->cancel = true; + self->timer.cancel(); } catch (boost::system::system_error & e) { LOG_WARNING("Timer cancellation failure in async_wait_for_process_t: %s", e.what()); @@ -135,14 +110,18 @@ namespace async { } private: - using strand_type = boost::asio::strand>; + using strand_type = decltype(boost::asio::make_strand(std::declval())); + using poly_return_type = + async::continuations::polymorphic_continuation_t>; + using poly_error_type = async::continuations::polymorphic_continuation_t; // We use a PIMPL-like idiom so that the polling loop is cancelled upon parent instance destruction, using // std::enable_shared_from_this could result in the loop being unstoppable if the 'handle' from the caller is // lost struct impl_t { - impl_t(Executor & executor, std::string_view command) - : strand {boost::asio::make_strand(executor)}, + impl_t(Executor const & executor, std::string_view command) + : executor(executor), + strand {boost::asio::make_strand(executor)}, timer {strand}, command {command}, real_path(lib::FsEntry::create(std::string {command}).realpath()), @@ -150,6 +129,7 @@ namespace async { { } + Executor executor; strand_type strand; boost::asio::steady_timer timer; std::string_view command; @@ -168,13 +148,13 @@ namespace async { // cmdline is separated by nulls so use c_str() to extract the command name const std::string tested_command {cmdline.c_str()}; // NOLINT(readability-redundant-string-cstr) if (!tested_command.empty()) { - 
LOG_DEBUG("Wait for Process: Scanning '%s': cmdline[0] = '%s'", + LOG_TRACE("Wait for Process: Scanning '%s': cmdline[0] = '%s'", path.path().c_str(), tested_command.c_str()); // track it if they are the same string if (command == tested_command) { - LOG_DEBUG(" Selected as cmdline matches"); + LOG_TRACE(" Selected as cmdline matches"); return true; } @@ -184,14 +164,14 @@ namespace async { // they are the same executable command if (real_path && tested_real_path && (*real_path == *tested_real_path)) { - LOG_DEBUG(" Selected as realpath matches (%s)", real_path->path().c_str()); + LOG_TRACE(" Selected as realpath matches (%s)", real_path->path().c_str()); return true; } // the basename of the command matches the command name // (e.g. /usr/bin/ls == ls) if (tested_command_path.name() == command) { - LOG_DEBUG(" Selected as name matches"); + LOG_TRACE(" Selected as name matches"); return true; } } @@ -204,7 +184,7 @@ namespace async { // they are the same executable command if (tested_real_path && (*real_path == *tested_real_path)) { - LOG_DEBUG("Wait for Process: Selected as exe matches (%s)", real_path->path().c_str()); + LOG_TRACE("Wait for Process: Selected as exe matches (%s)", real_path->path().c_str()); return true; } } @@ -212,6 +192,58 @@ namespace async { return false; } + template + static poly_return_type poll_once(std::chrono::duration interval, std::shared_ptr self) + { + using namespace async::continuations; + + auto pids = std::make_shared>(); + auto poller = make_async_proc_poller(self->executor); + + return poller->async_poll(use_continuation, + [pids, self](int pid, const lib::FsEntry & path) mutable -> poly_error_type { + if (self->cancel) { + return start_with( + boost::system::error_code {boost::asio::error::operation_aborted}); + } + if (check_path(self->command, self->real_path, path)) { + pids->insert(pid); + } + return start_with(boost::system::error_code {}); + }) + | then([self, pids, interval](auto ec) -> poly_return_type { + // Exit early if async_poll returned an error + if (ec) { + return start_with(ec, std::set {}); + } + + // We've found some pids? Return the result + if (!pids->empty()) { + return start_with(boost::system::error_code {}, std::move(*pids)); + } + + // Otherwise, queue up the next read + return start_on(self->strand) // + | then([self, interval]() { + self->timer.expires_after(interval); + return self->timer.async_wait(use_continuation) + | then([self](auto ec) { return start_with(ec, std::set {}); }); + }); + }); + } + std::shared_ptr state; }; + + template, bool> = false> + auto make_async_wait_for_process(Executor const & ex, std::string_view command) + { + return std::make_shared>(ex, command); + } + + template, bool> = false> + auto make_async_wait_for_process(ExecutionContext & context, std::string_view command) + { + return make_async_wait_for_process(context.get_executor(), command); + } } diff --git a/daemon/async/proc/process_monitor.cpp b/daemon/async/proc/process_monitor.cpp new file mode 100644 index 00000000..c227ca22 --- /dev/null +++ b/daemon/async/proc/process_monitor.cpp @@ -0,0 +1,377 @@ +/* Copyright (C) 2021-2022 by Arm Limited. All rights reserved. 
*/ + +#include "async/proc/process_monitor.hpp" + +#include "Logging.h" +#include "async/continuations/operations.h" +#include "async/proc/process_state.hpp" +#include "async/proc/process_state_tracker.hpp" +#include "lib/Assert.h" +#include "lib/error_code_or.hpp" +#include "lib/forked_process.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +namespace async::proc { + process_monitor_t::error_event_tracker_t process_monitor_t::await_get_common(process_uid_t uid) + { + // find the process + auto it = process_states.find(uid); + if (it == process_states.end()) { + // send terminated error if it is not known + LOG_TRACE("UID[%" PRIu64 "] Cannot find UID", std::uint64_t(uid)); + return error_and_t { + boost::system::errc::make_error_code(boost::system::errc::no_such_process), + process_monitor_event_t(uid, + 0, + ptrace_event_type_t::error, + ptrace_process_state_t::no_such_process, + ptrace_process_origin_t::forked, + 0), + }; + } + return &(it->second); + } + + void process_monitor_t::do_async_wait_event(process_uid_t uid, process_event_continuation_t handler) + { + // get the already tracked item + auto const pt_or_error = await_get_common(uid); + + auto const * error = lib::get_error(pt_or_error); + if (error != nullptr) { + resume_continuation(strand.context(), std::move(handler), error->first, error->second); + return; + } + + auto * const pt = lib::get_value(pt_or_error); + + runtime_assert(pt != nullptr, "pt must not be nullptr"); + + // is there a current handler ? cancel it with the current state + auto & metadata = pt->get_metadata(); + if (metadata.queued_handler) { + LOG_TRACE("UID[%" PRIu64 "] Terminating old handler", std::uint64_t(uid)); + resume_continuation(strand.context(), + std::move(metadata.queued_handler), + boost::system::errc::make_error_code(boost::system::errc::operation_canceled), + process_monitor_event_t(uid, + pt->get_pid(), + ptrace_event_type_t::error, + pt->get_state(), + pt->get_origin(), + pt->get_status_code())); + } + + LOG_TRACE("UID[%" PRIu64 "] Saving new handler", std::uint64_t(uid)); + + // store the new handler + metadata.queued_handler = std::move(handler); + + // check / flush a queued event + if (flush_events(*pt)) { + process_states.erase(uid); + do_check_all_terminated(); + } + } + + void process_monitor_t::do_async_wait_all_terminated(error_continuation_t handler) + { + // abort the old handler, if it was set + error_continuation_t old_handler {std::move(all_terminated_handler)}; + if (old_handler) { + resume_continuation(strand.context(), + std::move(old_handler), + boost::system::errc::make_error_code(boost::system::errc::operation_canceled)); + } + + // process existing flag + if (all_terminated_flag) { + // reset the flag + all_terminated_flag = false; + + // invoke the new handler + resume_continuation(strand.context(), std::move(handler), {}); + } + // save the new handler for later + else { + all_terminated_handler = std::move(handler); + } + } + + void process_monitor_t::cancel() + { + // abort / erase our tracked process state since the child will not be ptracing and will not have any known children (at this point) + for (auto & entry : process_states) { + process_event_continuation_t handler {std::move(entry.second.get_metadata().queued_handler)}; + if (handler) { + resume_continuation(strand.context(), + std::move(handler), + boost::system::errc::make_error_code(boost::system::errc::operation_canceled), + 
process_monitor_event_t(entry.first, + entry.second.get_pid(), + ptrace_event_type_t::error, + entry.second.get_state(), + entry.second.get_origin(), + entry.second.get_status_code())); + } + } + + if (all_terminated_handler) { + resume_continuation(strand.context(), + std::move(all_terminated_handler), + boost::system::errc::make_error_code(boost::system::errc::operation_canceled)); + } + } + + process_monitor_t::error_and_t process_monitor_t::do_async_fork_exec( + bool prepend_command, + std::string const & cmd, + std::vector const & args, + boost::filesystem::path const & cwd, + std::optional> const & uid_gid, + lib::stdio_fds_t stdio_fds) + { + // fork the process and check for any errors + auto result = + lib::forked_process_t::fork_process(prepend_command, cmd, args, cwd, uid_gid, std::move(stdio_fds)); + auto const * error = lib::get_error(result); + if (error != nullptr) { + return {*error, fork_result_t {}}; + } + + // the process must have forked successfully + auto & forked_process = lib::get_value(result); + runtime_assert(!!forked_process, "expected valid forked process"); + auto const pid = forked_process.get_pid(); + auto const uid = process_uid_t(uid_counter++); + + // insert the entry into the process table + auto [it, inserted] = + process_states.insert_or_assign(uid, process_tracker_t(uid, pid, ptrace_process_origin_t::forked)); + + runtime_assert(inserted, "expected uid to be unique"); + + // update state + it->second.process_fork_complete(*this); + + // check / flush a queued event + if (flush_events(it->second)) { + process_states.erase(it); + do_check_all_terminated(); + } + + return {boost::system::error_code {}, fork_result_t {uid, std::move(forked_process)}}; + } + + process_uid_t process_monitor_t::do_async_monitor_forked_pid(pid_t pid) + { + auto const uid = process_uid_t(uid_counter++); + + // insert the entry into the process table + auto [it, inserted] = + process_states.insert_or_assign(uid, process_tracker_t(uid, pid, ptrace_process_origin_t::forked)); + + runtime_assert(inserted, "expected uid to be unique"); + + // update state + it->second.process_fork_complete(*this); + + // check / flush a queued event + if (flush_events(it->second)) { + process_states.erase(it); + do_check_all_terminated(); + } + + return uid; + } + + /** Handle the sigchld event */ + void process_monitor_t::on_sigchild() + { + using namespace async::continuations; + + // iterate each child agent and check if it terminated. + // if so, notify its worker and remove it from the map. + // + // We don't use waitpid(0 or -1, ...) since there are other waitpid calls that block on a single uid and we dont + // want to swallow the process event from them + spawn("SIGCHLD handler", + start_on(strand) // + | then([this]() { + // ignore if nothing monitored + if (process_states.empty()) { + return; + } + + // check all the child processes + for (auto it = process_states.begin(); it != process_states.end();) { + if (do_waitpid_for(it->second)) { + it = process_states.erase(it); + } + else { + ++it; + } + } + + // stop if no more items + do_check_all_terminated(); + })); + } + + /** Check the exit status for some worker process */ + bool process_monitor_t::do_waitpid_for(process_tracker_t & process_tracker) + { + int wstatus = 0; + + while (true) { + auto const result = lib::waitpid(process_tracker.get_pid(), &wstatus, WNOHANG); + auto const error = errno; + + // no change? + if (result == 0) { + return false; + } + + // error? 
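+                // (EINTR/EAGAIN/EWOULDBLOCK are retried; any other failure, including ECHILD, is logged and
+                // handed to on_waitpid_echild, while a positive result carries a real wait status that is
+                // decoded by process_wait_status below)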
+ if (result == pid_t(-1)) { + // ignore agains + if ((error == EINTR) || (error == EAGAIN) || (error == EWOULDBLOCK)) { + continue; + } + + // report other errors + if (error != ECHILD) { + LOG_DEBUG("waitpid reports uid=%" PRIu64 " unexpected error %d", + std::uint64_t(process_tracker.get_uid()), + error); + } + else { + LOG_DEBUG("waitpid reports uid=%" PRIu64 " is terminated", + std::uint64_t(process_tracker.get_uid())); + } + + // process the status + process_tracker.on_waitpid_echild(*this); + } + else { + // process the status + LOG_TRACE("Got waitpid(result=%d, wstatus=%d, pid=%d, uid=%" PRIu64 ")", + result, + wstatus, + process_tracker.get_pid(), + std::uint64_t(process_tracker.get_uid())); + process_tracker.process_wait_status(wstatus, *this); + } + + return flush_events(process_tracker); + } + } + + bool process_monitor_t::on_process_state_changed(process_tracker_t & pt) + { + queue_event(pt, + pt.get_uid(), + pt.get_pid(), + ptrace_event_type_t::state_change, + pt.get_state(), + pt.get_origin(), + pt.get_status_code()); + + return true; + } + + // NOLINTNEXTLINE(readability-convert-member-functions-to-static) + void process_monitor_t::queue_event(process_tracker_t & pt, + process_uid_t uid, + pid_t pid, + ptrace_event_type_t type, + ptrace_process_state_t state, + ptrace_process_origin_t origin, + int status) + { + pt.get_metadata().queued_events.emplace_back(uid, pid, type, state, origin, status); + } + + bool process_monitor_t::flush_events(process_tracker_t & pt) + { + LOG_TRACE("UID[%" PRIu64 "] flushing event queue...", std::uint64_t(pt.get_uid())); + + auto & metadata = pt.get_metadata(); + + // the caller should be holding the mutex lock on entry + if ((!metadata.queued_events.empty()) && metadata.queued_handler) { + LOG_TRACE("UID[%" PRIu64 "] triggering one event handler...", std::uint64_t(pt.get_uid())); + + // remove just the head event + auto event = metadata.queued_events.front(); + metadata.queued_events.pop_front(); + + // move the queued_handler into a local copy, clearing the one in pt and invoke the handler + resume_continuation(strand.context(), std::move(metadata.queued_handler), {}, event); + + // are there any events left on a terminated process? + if (metadata.queued_events.empty() + && ((pt.get_state() == ptrace_process_state_t::terminated_exit) + || (pt.get_state() == ptrace_process_state_t::terminated_signal))) { + LOG_TRACE("UID[%" PRIu64 "] is terminated and has no pending events", std::uint64_t(pt.get_uid())); + + return true; + } + + if (logging::is_log_enable_trace()) { + LOG_TRACE("The following pids are still tracked: "); + for (auto const & entry : process_states) { + LOG_TRACE("... UID[%" PRIu64 "] {ppid=%d, pid=%d, state=%s, origin=%s}", + std::uint64_t(entry.second.get_uid()), + entry.second.get_ppid(), + entry.second.get_pid(), + to_cstring(entry.second.get_state()), + to_cstring(entry.second.get_origin())); + } + } + } + + return false; + } + + void process_monitor_t::do_check_all_terminated() + { + // send all-terminated message? 
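+        // (only once the last tracked process has been removed: either resume the stored
+        // async_wait_all_terminated handler, or latch all_terminated_flag for a later waiter)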
+ if (!process_states.empty()) { + return; + } + + LOG_TRACE("All traced processes are gone"); + + // move out the existing handler + error_continuation_t all_terminated_handler {std::move(this->all_terminated_handler)}; + + if (all_terminated_handler) { + // reset the flag + all_terminated_flag = false; + + // call the handler + resume_continuation(strand.context(), std::move(all_terminated_handler), {}); + } + else { + // set the flag for later + all_terminated_flag = true; + } + } +} diff --git a/daemon/async/proc/process_monitor.hpp b/daemon/async/proc/process_monitor.hpp new file mode 100644 index 00000000..ebdf11dc --- /dev/null +++ b/daemon/async/proc/process_monitor.hpp @@ -0,0 +1,230 @@ +/* Copyright (C) 2021-2022 by Arm Limited. All rights reserved. */ + +#pragma once + +#include "async/continuations/async_initiate.h" +#include "async/continuations/operations.h" +#include "async/continuations/stored_continuation.h" +#include "async/proc/process_state.hpp" +#include "async/proc/process_state_tracker.hpp" +#include "lib/error_code_or.hpp" +#include "lib/forked_process.h" +#include "lib/source_location.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace async::proc { + + /** + * Event data for some change in some monitored process + */ + struct process_monitor_event_t { + process_uid_t uid; + pid_t pid; + ptrace_event_type_t type; + ptrace_process_state_t state; + ptrace_process_origin_t origin; + int status; + + constexpr process_monitor_event_t(process_uid_t uid, + pid_t pid, + ptrace_event_type_t type, + ptrace_process_state_t state, + ptrace_process_origin_t origin, + int status) noexcept + : uid(uid), pid(pid), type(type), state(state), origin(origin), status(status) + { + } + }; + + /** + * A class designed to ptrace one or more pid, tracking their lifecycle (attach, clone/exec/fork/vfork, exit) and that of subsequent children. + * Lifecycle events are queued into a set of ptrace_monitor_queue_t objects as they happen. 
+ */ + class process_monitor_t { + public: + struct fork_result_t { + process_uid_t uid; + lib::forked_process_t process; + }; + + /** Constructor */ + explicit process_monitor_t(boost::asio::io_context & context) : strand(context) {} + + /** Wait for the next asynchronous event */ + template + auto async_wait_event(process_uid_t uid, CompletionToken && token) + { + using namespace async::continuations; + + return async_initiate_explicit( + [this, uid](auto && stored_continuation) { + submit(start_on(strand) // + | then([this, uid, sc = stored_continuation.move()]() mutable { + do_async_wait_event(uid, std::move(sc)); + }), + stored_continuation.get_exceptionally()); + }, + std::forward(token)); + } + + /** Wait for the case where all tracked processes exit */ + template + auto async_wait_all_terminated(CompletionToken && token) + { + using namespace async::continuations; + + return async_initiate_explicit( + [this](auto && stored_continuation) { + submit(start_on(strand) // + | then([this, sc = stored_continuation.move()]() mutable { + do_async_wait_all_terminated(std::move(sc)); + }), + stored_continuation.get_exceptionally()); + }, + std::forward(token)); + } + + /** Fork/Exec a new child process */ + template + auto async_fork_exec(bool prepend_command, + std::string cmd, + std::vector args, + boost::filesystem::path cwd, + std::optional> const & uid_gid, + lib::stdio_fds_t stdio_fds, + CompletionToken && token) + { + using namespace async::continuations; + + return async_initiate( + [this, + prepend_command, + cmd = std::move(cmd), + args = std::move(args), + cwd = std::move(cwd), + uid_gid = uid_gid, + fds = std::move(stdio_fds)]() mutable { + return start_on(strand) // + | then([this, + prepend_command, + cmd = std::move(cmd), + args = std::move(args), + cwd = std::move(cwd), + uid_gid, + fds = std::move(fds)]() mutable { + return do_async_fork_exec(prepend_command, cmd, args, cwd, uid_gid, std::move(fds)); + }) // + | unpack_tuple(); + }, + std::forward(token)); + } + + /** Monitor an externally forked process */ + template + auto async_monitor_forked_pid(pid_t pid, CompletionToken && token) + { + using namespace async::continuations; + + return async_initiate( + [this, pid]() mutable { + return start_on(strand) // + | then([this, pid]() mutable { return do_async_monitor_forked_pid(pid); }); + }, + std::forward(token)); + } + + /** Notify of a SIGCHLD event */ + void on_sigchild(); + + /** Abort all pending events, stop listening for new ones */ + void cancel(); + + private: + using process_event_continuation_t = + async::continuations::stored_continuation_t; + using error_continuation_t = async::continuations::stored_continuation_t; + + struct process_monitor_metadata_t { + std::deque queued_events {}; + process_event_continuation_t queued_handler {}; + }; + + using process_tracker_t = process_state_tracker_t; + + template + using error_and_t = std::pair; + using error_event_tracker_t = lib::error_code_or_t>; + + friend process_tracker_t; + + template + static constexpr error_and_t unpack_error(error_event_tracker_t && error_or_pt, + Handler && handler) + { + auto const * error = lib::get_error(error_or_pt); + if (error != nullptr) { + return *error; + } + return handler(lib::get_value(error_or_pt)); + } + + boost::asio::io_context::strand strand; + error_continuation_t all_terminated_handler {}; + std::map process_states {}; + std::uint64_t uid_counter {0}; + bool all_terminated_flag {false}; + + // create / get helpers + error_event_tracker_t await_get_common(process_uid_t uid); 
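The error_and_t alias above pairs a boost::system::error_code with a result value instead of throwing, and do_async_fork_exec (declared just below) returns its fork_result_t that way. A rough sketch of consuming such a pair, with an assumed local variable name, purely for illustration:

    // Sketch: unpack an error_and_t<fork_result_t>-style pair (error code first, value second).
    auto [ec, fork_result] = result_of_do_async_fork_exec; // hypothetical variable holding the returned pair
    if (ec) {
        LOG_DEBUG("fork/exec failed: %s", ec.message().c_str());
    }
    else {
        LOG_TRACE("forked uid=%" PRIu64 " pid=%d",
                  std::uint64_t(fork_result.uid),
                  fork_result.process.get_pid());
    }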
+ + // async_xxx handlers + [[nodiscard]] error_and_t do_async_fork_exec( + bool prepend_command, + std::string const & cmd, + std::vector const & args, + boost::filesystem::path const & cwd, + std::optional> const & uid_gid, + lib::stdio_fds_t stdio_fds); + [[nodiscard]] process_uid_t do_async_monitor_forked_pid(pid_t pid); + void do_async_wait_event(process_uid_t uid, process_event_continuation_t handler); + void do_async_wait_all_terminated(error_continuation_t handler); + + // single handling + [[nodiscard]] bool do_waitpid_for(process_tracker_t & process_tracker); + + // callbacks for process_state_tracker_t + [[nodiscard]] bool on_process_state_changed(process_tracker_t & pt); + + // event processing + void queue_event(process_tracker_t & pt, + process_uid_t uid, + pid_t pid, + ptrace_event_type_t type, + ptrace_process_state_t state, + ptrace_process_origin_t origin, + int status); + [[nodiscard]] bool flush_events(process_tracker_t & pt); + + void do_check_all_terminated(); + }; +} diff --git a/daemon/async/proc/process_state.hpp b/daemon/async/proc/process_state.hpp new file mode 100644 index 00000000..8f7038e4 --- /dev/null +++ b/daemon/async/proc/process_state.hpp @@ -0,0 +1,77 @@ +/* Copyright (C) 2021-2022 by Arm Limited. All rights reserved. */ + +#pragma once + +#include + +namespace async::proc { + /** Used to uniquely identify a process in case of pid-reuse */ + enum class process_uid_t : std::uint64_t; + + /** + * Enumerates the possible event types + */ + enum class ptrace_event_type_t { + state_change, + error, + }; + + /** + * Enumerates the possible traced process states + */ + enum class ptrace_process_state_t { + attaching = 0, + attached, + terminated_exit, + terminated_signal, + no_such_process, + }; + + /** + * Enumerates how the process was discovered + */ + enum class ptrace_process_origin_t { + /** The process is a subprocess created by 'fork' */ + forked, + }; + + constexpr char const * to_cstring(ptrace_event_type_t state) noexcept + { + switch (state) { + case ptrace_event_type_t::state_change: + return "state_change"; + case ptrace_event_type_t::error: + return "error"; + default: + return "???"; + } + } + + constexpr char const * to_cstring(ptrace_process_state_t state) noexcept + { + switch (state) { + case ptrace_process_state_t::attaching: + return "attaching"; + case ptrace_process_state_t::attached: + return "attached"; + case ptrace_process_state_t::terminated_exit: + return "terminated_exit"; + case ptrace_process_state_t::terminated_signal: + return "terminated_signal"; + case ptrace_process_state_t::no_such_process: + return "no_such_process"; + default: + return "???"; + } + } + + constexpr char const * to_cstring(ptrace_process_origin_t state) noexcept + { + switch (state) { + case ptrace_process_origin_t::forked: + return "forked"; + default: + return "???"; + } + } +} diff --git a/daemon/async/proc/process_state_tracker.hpp b/daemon/async/proc/process_state_tracker.hpp new file mode 100644 index 00000000..61f84912 --- /dev/null +++ b/daemon/async/proc/process_state_tracker.hpp @@ -0,0 +1,132 @@ +/* Copyright (C) 2021-2022 by Arm Limited. All rights reserved. 
*/ + +#pragma once + +#include "Logging.h" +#include "async/proc/process_state.hpp" +#include "async/proc/wait.hpp" + +#include + +#include + +namespace async::proc { + + /** + * Tracked process state machine + */ + template + class process_state_tracker_t { + public: + using callbacks_t = Callbacks; + using metadata_t = Metadata; + + /** + * Constructor + * @param uid The process uid + * @param pid The process (thread) id + * @param origin How the process was discovered + */ + process_state_tracker_t(process_uid_t uid, pid_t pid, ptrace_process_origin_t origin) + : uid(uid), ppid(0), pid(pid), origin(origin) + { + } + + [[nodiscard]] process_uid_t get_uid() const { return uid; } + [[nodiscard]] pid_t get_ppid() const { return ppid; }; + [[nodiscard]] pid_t get_pid() const { return pid; } + [[nodiscard]] ptrace_process_origin_t get_origin() const { return origin; } + [[nodiscard]] ptrace_process_state_t get_state() const { return state; } + [[nodiscard]] int get_status_code() { return status_code; } + [[nodiscard]] metadata_t const & get_metadata() const { return metadata; } + [[nodiscard]] metadata_t & get_metadata() { return metadata; } + + /** + * Process the next 'status' value received from waitpid for this process + * + * @param status The status value from waitpid + * @param callbacks The object used to manipulate external state (such as ptrace, track new process etc) + */ + void process_wait_status(unsigned status, callbacks_t & callbacks) + { + // for debugging + LOG_TRACE("PID[%d] received wait status update (status = 0x%x)", pid, status); + + // and exit events + if (w_if_exited(status)) { + return on_process_exited(w_exit_status(status), callbacks); + } + + // and signal termination events + if (w_if_signaled(status)) { + return on_process_signaled(w_term_sig(status), callbacks); + } + } + + /** Called on successful fork completion */ + void process_fork_complete(callbacks_t & callbacks) + { + if (origin != ptrace_process_origin_t::forked) { + LOG_DEBUG("PID[%d] Unexpected origin for fork complete", pid); + return; + } + if (state == ptrace_process_state_t::attaching) { + transition_state(ptrace_process_state_t::attached, 0, callbacks); + } + } + + /** Called when waitpid returns ECHILD */ + void on_waitpid_echild(callbacks_t & callbacks) + { + // just assume it exited ok, if not already exited + if ((state != ptrace_process_state_t::terminated_exit) + && (state != ptrace_process_state_t::terminated_signal)) { + transition_state(ptrace_process_state_t::terminated_exit, 0, callbacks); + } + } + + private: + process_uid_t uid; + pid_t ppid; + pid_t pid; + int status_code; + ptrace_process_origin_t origin; + ptrace_process_state_t state {ptrace_process_state_t::attaching}; + metadata_t metadata {}; + + void on_process_exited(int exit_status, callbacks_t & callbacks) + { + LOG_DEBUG("PID[%d] exited with status code %d", pid, exit_status); + + transition_state(ptrace_process_state_t::terminated_exit, exit_status, callbacks); + } + + void on_process_signaled(int signo, callbacks_t & callbacks) + { + LOG_DEBUG("PID[%d] exited with signal %d (%s)", + pid, + signo, + strsignal(signo)); // NOLINT(concurrency-mt-unsafe) + + transition_state(ptrace_process_state_t::terminated_signal, signo, callbacks); + } + + void transition_state(ptrace_process_state_t to_state, int status, callbacks_t & callbacks) + { + if (state == to_state) { + // no state change; do nothing + return; + } + + // move to the new state + LOG_TRACE("PID[%d] transitioned from %s to %s", pid, to_cstring(state), 
to_cstring(to_state)); + + // update our state + state = to_state; + status_code = status; + + // notify the callbacks + (void) callbacks.on_process_state_changed(*this); + } + }; +} diff --git a/daemon/async/proc/wait.hpp b/daemon/async/proc/wait.hpp new file mode 100644 index 00000000..3c1665c1 --- /dev/null +++ b/daemon/async/proc/wait.hpp @@ -0,0 +1,49 @@ +/* Copyright (C) 2022 by Arm Limited. All rights reserved. */ + +#include +#include + +namespace async::proc { + constexpr bool w_if_continued(unsigned status) + { + //NOLINTNEXTLINE(hicpp-signed-bitwise) + return WIFCONTINUED(status); + } + + constexpr bool w_if_exited(unsigned status) + { + //NOLINTNEXTLINE(hicpp-signed-bitwise) + return WIFEXITED(status); + } + + constexpr bool w_if_signaled(unsigned status) + { + //NOLINTNEXTLINE(hicpp-signed-bitwise) + return WIFSIGNALED(status); + } + + constexpr bool w_if_stopped(unsigned status) + { + //NOLINTNEXTLINE(hicpp-signed-bitwise) + return WIFSTOPPED(status); + } + + constexpr unsigned w_exit_status(unsigned status) + { + //NOLINTNEXTLINE(hicpp-signed-bitwise) + return WEXITSTATUS(status); + } + + constexpr unsigned w_stop_sig(unsigned status) + { + //NOLINTNEXTLINE(hicpp-signed-bitwise) + return WSTOPSIG(status); + } + + constexpr unsigned w_term_sig(unsigned status) + { + //NOLINTNEXTLINE(hicpp-signed-bitwise) + return WTERMSIG(status); + } + +} diff --git a/daemon/capture/CaptureProcess.cpp b/daemon/capture/CaptureProcess.cpp index e4f8dc64..a4131a1b 100644 --- a/daemon/capture/CaptureProcess.cpp +++ b/daemon/capture/CaptureProcess.cpp @@ -13,6 +13,7 @@ #include "Sender.h" #include "SessionData.h" #include "StreamlineSetupLoop.h" +#include "agents/spawn_agent.h" #include "capture/internal/UdpListener.h" #include "lib/FileDescriptor.h" #include "lib/Process.h" @@ -128,6 +129,7 @@ namespace { case State::EXIT: break; } + return currentStateAndChildPid; } @@ -220,6 +222,7 @@ namespace { Drivers & drivers, OlyServerSocket & sock, OlyServerSocket * otherSock, + capture::capture_process_event_listener_t & event_listener, logging::last_log_error_supplier_t last_log_error_supplier, logging::log_setup_supplier_t log_setup_supplier) { @@ -236,7 +239,6 @@ namespace { driver->preChildFork(); } - int parent_pid = getpid(); int pid = fork(); if (pid < 0) { // Error @@ -244,8 +246,9 @@ namespace { ss << errno; throw GatorException(ss.str()); } - else if (pid == 0) { - gator::process::set_parent_death_signal(SIGINT); + + if (pid == 0) { + gator::process::set_parent_death_signal(SIGKILL); // Child for (const auto & driver : drivers.getAll()) { @@ -260,13 +263,26 @@ namespace { monitor.close(); annotateListenerPtr.reset(); - // TODO: android spawner - agents::simple_agent_spawner_t spawner {}; + // create the agent process spawner + std::unique_ptr spawner {}; + + if ((!gSessionData.mSystemWide) && (gSessionData.mAndroidPackage != nullptr)) { + spawner = std::make_unique(gSessionData.mAndroidPackage); + } + else { + spawner = std::make_unique(); + } - auto child = - Child::createLive(spawner, drivers, client, last_log_error_supplier, std::move(log_setup_supplier)); - child->run(parent_pid); + auto child = Child::createLive(*spawner, + drivers, + client, + event_listener, + last_log_error_supplier, + std::move(log_setup_supplier)); + child->run(); child.reset(); + spawner.reset(); // the dtor may perform some necessary cleanup + exit(0); } else { @@ -281,13 +297,13 @@ namespace { StateAndPid doLocalCapture(Drivers & drivers, const Child::Config & config, + capture::capture_process_event_listener_t & 
event_listener, logging::last_log_error_supplier_t last_log_error_supplier, logging::log_setup_supplier_t log_setup_supplier) { for (const auto & driver : drivers.getAll()) { driver->preChildFork(); } - int parent_pid = getpid(); int pid = fork(); if (pid < 0) { // Error @@ -303,15 +319,23 @@ namespace { monitor.close(); annotateListenerPtr.reset(); - // TODO: android spawner - agents::simple_agent_spawner_t spawner {}; + // create the agent process spawner + std::unique_ptr spawner {}; + + if ((!gSessionData.mSystemWide) && (gSessionData.mAndroidPackage != nullptr)) { + spawner = std::make_unique(gSessionData.mAndroidPackage); + } + else { + spawner = std::make_unique(); + } - auto child = Child::createLocal(spawner, + auto child = Child::createLocal(*spawner, drivers, config, + event_listener, std::move(last_log_error_supplier), std::move(log_setup_supplier)); - child->run(parent_pid); + child->run(); LOG_DEBUG("gator-child finished running"); child.reset(); @@ -336,7 +360,7 @@ int capture::beginCaptureProcess(const ParserResult & result, capture::capture_process_event_listener_t & event_listener) { // Set to high priority - if (setpriority(PRIO_PROCESS, syscall(__NR_gettid), high_priority) == -1) { + if (setpriority(PRIO_PROCESS, lib::gettid(), high_priority) == -1) { LOG_DEBUG("setpriority() failed"); } @@ -347,6 +371,8 @@ int capture::beginCaptureProcess(const ParserResult & result, // only enable when running in system-wide mode bool enable_annotation_listener = result.mSystemWide; + StateAndPid stateAndChildPid = {.state = State::IDLE, .pid = -1}; + try { std::unique_ptr socketUds; std::unique_ptr socketTcp; @@ -370,8 +396,6 @@ int capture::beginCaptureProcess(const ParserResult & result, throw GatorException("Monitor setup failed"); } - StateAndPid stateAndChildPid = {.state = State::IDLE, .pid = -1}; - // If the command line argument is a session xml file, no need to open a socket if (gSessionData.mLocalCapture) { Child::Config childConfig {{}, {}}; @@ -384,7 +408,8 @@ int capture::beginCaptureProcess(const ParserResult & result, for (const auto & spe : result.mSpeConfigs) { childConfig.spes.insert(spe); } - stateAndChildPid = doLocalCapture(drivers, childConfig, last_log_error_supplier, log_setup_supplier); + stateAndChildPid = + doLocalCapture(drivers, childConfig, event_listener, last_log_error_supplier, log_setup_supplier); } else { // enable TCP socket @@ -420,6 +445,7 @@ int capture::beginCaptureProcess(const ParserResult & result, drivers, *socketUds, socketTcp.get(), + event_listener, last_log_error_supplier, log_setup_supplier); } @@ -428,6 +454,7 @@ int capture::beginCaptureProcess(const ParserResult & result, drivers, *socketTcp, socketUds.get(), + event_listener, last_log_error_supplier, log_setup_supplier); } @@ -460,19 +487,7 @@ int capture::beginCaptureProcess(const ParserResult & result, throw GatorException(ss.str()); } - /* this is a horrible hack so that we don't have to implement an IPC mechanism between - * Child.cpp (which is going to be removed soon) and the capture controller. We have - * the child process send us a SIGUSR1 when the target app needs to be started so that - * we can tell the controller process - which has the correct privileges - to do that - * for us. - */ - if (signum == SIGUSR1) { - LOG_DEBUG("Got SIGUSR1 from Child. 
Notifying event listener."); - event_listener.waiting_for_target(); - } - else { - stateAndChildPid = handleSignal(stateAndChildPid, drivers, signum); - } + stateAndChildPid = handleSignal(stateAndChildPid, drivers, signum); } else { // shouldn't really happen unless we forgot to handle a new item @@ -486,6 +501,19 @@ int capture::beginCaptureProcess(const ParserResult & result, } catch (const GatorException & ex) { LOG_DEBUG("%s", ex.what()); + + // hard-kill the child process if its running + switch (stateAndChildPid.state) { + case State::CAPTURING: + LOG_DEBUG("Sending SIGKILL to child process"); + kill(-stateAndChildPid.pid, SIGKILL); + break; + case State::IDLE: + case State::EXITING: + case State::EXIT: + break; + } + handleException(); } diff --git a/daemon/capture/CaptureProcess.h b/daemon/capture/CaptureProcess.h index ff437e20..4021815c 100644 --- a/daemon/capture/CaptureProcess.h +++ b/daemon/capture/CaptureProcess.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2021 by Arm Limited. All rights reserved. */ +/* Copyright (C) 2021-2022 by Arm Limited. All rights reserved. */ #pragma once @@ -33,8 +33,9 @@ namespace capture { * @brief Called by the capturing agent when it has performed any required initialisation * (e.g. enumerating & configuring counters) and it is ready for the target application to * be started. + * @return true unless the android package or other target could not be started */ - virtual void waiting_for_target() = 0; + [[nodiscard]] virtual bool waiting_for_target() = 0; }; using GatorReadyCallback = std::function; diff --git a/daemon/capture/Environment.cpp b/daemon/capture/Environment.cpp index 440b99d7..cb99e6eb 100644 --- a/daemon/capture/Environment.cpp +++ b/daemon/capture/Environment.cpp @@ -116,16 +116,20 @@ namespace { } } -LinuxEnvironmentConfig::LinuxEnvironmentConfig(SessionData & sessionData) noexcept : CaptureEnvironment() +LinuxEnvironmentConfig::LinuxEnvironmentConfig() noexcept { configureRlimit(); - configurePerfMmapSize(sessionData); } LinuxEnvironmentConfig::~LinuxEnvironmentConfig() noexcept { } +void LinuxEnvironmentConfig::postInit(SessionData & sessionData) +{ + configurePerfMmapSize(sessionData); +} + OsType capture::detectOs() { #ifdef __ANDROID__ @@ -151,15 +155,15 @@ OsType capture::detectOs() #endif } -std::unique_ptr capture::prepareCaptureEnvironment(SessionData & sessionData) +std::unique_ptr capture::prepareCaptureEnvironment() { switch (detectOs()) { case OsType::Android: { - return std::make_unique(sessionData, classifyUser()); + return std::make_unique(classifyUser()); } case OsType::Linux: - return std::make_unique(sessionData); + return std::make_unique(); } throw GatorException("Invalid capture environment"); } diff --git a/daemon/capture/Environment.h b/daemon/capture/Environment.h index 8caa5641..a3ccd954 100644 --- a/daemon/capture/Environment.h +++ b/daemon/capture/Environment.h @@ -19,18 +19,21 @@ namespace capture { CaptureEnvironment(const CaptureEnvironment &) = delete; CaptureEnvironment & operator=(const CaptureEnvironment &) = delete; + + virtual void postInit(SessionData & sessionData) = 0; }; class LinuxEnvironmentConfig : public CaptureEnvironment { public: - LinuxEnvironmentConfig(SessionData & sessionData) noexcept; + LinuxEnvironmentConfig() noexcept; + + ~LinuxEnvironmentConfig() noexcept override; - virtual ~LinuxEnvironmentConfig() noexcept; + void postInit(SessionData & sessionData) override; }; OsType detectOs(); - std::unique_ptr prepareCaptureEnvironment(SessionData & sessionData); - + std::unique_ptr 
prepareCaptureEnvironment(); } #endif diff --git a/daemon/capture/internal/UdpListener.h b/daemon/capture/internal/UdpListener.h index dde30ecf..d7eed165 100644 --- a/daemon/capture/internal/UdpListener.h +++ b/daemon/capture/internal/UdpListener.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2021 by Arm Limited. All rights reserved. */ +/* Copyright (C) 2021-2022 by Arm Limited. All rights reserved. */ #pragma once @@ -9,10 +9,10 @@ #include "ProtocolVersion.h" #include +#include #include #include -#include #include namespace capture::internal { diff --git a/daemon/events-Cortex-A510.xml b/daemon/events-Cortex-A510.xml index b0f10602..a79a354e 100644 --- a/daemon/events-Cortex-A510.xml +++ b/daemon/events-Cortex-A510.xml @@ -1,4 +1,4 @@ - + @@ -54,6 +54,67 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/daemon/events-DSU.xml b/daemon/events-DSU.xml index cd927901..997c40cd 100644 --- a/daemon/events-DSU.xml +++ b/daemon/events-DSU.xml @@ -80,6 +80,20 @@ + + + + + + + + + + + + + + diff --git a/daemon/events-Mali-G310_hw.xml b/daemon/events-Mali-G310_hw.xml index 10497f8c..68f56bb5 100644 --- a/daemon/events-Mali-G310_hw.xml +++ b/daemon/events-Mali-G310_hw.xml @@ -3,7 +3,7 @@ - + @@ -90,8 +90,8 @@ - - + + @@ -100,8 +100,8 @@ - - + + diff --git a/daemon/events-Mali-G31_hw.xml b/daemon/events-Mali-G31_hw.xml index a8737fba..559da3ff 100644 --- a/daemon/events-Mali-G31_hw.xml +++ b/daemon/events-Mali-G31_hw.xml @@ -65,8 +65,8 @@ - - + + diff --git a/daemon/events-Mali-G510_hw.xml b/daemon/events-Mali-G510_hw.xml index a86ac58c..d1930ab8 100644 --- a/daemon/events-Mali-G510_hw.xml +++ b/daemon/events-Mali-G510_hw.xml @@ -3,7 +3,7 @@ - + @@ -90,8 +90,8 @@ - - + + @@ -100,8 +100,8 @@ - - + + diff --git a/daemon/events-Mali-G51_hw.xml b/daemon/events-Mali-G51_hw.xml index adf2ed40..fe4a0143 100644 --- a/daemon/events-Mali-G51_hw.xml +++ b/daemon/events-Mali-G51_hw.xml @@ -65,8 +65,8 @@ - - + + diff --git a/daemon/events-Mali-G52_hw.xml b/daemon/events-Mali-G52_hw.xml index d769b8c1..57212494 100644 --- a/daemon/events-Mali-G52_hw.xml +++ b/daemon/events-Mali-G52_hw.xml @@ -67,8 +67,8 @@ - - + + diff --git a/daemon/events-Mali-G57_hw.xml b/daemon/events-Mali-G57_hw.xml index 707d7ff4..1ef519d9 100644 --- a/daemon/events-Mali-G57_hw.xml +++ b/daemon/events-Mali-G57_hw.xml @@ -89,8 +89,8 @@ - - + + diff --git a/daemon/events-Mali-G610_hw.xml b/daemon/events-Mali-G610_hw.xml index 22c3ded6..6bd2ee3d 100644 --- a/daemon/events-Mali-G610_hw.xml +++ b/daemon/events-Mali-G610_hw.xml @@ -3,7 +3,7 @@ - + @@ -90,8 +90,8 @@ - - + + @@ -100,8 +100,8 @@ - - + + diff --git a/daemon/events-Mali-G68_hw.xml b/daemon/events-Mali-G68_hw.xml index ba21a16c..ce5fd247 100644 --- a/daemon/events-Mali-G68_hw.xml +++ b/daemon/events-Mali-G68_hw.xml @@ -89,8 +89,8 @@ - - + + diff --git a/daemon/events-Mali-G710_hw.xml b/daemon/events-Mali-G710_hw.xml index 61d839be..9c2a6bbd 100644 --- a/daemon/events-Mali-G710_hw.xml +++ b/daemon/events-Mali-G710_hw.xml @@ -3,7 +3,7 @@ - + @@ -90,8 +90,8 @@ - - + + @@ -100,8 +100,8 @@ - - + + diff --git a/daemon/events-Mali-G71_hw.xml b/daemon/events-Mali-G71_hw.xml index f02bb89d..e72685aa 100644 --- a/daemon/events-Mali-G71_hw.xml +++ b/daemon/events-Mali-G71_hw.xml @@ -65,8 +65,8 @@ - - + + diff --git a/daemon/events-Mali-G72_hw.xml b/daemon/events-Mali-G72_hw.xml index 3ddfe6ed..87b1e3a2 100644 --- a/daemon/events-Mali-G72_hw.xml +++ b/daemon/events-Mali-G72_hw.xml @@ -65,8 +65,8 @@ - - + + diff --git 
a/daemon/events-Mali-G76_hw.xml b/daemon/events-Mali-G76_hw.xml index a3b92bb8..ccca3936 100644 --- a/daemon/events-Mali-G76_hw.xml +++ b/daemon/events-Mali-G76_hw.xml @@ -67,8 +67,8 @@ - - + + diff --git a/daemon/events-Mali-G77_hw.xml b/daemon/events-Mali-G77_hw.xml index 3743d1d8..7d8d23db 100644 --- a/daemon/events-Mali-G77_hw.xml +++ b/daemon/events-Mali-G77_hw.xml @@ -89,8 +89,8 @@ - - + + diff --git a/daemon/events-Mali-G78_hw.xml b/daemon/events-Mali-G78_hw.xml index 908f78d6..cbbaf4ca 100644 --- a/daemon/events-Mali-G78_hw.xml +++ b/daemon/events-Mali-G78_hw.xml @@ -89,8 +89,8 @@ - - + + diff --git a/daemon/events-Mali_Misc.xml b/daemon/events-Mali_Misc.xml index 784ca337..22347552 100644 --- a/daemon/events-Mali_Misc.xml +++ b/daemon/events-Mali_Misc.xml @@ -1,5 +1,6 @@ - + + diff --git a/daemon/ipc/async_streamline_sender.h b/daemon/ipc/async_streamline_sender.h new file mode 100644 index 00000000..1e76d2c3 --- /dev/null +++ b/daemon/ipc/async_streamline_sender.h @@ -0,0 +1,129 @@ +/* Copyright (C) 2022 by Arm Limited. All rights reserved. */ + +#pragma once + +#include "Logging.h" +#include "ipc/codec.h" +#include "ipc/raw_ipc_channel_sink.h" +#include "ipc/responses.h" +#include "lib/Assert.h" +#include "lib/AutoClosingFd.h" + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace ipc { + + class async_streamline_sender_t : public std::enable_shared_from_this { + + public: + /** Factory method */ + static std::shared_ptr create(boost::asio::io_context & io_context, + lib::AutoClosingFd && out, + bool is_local_capture) + { + return std::make_shared( + async_streamline_sender_t {io_context, std::move(out), is_local_capture}); + } + + /** + * Write some fixed-size message into the send buffer. + */ + template + auto async_send_message(Response message, CompletionToken && token) + { + using response_type = std::decay_t; + return boost::asio::async_initiate( + [st = shared_from_this(), message = std::forward(message)](auto && handler) mutable { + using Handler = decltype(handler); + st->do_async_send_message(std::forward(message), std::forward(handler)); + }, + token); + } + + private: + /** The IPC sink, for sending message to the agent */ + std::shared_ptr ipc_sink; + /** The stream if local capture */ + std::shared_ptr write_out; + boost::asio::io_context::strand strand; + bool is_local_capture = false; + + async_streamline_sender_t(boost::asio::io_context & io_context, + lib::AutoClosingFd && out, + bool is_local_capture) + : strand(io_context), is_local_capture(is_local_capture) + { + + if (!is_local_capture) { + ipc_sink = ipc::raw_ipc_channel_sink_t::create(io_context, std::move(out)); + } + else { + write_out = std::make_shared(io_context, out.release()); + } + } + + template + void do_async_send_message(Response message, Handler && handler) + { + using handler_type = std::decay_t; + static_assert(std::is_same_v); + + if (is_local_capture) { + //?? will the 000 file be incremented *000, *001 etc?? 
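In the local-capture branch below, every apc_data payload is written to the output file prefixed by a 4-byte little-endian length. A standalone sketch of that framing, kept separate from the asio write chain and purely illustrative:

    #include <array>
    #include <cstdint>
    #include <vector>

    // Sketch: produce [length (little-endian, 4 bytes)][payload bytes].
    inline std::vector<std::uint8_t> frame_apc_payload(std::vector<std::uint8_t> const & payload)
    {
        auto const length = static_cast<std::uint32_t>(payload.size());
        std::array<std::uint8_t, 4> header {{
            static_cast<std::uint8_t>(length),
            static_cast<std::uint8_t>(length >> 8),
            static_cast<std::uint8_t>(length >> 16),
            static_cast<std::uint8_t>(length >> 24),
        }};
        std::vector<std::uint8_t> framed(header.begin(), header.end());
        framed.insert(framed.end(), payload.begin(), payload.end());
        return framed;
    }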
+ // Write data to disk as long as it is not meta data + if (message.key == response_type::apc_data) { + auto buffer_ptr = std::make_shared>(std::move(message.payload)); + //payload length bytes + auto length = buffer_ptr->size(); + auto buffer_length = std::make_shared>(std::array({{ + (static_cast(length)), + (static_cast(length >> 8)), + (static_cast(length >> 16)), + (static_cast(length >> 24)), + }})); + //write payload length + boost::asio::async_write( + *write_out, + boost::asio::buffer(*buffer_length), + [st = shared_from_this(), + buffer_ptr, + buffer_length, + h = std::forward(handler)](auto const & ec, + std::size_t bytes_transferred) mutable { // + (void) buffer_length; + //write payload + boost::asio::async_write( + *(st->write_out), + boost::asio::buffer(*buffer_ptr), + [h = std::forward(h)](auto const & ec, + std::size_t bytes_transferred) mutable { // + return h(ec); // + }); + return h(ec); + }); + } + } + else { + ipc_sink->async_send_response( + std::move(message), + [h = std::forward(handler)](auto const & ec, auto const & /* msg */) mutable { // + h(ec); // + }); + } + } + }; +} diff --git a/daemon/ipc/codec.h b/daemon/ipc/codec.h index e3abcac0..de9504f1 100644 --- a/daemon/ipc/codec.h +++ b/daemon/ipc/codec.h @@ -26,6 +26,7 @@ #include "ipc/message_key.h" #include "ipc/message_traits.h" +#include "ipc/responses.h" #include "lib/Assert.h" #include "lib/Span.h" @@ -41,7 +42,7 @@ namespace ipc { /** Helper for simple data types that can be blitted directly from memory into the message without additonal encoding (such as strings, arrays of pods etc) */ - template + template struct byte_span_blob_codec_t { /** The blob type */ using value_type = T; @@ -61,12 +62,12 @@ namespace ipc { static constexpr std::size_t sg_writer_buffers_count = 2; /** The size of the length field */ - static constexpr std::size_t length_size = sizeof(std::size_t); + static constexpr std::size_t length_size = sizeof(U); /** Fill the sg_write_helper_type value */ static constexpr sg_write_helper_type fill_sg_write_helper_type(value_type const & buffer) { - using member_type = std::remove_cv; + using member_type = std::decay_t; static_assert( (std::is_array_v && is_valid_message_header_v>) || std::is_pod_v || std::is_integral_v || std::is_enum_v); @@ -102,7 +103,7 @@ namespace ipc { /** Read the suffix value from bytes (which must be set to the length given by read_suffix_length) */ static constexpr void read_suffix(lib::Span const & bytes, value_type & buffer) { - using member_type = std::remove_cv; + using member_type = std::decay_t; static_assert( (std::is_array_v && is_valid_message_header_v>) || std::is_pod_v || std::is_integral_v || std::is_enum_v); @@ -132,7 +133,7 @@ namespace ipc { /** Return a mutable buffer to store the suffix for some read operation via scatter-gather read */ static auto mutable_suffix_buffer(value_type & buffer, sg_read_helper_type & helper) { - using member_type = std::remove_cv; + using member_type = std::decay_t; static_assert( (std::is_array_v && is_valid_message_header_v>) || std::is_pod_v || std::is_integral_v || std::is_enum_v); @@ -152,11 +153,11 @@ namespace ipc { * It can be specialized for cases where the suffix must be first encoded into some temporary buffer, or for when * it can be blitted directly from memory. 
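Stepping back from the template details: a message on the wire is laid out as [key][header][length][suffix], and the response_t specialisations introduced in this change drop the header and use a 32-bit length. The receive side in raw_ipc_channel_source.h reads the pieces in that order, skipping the header and length stages when there is nothing to read. A schematic of that read order, assuming a hypothetical read_exact helper:

    // Sketch only: the decode order for a message_t-style message.
    read_exact(fd, &key, sizeof(key));        // message key
    read_exact(fd, &header, sizeof(header));  // skipped when the header size is zero
    read_exact(fd, &length, sizeof(length));  // skipped when there is no length field
    suffix.resize(length);
    read_exact(fd, suffix.data(), length);    // suffix/payload bytes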
*/ - template + template struct blob_codec_t; - template<> - struct blob_codec_t { + template + struct blob_codec_t { /** The blob type */ using value_type = void; @@ -210,20 +211,20 @@ namespace ipc { }; /** Specialization for vector of integrals */ - template - struct blob_codec_t, std::enable_if_t>> - : byte_span_blob_codec_t> { + template + struct blob_codec_t, U, std::enable_if_t>> + : byte_span_blob_codec_t, U> { }; /** Specialization for Span of integrals */ - template - struct blob_codec_t, std::enable_if_t>> - : byte_span_blob_codec_t> { + template + struct blob_codec_t, U, std::enable_if_t>> + : byte_span_blob_codec_t, U> { }; /** Specialization for protobuf messages */ - template - struct blob_codec_t>> { + template + struct blob_codec_t>> { using value_type = T; // Unlike byte_span_blob_codec_t, the protobuf classes cannot be their @@ -292,18 +293,32 @@ namespace ipc { return boost::asio::mutable_buffer {reinterpret_cast(helper.buffer.data()), helper.length}; } }; + template + struct key_codec_key_type_t; + // specialized for message_t + template + struct key_codec_key_type_t> { + using key_type = message_key_t; + }; + // specialized for response_t + template + struct key_codec_key_type_t> { + using key_type = response_type; + }; /** * Codec object for the 'key' value * * This class provides the means to encode and decode just the `[key]` part of a message. */ + template struct key_codec_t { - /** The message type */ + /** The message key type */ + using key_type = typename key_codec_key_type_t::key_type; /** The number of buffers required to perform a scatter gather based write of the key */ static constexpr std::size_t sg_writer_buffers_count = 1; /** The total size required to encode/decode the key + header */ - static constexpr std::size_t key_size = sizeof(message_key_t); + static constexpr std::size_t key_size = sizeof(key_type); /** Fill a scatter-gather buffer list for writing out the key */ static void fill_sg_buffer(lib::Span sg_list, message_key_t const & key) @@ -311,6 +326,11 @@ namespace ipc { sg_list[0] = {reinterpret_cast(&key), key_size}; } + /** Fill a scatter-gather buffer list for writing out the key */ + static void fill_sg_buffer(lib::Span sg_list, response_type const & key) + { + sg_list[0] = {reinterpret_cast(&key), key_size}; + } /** Make a mutable buffer out of the key (for reading into) */ static auto mutable_buffer(message_key_t & key) { @@ -338,6 +358,37 @@ namespace ipc { template struct header_codec_t; + /** Specialization for message types based on message_t where header_type is response_t which has no header */ + template + struct header_codec_t> { + /** The message type */ + using message_type = response_t; + + /** The number of buffers required to perform a scatter gather based write of the header (which in this case 0 as there is no header) */ + static constexpr std::size_t sg_writer_buffers_count = 0; + /** The total size required to encode/decode the key + header (which in this case is 0 as there is no header) */ + static constexpr std::size_t header_size = 0; + + //static_assert(Key != message_key_t::unknown); + //static_assert(std::is_void_v); + + /** Fill a scatter-gather buffer list for writing out the header */ + static constexpr void fill_sg_buffer(lib::Span /*sg_list*/, + message_type const & /*message*/) + { + } + + /** Make a mutable buffer out of the header (for reading into) */ + static auto mutable_buffer(message_type & /*message*/) { return boost::asio::mutable_buffer {}; } + + /** Read the header from some byte-span. 
*/ + static constexpr lib::Span read_header(lib::Span const & bytes, + message_type & /*message*/) + { + return bytes; + } + }; + /** Specialization for message types based on message_t */ template struct header_codec_t> { @@ -417,6 +468,44 @@ namespace ipc { template struct suffix_codec_t; + /** Specialization for message types based on response_t */ + template + struct suffix_codec_t> { + /** The message type */ + using message_type = response_t; + /** The suffix type */ + using suffix_type = typename message_type::payload_type; + /** The encoder type */ + using encoder_type = blob_codec_t; + /** The scatter-gather helper object which stores the length and buffer so that the length field may be scatter-gathered */ + using sg_write_helper_type = typename encoder_type::sg_write_helper_type; + + /** The number of buffers required to perform a scatter gather based write of the length + suffix fields */ + static constexpr std::size_t sg_writer_buffers_count = encoder_type::sg_writer_buffers_count; + + /** The size of the length field */ + static constexpr std::size_t length_size = sizeof(std::int32_t); + + /** Fill the sg_write_helper_type value */ + static constexpr sg_write_helper_type fill_sg_write_helper_type(message_type const & message) + { + return encoder_type::fill_sg_write_helper_type(message.payload); + } + + /** The total size required to store the encoded suffix buffer + length field */ + static constexpr std::size_t suffix_write_size(sg_write_helper_type const & helper) + { + return encoder_type::suffix_write_size(helper); + } + + /** Fill a scatter-gather buffer list for writing out the header */ + static constexpr void fill_sg_buffer(lib::Span sg_list, + sg_write_helper_type const & helper) + { + return encoder_type::fill_sg_buffer(sg_list, helper); + } + }; + /** Specialization for message types based on message_t */ template struct suffix_codec_t> { @@ -425,7 +514,7 @@ namespace ipc { /** The suffix type */ using suffix_type = typename message_type::suffix_type; /** The encoder type */ - using encoder_type = blob_codec_t; + using encoder_type = blob_codec_t; /** The scatter-gather helper object which stores the length and buffer so that the length field may be scatter-gathered */ using sg_write_helper_type = typename encoder_type::sg_write_helper_type; /** The scatter-gather helper object used for reading the suffix */ @@ -498,7 +587,7 @@ namespace ipc { /** The suffix type */ using suffix_type = typename message_type::suffix_type; /** The encoder type */ - using encoder_type = blob_codec_t; + using encoder_type = blob_codec_t; /** The scatter-gather helper object which stores the length and buffer so that the length field may be scatter-gathered */ using sg_write_helper_type = typename encoder_type::sg_write_helper_type; /** The scatter-gather helper object used for reading the suffix */ diff --git a/daemon/ipc/message_key.h b/daemon/ipc/message_key.h index 8849980e..1d464dca 100644 --- a/daemon/ipc/message_key.h +++ b/daemon/ipc/message_key.h @@ -31,6 +31,8 @@ namespace ipc { apc_frame_data, exec_target_app, cpu_state_change, + capture_failed, + capture_started, }; /** The wire-size of the message key */ diff --git a/daemon/ipc/messages.h b/daemon/ipc/messages.h index 8e71e946..42c0a3dd 100644 --- a/daemon/ipc/messages.h +++ b/daemon/ipc/messages.h @@ -6,6 +6,7 @@ #include "ipc/message_key.h" #include "ipc/message_traits.h" #include "ipc/proto/generated/capture_configuration.pb.h" +#include "message_key.h" #include #include @@ -17,6 +18,21 @@ namespace ipc { 
monotonic_delta_t monotonic_delta; int core_no; bool online; + + friend constexpr bool operator==(cpu_state_change_t const & a, cpu_state_change_t const & b) + { + return (a.monotonic_delta == b.monotonic_delta) && (a.core_no == b.core_no) && (a.online == b.online); + } + + friend constexpr bool operator!=(cpu_state_change_t const & a, cpu_state_change_t const & b) + { + return !(a == b); + } + }; + + enum class capture_failed_reason_t : std::uint8_t { + /** Capture failed due to command exec failure */ + command_exec_failed, }; /** @@ -102,6 +118,14 @@ namespace ipc { using msg_cpu_state_change_t = message_t; DEFINE_NAMED_MESSAGE(msg_cpu_state_change_t); + /** Sent from perf agent to shell if capture fails for some reason */ + using msg_capture_failed_t = message_t; + DEFINE_NAMED_MESSAGE(msg_capture_failed_t); + + /** Sent from perf agent to shell starts capturing data */ + using msg_capture_started_t = message_t; + DEFINE_NAMED_MESSAGE(msg_capture_started_t); + /** All supported message types */ using all_message_types_variant_t = std::variant; } diff --git a/daemon/ipc/proto/shell/perf/capture_configuration.proto b/daemon/ipc/proto/shell/perf/capture_configuration.proto index c2142e30..8f3d733b 100644 --- a/daemon/ipc/proto/shell/perf/capture_configuration.proto +++ b/daemon/ipc/proto/shell/perf/capture_configuration.proto @@ -15,6 +15,7 @@ message capture_configuration_t { int32 sample_rate = 3; // Equivalent to SessionData::mSampleRate bool one_shot = 4; // Equivalent to SessionData::mOneShot bool exclude_kernel_events = 5; // Equivalent to SessionData::mExcludeKernelEvents + bool stop_on_exit = 6; // Equivalent to SessionData::mStopOnExit } /** Equivalent to PerfConfig */ @@ -28,14 +29,15 @@ message capture_configuration_t { bool has_attr_context_switch = 7; bool has_ioctl_read_id = 8; bool has_aux_support = 9; + bool has_exclude_callchain_kernel = 10; - bool is_system_wide = 10; - bool exclude_kernel = 11; - bool can_access_tracepoints = 12; + bool is_system_wide = 11; + bool exclude_kernel = 12; + bool can_access_tracepoints = 13; - bool has_armv7_pmu_driver = 13; - bool has_64bit_uname = 14; - bool use_64bit_register_set = 15; + bool has_armv7_pmu_driver = 14; + bool has_64bit_uname = 15; + bool use_64bit_register_set = 16; } /** Equivalent to perf_ringbuffer_config_t */ @@ -82,7 +84,7 @@ message capture_configuration_t { message cpu_properties_t { int32 cluster_index = 1; int32 cpu_id = 2; - int32 spe_type = 3; + uint32 spe_type = 3; } /** Equivalent to UncorePmu */ @@ -95,25 +97,6 @@ message capture_configuration_t { bool has_cycles_counter = 6; } - /** Equivalent to PerfEventGroupIdentifier. 
- * - * Because pointers do not work between forks, the types (and therefore meaning) - * have changed: - * - cluster capture_configuration_t.clusters index - * - pmu capture_configuration_t.uncore_pmus index - * - specific_cpu the specific cpu_number - * - spe if true then type == SPE, when false type == GLOBAL - * - type == GLOBAL - */ - message perf_event_group_identifier_t { - oneof type { - uint64 per_cluster_cpu = 1; // Type::PER_CLUSTER_CPU - uint64 uncore_pmu = 2; // Type::UNCORE_PMU - int32 specific_cpu = 3; // Type::SPECIFIC_CPU - bool spe = 4; // Type::SPE (when true), Type::GLOBAL (when false, or no other type field set) - } - } - /** Equivalent to perf_event_attr */ message perf_event_attribute_t { enum precise_ip_t { @@ -162,22 +145,26 @@ message capture_configuration_t { uint32 aux_watermark = 38; } - /** Equivalent to perf_event_t */ - message perf_event_t { + /** Equivalent to event_definition_t */ + message perf_event_definition_t { perf_event_attribute_t attr = 1; int32 key = 2; } - /** A tuple of identifier + list of events representing one event group */ - message perf_event_group_events_t { - perf_event_group_identifier_t id = 1; - repeated perf_event_t events = 2; + /** List of perf_event_definition_t (for map entries) */ + message perf_event_definition_list_t { + repeated perf_event_definition_t events = 1; } - /** A subset of perf_groups_state_t required for the activator */ - message perf_groups_t { - repeated perf_event_group_events_t groups = 1; - uint64 number_of_events_added = 2; + + /** Equivalent to event_configuration_t */ + message perf_event_configuration_t { + perf_event_definition_t header_event = 1; + perf_event_definition_list_t global_events = 2; + perf_event_definition_list_t spe_events = 3; + map cluster_specific_events = 4; + map cpu_specific_events = 5; + map uncore_specific_events = 6; } // ------------------------------------------- @@ -187,7 +174,7 @@ message capture_configuration_t { repeated cpu_cluster_t clusters = 3; repeated uncore_pmu_t uncore_pmus = 4; repeated cpu_properties_t cpus = 5; - perf_groups_t perf_groups = 6; + perf_event_configuration_t event_configuration = 6; perf_ringbuffer_config_t ringbuffer_config = 7; command_t command = 8; string wait_process = 9; @@ -195,4 +182,6 @@ message capture_configuration_t { uint32 num_cpu_cores = 11; bool enable_on_exec = 12; map cpuid_to_core_name = 13; + map perf_pmu_type_to_name = 14; + bool stop_pids = 15; } diff --git a/daemon/ipc/raw_ipc_channel_sink.h b/daemon/ipc/raw_ipc_channel_sink.h index 35b6f14c..7a31a3d2 100644 --- a/daemon/ipc/raw_ipc_channel_sink.h +++ b/daemon/ipc/raw_ipc_channel_sink.h @@ -3,13 +3,17 @@ #pragma once #include "Logging.h" +#include "async/continuations/async_initiate.h" +#include "async/continuations/stored_continuation.h" #include "ipc/codec.h" #include "ipc/message_key.h" #include "ipc/message_traits.h" +#include "ipc/responses.h" #include "lib/Assert.h" #include "lib/AutoClosingFd.h" #include +#include #include #include @@ -27,6 +31,10 @@ namespace ipc { */ class raw_ipc_channel_sink_t : public std::enable_shared_from_this { public: + template + using stored_message_continuation_t = + async::continuations::raw_stored_continuation_t; + /** Factory method */ static std::shared_ptr create(boost::asio::io_context & io_context, lib::AutoClosingFd && out) @@ -40,15 +48,34 @@ namespace ipc { template auto async_send_message(MessageType message, CompletionToken && token) { + using namespace async::continuations; + using message_type = std::decay_t; + 
static_assert(is_ipc_message_type_v); - return boost::asio::async_initiate( - [st = shared_from_this(), message = std::forward(message)](auto && handler) mutable { - using Handler = decltype(handler); - st->do_async_send_message(std::forward(message), std::forward(handler)); + return async_initiate_explicit( + [st = shared_from_this(), message = std::forward(message)](auto && sc) mutable { + st->do_async_send_message(std::forward(message), std::forward(sc)); }, - token); + std::forward(token)); + } + + /** + * Write some fixed-size message into the send buffer. + */ + template + auto async_send_response(MessageType message, CompletionToken && token) + { + using namespace async::continuations; + using message_type = std::decay_t; + static_assert(is_response_message_type_v); + + return async_initiate_explicit( + [st = shared_from_this(), message = std::forward(message)](auto && sc) mutable { + st->do_async_send_message(std::forward(message), std::forward(sc)); + }, + std::forward(token)); } private: @@ -59,29 +86,30 @@ namespace ipc { [[nodiscard]] virtual std::size_t expected_size() const = 0; virtual void do_send(raw_ipc_channel_sink_t & parent, std::shared_ptr shared_this) = 0; - virtual void call_handler(boost::system::error_code const & ec) = 0; + virtual void call_handler(boost::asio::io_context & context, boost::system::error_code const & ec) = 0; }; /** Default message queue item type, copies the message into a buffer object held in the queue item */ - template + template class message_queue_item_t : public message_queue_item_base_t { public: using message_type = std::decay_t; - using handler_type = std::decay_t; + using key_codec_type = key_codec_t; using header_codec_type = header_codec_t; using suffix_codec_type = suffix_codec_t; using sg_write_helper_type = typename suffix_codec_type::sg_write_helper_type; + using stored_continuation_t = stored_message_continuation_t; - constexpr message_queue_item_t(message_type && message, handler_type && handler) - : message(std::forward(message)), + constexpr message_queue_item_t(message_type && message, stored_continuation_t && sc) + : message(std::move(message)), sg_helper(suffix_codec_type::fill_sg_write_helper_type(this->message)), - handler(std::forward(handler)) + sc(std::move(sc)) { } [[nodiscard]] std::size_t expected_size() const override { - return key_codec_t::key_size + header_codec_type::header_size + return key_codec_type::key_size + header_codec_type::header_size + suffix_codec_type::suffix_write_size(sg_helper); } @@ -90,19 +118,19 @@ namespace ipc { { using sg_buffer_type = std::array; // fill the scatter gather buffer list sg_buffer_type buffers {}; lib::Span buffers_span {buffers}; - key_codec_t::fill_sg_buffer(buffers_span.subspan(0, key_codec_t::sg_writer_buffers_count), - message_type::key); - header_codec_type::fill_sg_buffer(buffers_span.subspan(key_codec_t::sg_writer_buffers_count, + key_codec_type::fill_sg_buffer(buffers_span.subspan(0, key_codec_type::sg_writer_buffers_count), + message_type::key); + header_codec_type::fill_sg_buffer(buffers_span.subspan(key_codec_type::sg_writer_buffers_count, header_codec_type::sg_writer_buffers_count), message); - suffix_codec_type::fill_sg_buffer(buffers_span.subspan(key_codec_t::sg_writer_buffers_count + suffix_codec_type::fill_sg_buffer(buffers_span.subspan(key_codec_type::sg_writer_buffers_count + header_codec_type::sg_writer_buffers_count), sg_helper); @@ -110,16 +138,19 @@ namespace ipc { parent.do_send_item(std::move(shared_this), std::move(buffers)); } - void 
call_handler(boost::system::error_code const & ec) override { handler(ec, std::move(message)); } + void call_handler(boost::asio::io_context & context, boost::system::error_code const & ec) override + { + resume_continuation(context, std::move(sc), ec, std::move(message)); + } private: message_type message; sg_write_helper_type sg_helper; - handler_type handler; + stored_continuation_t sc; }; // so it can call do_send_item - template + template friend class message_queue_item_t; boost::asio::io_context::strand strand; @@ -134,31 +165,27 @@ namespace ipc { } /** Insert the message and handler into the send queue */ - template - void do_async_send_message(MessageType && message, Handler && handler) + template + void do_async_send_message(MessageType && message, + stored_message_continuation_t> && sc) { using message_type = std::decay_t; - using handler_type = std::decay_t; - - static_assert(std::is_same_v); - static_assert(std::is_same_v); - - using queue_item_t = message_queue_item_t; + using queue_item_t = message_queue_item_t; LOG_TRACE("(%p) New send request received with key %zu", this, std::size_t(message_type::key)); // run on the strand to serialize access to the queue - boost::asio::post( - strand, - [st = shared_from_this(), - queue_item = std::make_shared(std::forward(message), - std::forward(handler))]() mutable { - st->strand_do_async_send_message(message_type::key, std::move(queue_item)); - }); + boost::asio::post(strand, + [st = shared_from_this(), + queue_item = std::make_shared(std::forward(message), + std::move(sc))]() mutable { + st->strand_do_async_send_message(message_type::key, std::move(queue_item)); + }); } /** Insert the message and handler into the send queue */ - void strand_do_async_send_message(message_key_t key, std::shared_ptr queue_item) + template + void strand_do_async_send_message(MessageType key, std::shared_ptr queue_item) { // fast path for case that queue is already empty and consumer is waiting const auto cip = is_consume_in_progress(); @@ -211,11 +238,11 @@ namespace ipc { buffers, [st = shared_from_this(), queue_item = std::move(queue_item)]( boost::system::error_code const & ec, - std::size_t n) mutable { st->on_sent_result(std::move(queue_item), ec, n); }); + std::size_t n) mutable { st->on_sent_result(queue_item, ec, n); }); } /** Handle the send result */ - void on_sent_result(std::shared_ptr queue_item, + void on_sent_result(std::shared_ptr const & queue_item, boost::system::error_code const & ec, std::size_t n) { @@ -225,23 +252,21 @@ namespace ipc { this, queue_item.get(), ec.message().c_str()); - // notify the handler (but post so it happens asynchronously) - return boost::asio::post(strand.context(), - [ec, queue_item = std::move(queue_item)]() { queue_item->call_handler(ec); }); + // notify the handler (which happens asynchronously) + return queue_item->call_handler(strand.context(), ec); } // short write error if (n != queue_item->expected_size()) { LOG_DEBUG("(%p) Sending queue item %p failed with short write %zu", this, queue_item.get(), n); - // notify the handler (but post so it happens asynchronously) - return boost::asio::post(strand.context(), [queue_item = std::move(queue_item)]() { - queue_item->call_handler(boost::asio::error::make_error_code(boost::asio::error::misc_errors::eof)); - }); + // notify the handler (which happens asynchronously) + return queue_item->call_handler( + strand.context(), + boost::asio::error::make_error_code(boost::asio::error::misc_errors::eof)); } - // notify the handler - 
boost::asio::post(strand.context(), - [queue_item = std::move(queue_item)]() { queue_item->call_handler({}); }); + // notify the handler (which happens asynchronously) + queue_item->call_handler(strand.context(), {}); // consume the next item (but from the stand as it will modify state) return boost::asio::post(strand, [st = shared_from_this()]() { st->strand_do_consume_next(); }); diff --git a/daemon/ipc/raw_ipc_channel_source.h b/daemon/ipc/raw_ipc_channel_source.h index e010a5c0..a1e1c054 100644 --- a/daemon/ipc/raw_ipc_channel_source.h +++ b/daemon/ipc/raw_ipc_channel_source.h @@ -3,7 +3,9 @@ #pragma once #include "Logging.h" -#include "async/completion_handler.h" +#include "async/continuations/async_initiate.h" +#include "async/continuations/stored_continuation.h" +#include "async/continuations/use_continuation.h" #include "ipc/codec.h" #include "ipc/message_key.h" #include "ipc/message_traits.h" @@ -13,6 +15,7 @@ #include #include +#include #include #include #include @@ -40,29 +43,27 @@ namespace ipc { }; /** Helper wrapper to hold the handler, and enforce release of 'busy' state */ - template - class handler_wrapper_t { + template + class sc_wrapper_t { public: - using handler_type = std::decay_t; + using stored_continuation_type = async::continuations:: + raw_stored_continuation_t; - static_assert(std::is_same_v); + explicit constexpr sc_wrapper_t(stored_continuation_type && sc) : sc(std::move(sc)) {} - explicit constexpr handler_wrapper_t(handler_type && handler) : handler(std::forward(handler)) - { - } - - void operator()(bool & recv_in_progress, + void operator()(boost::asio::io_context & context, + bool & recv_in_progress, boost::system::error_code const & ec, all_message_types_variant_t && message) { // mark receive complete so another may be queued recv_in_progress = false; // invoke the handler - handler(ec, std::move(message)); + resume_continuation(context, std::move(sc), ec, std::move(message)); }; private: - handler_type handler; + stored_continuation_type sc; }; /** Helper to find the traits type for some key from the list of supported types */ @@ -72,10 +73,10 @@ namespace ipc { // the terminator must be std::monostate template<> struct message_types_trait_finder_t { - template - static constexpr void visit(message_key_t key, T & host, handler_wrapper_t && handler) + template + static constexpr void visit(message_key_t key, T & host, sc_wrapper_t && scw) { - host.do_recv_unknown(key, std::move(handler)); + host.do_recv_unknown(key, std::move(scw)); } }; @@ -86,14 +87,14 @@ namespace ipc { static_assert(traits_type::key != message_key_t::unknown); - template - static constexpr void visit(message_key_t key, T & host, handler_wrapper_t && handler) + template + static constexpr void visit(message_key_t key, T & host, sc_wrapper_t && scw) { if (key == traits_type::key) { - host.template do_recv_known(std::move(handler)); + host.template do_recv_known(std::move(scw)); } else { - next_message_traits_type::visit(key, host, std::move(handler)); + next_message_traits_type::visit(key, host, std::move(scw)); } } }; @@ -126,13 +127,13 @@ namespace ipc { template auto async_recv_message(CompletionToken && token) { - return boost::asio::async_initiate( - [st = shared_from_this()](auto && handler) mutable { - using Handler = decltype(handler); - st->do_async_recv_message(std::forward(handler)); + using namespace async::continuations; + + return async_initiate_explicit( + [st = shared_from_this()](auto && sc) mutable { + st->do_async_recv_message(std::forward(sc)); }, - token); + 
std::forward(token)); } private: @@ -144,8 +145,8 @@ namespace ipc { using message_wrapper_t = detail::message_wrapper_t; /** Helper wrapper to hold the handler, and enforce release of 'busy' state */ - template - using handler_wrapper_t = detail::handler_wrapper_t; + template + using sc_wrapper_t = detail::sc_wrapper_t; template friend struct detail::message_types_trait_finder_t; @@ -162,32 +163,37 @@ namespace ipc { } /** Perform the receive action */ - template - void do_async_recv_message(Handler && handler) + template + void do_async_recv_message( + async::continuations:: + raw_stored_continuation_t && sc) { - using handler_type = std::decay_t; - LOG_TRACE("(%p) New receive request received", this); // run on strand to serialize access - boost::asio::post(strand, - [st = shared_from_this(), handler = std::forward(handler)]() mutable { - st->strand_do_async_recv_message(std::forward(handler)); - }); + boost::asio::post(strand, [st = shared_from_this(), sc = std::move(sc)]() mutable { + st->strand_do_async_recv_message(std::move(sc)); + }); } /** Perform the receive action from the strand */ - template - void strand_do_async_recv_message(Handler && handler) + template + void strand_do_async_recv_message( + async::continuations:: + raw_stored_continuation_t && sc) { - using handler_type = std::decay_t; - using handler_wrapper_type = handler_wrapper_t; + using unknown_message = message_t; + using key_codec_type = key_codec_t; + using sc_wrapper_type = sc_wrapper_t; // should not already be pending... if (std::exchange(recv_in_progress, true)) { LOG_TRACE("(%p) Request aborted due to concurrent operation in progress", this); using namespace boost::system; - return handler(errc::make_error_code(errc::operation_in_progress), {}); + return resume_continuation(strand.context(), + std::move(sc), + errc::make_error_code(errc::operation_in_progress), + {}); } LOG_TRACE("(%p) Reading next key from stream", this); @@ -195,20 +201,19 @@ namespace ipc { // read the key boost::asio::async_read( in, - key_codec_t::mutable_buffer(message_key_buffer), - [st = shared_from_this(), - handler = handler_wrapper_type(std::forward(handler))](auto ec, auto n) mutable { + key_codec_type::mutable_buffer(message_key_buffer), + [st = shared_from_this(), scw = sc_wrapper_type(std::move(sc))](auto ec, auto n) mutable { // validate error if (ec) { LOG_TRACE("(%p) Reading next key failed with error=%s", st.get(), ec.message().c_str()); - return st->invoke_handler(std::move(handler), ec, {}); + return st->invoke_handler(std::move(scw), ec, {}); } // validate size - if (n != key_codec_t::key_size) { + if (n != key_codec_type::key_size) { LOG_TRACE("(%p) Reading next key failed with due to short read (n=%zu)", st.get(), n); return st->invoke_handler( - std::move(handler), + std::move(scw), boost::asio::error::make_error_code(boost::asio::error::misc_errors::eof), {}); } @@ -219,33 +224,30 @@ namespace ipc { LOG_TRACE("(%p) Reading next key succeeded with new key %zu", st.get(), std::size_t(key)); // find the matching traits type - return message_types_trait_finder_type::visit(key, *st, std::move(handler)); + return message_types_trait_finder_type::visit(key, *st, std::move(scw)); }); } /** Received an unexpected key */ - template - void do_recv_unknown(message_key_t key, handler_wrapper_t && handler) + template + void do_recv_unknown(message_key_t key, sc_wrapper_t && scw) { LOG_TRACE("(%p) Read aborted due to unrecognized message key %zu", this, std::size_t(key)); using namespace boost::system; - return 
invoke_handler(std::move(handler), errc::make_error_code(errc::operation_not_supported), {}); + return invoke_handler(std::move(scw), errc::make_error_code(errc::operation_not_supported), {}); } /** Received a known key and have the type traits for it */ - template - void do_recv_known(handler_wrapper_t && handler) + template + void do_recv_known(sc_wrapper_t && scw) { - using handler_type = std::decay_t; using traits_type = TraitsType; using message_type = typename traits_type::message_type; using header_codec_type = header_codec_t; using suffix_codec_type = suffix_codec_t; using wrapper_type = message_wrapper_t; - static_assert(std::is_same_v); - // allocate the message wrapper auto message_wrapper = std::make_shared(); @@ -254,7 +256,7 @@ namespace ipc { LOG_TRACE("(%p) Skipping header read for key %zu due to zero length header", this, std::size_t(traits_type::key)); - return do_recv_suffix_length(message_wrapper, std::move(handler)); + return do_recv_suffix_length(message_wrapper, std::move(scw)); } LOG_TRACE("(%p) Reading header for key %zu of length %zu", @@ -266,7 +268,7 @@ namespace ipc { return boost::asio::async_read( in, header_codec_type::mutable_buffer(message_wrapper->message), - [st = shared_from_this(), message_wrapper, handler = std::move(handler)](auto ec, auto n) mutable { + [st = shared_from_this(), message_wrapper, scw = std::move(scw)](auto ec, auto n) mutable { // validate error if (ec) { LOG_TRACE("(%p) Reading header for key=%zu failed with error=%s", @@ -274,7 +276,7 @@ namespace ipc { std::size_t(traits_type::key), ec.message().c_str()); - return st->invoke_handler(std::move(handler), ec, {}); + return st->invoke_handler(std::move(scw), ec, {}); } // validate size @@ -285,19 +287,19 @@ namespace ipc { n); return st->invoke_handler( - std::move(handler), + std::move(scw), boost::asio::error::make_error_code(boost::asio::error::misc_errors::eof), {}); } // now read the suffix - return st->do_recv_suffix_length(message_wrapper, std::move(handler)); + return st->do_recv_suffix_length(message_wrapper, std::move(scw)); }); } /** Received the key and header, now read the length */ - template - void do_recv_suffix_length(std::shared_ptr message_wrapper, handler_wrapper_t && handler) + template + void do_recv_suffix_length(std::shared_ptr message_wrapper, sc_wrapper_t && scw) { using traits_type = TraitsType; using message_type = typename traits_type::message_type; @@ -311,7 +313,7 @@ namespace ipc { LOG_TRACE("(%p) Skipping suffix length read for key %zu due to zero length", this, std::size_t(traits_type::key)); - return do_recv_suffix(message_wrapper, std::move(handler)); + return do_recv_suffix(message_wrapper, std::move(scw)); } LOG_TRACE("(%p) Reading suffix length for key %zu of length %zu", @@ -323,7 +325,7 @@ namespace ipc { return boost::asio::async_read( in, suffix_codec_type::mutable_length_buffer(message_wrapper->buffer), - [st = shared_from_this(), message_wrapper, handler = std::move(handler)](auto ec, auto n) mutable { + [st = shared_from_this(), message_wrapper, scw = std::move(scw)](auto ec, auto n) mutable { // validate error if (ec) { LOG_TRACE("(%p) Reading suffix length for key=%zu failed with error=%s", @@ -331,7 +333,7 @@ namespace ipc { std::size_t(traits_type::key), ec.message().c_str()); - return st->invoke_handler(std::move(handler), ec, {}); + return st->invoke_handler(std::move(scw), ec, {}); } // validate size @@ -342,19 +344,19 @@ namespace ipc { n); return st->invoke_handler( - std::move(handler), + std::move(scw), 
boost::asio::error::make_error_code(boost::asio::error::misc_errors::eof), {}); } // now read the suffix - return st->do_recv_suffix(message_wrapper, std::move(handler)); + return st->do_recv_suffix(message_wrapper, std::move(scw)); }); } /** Received the key, header and length, now read the suffix */ - template - void do_recv_suffix(std::shared_ptr message_wrapper, handler_wrapper_t && handler) + template + void do_recv_suffix(std::shared_ptr message_wrapper, sc_wrapper_t && scw) { using traits_type = TraitsType; using message_type = typename traits_type::message_type; @@ -371,7 +373,7 @@ namespace ipc { this, std::size_t(traits_type::key)); - return do_recv_complete(message_wrapper, std::move(handler)); + return do_recv_complete(message_wrapper, std::move(scw)); } LOG_TRACE("(%p) Reading suffix for key %zu of length %zu", @@ -383,10 +385,9 @@ namespace ipc { return boost::asio::async_read( in, buffer, - [st = shared_from_this(), - message_wrapper, - handler = std::move(handler), - expected_length = buffer.size()](auto ec, auto n) mutable { + [st = shared_from_this(), message_wrapper, scw = std::move(scw), expected_length = buffer.size()]( + auto ec, + auto n) mutable { // validate error if (ec) { LOG_TRACE("(%p) Reading suffix for key=%zu failed with error=%s", @@ -394,7 +395,7 @@ namespace ipc { std::size_t(traits_type::key), ec.message().c_str()); - return st->invoke_handler(std::move(handler), ec, {}); + return st->invoke_handler(std::move(scw), ec, {}); } // validate size @@ -405,19 +406,19 @@ namespace ipc { n); return st->invoke_handler( - std::move(handler), + std::move(scw), boost::asio::error::make_error_code(boost::asio::error::misc_errors::eof), {}); } // now read the suffix - return st->do_recv_complete(message_wrapper, std::move(handler)); + return st->do_recv_complete(message_wrapper, std::move(scw)); }); } /** Read complete */ - template - void do_recv_complete(std::shared_ptr message_wrapper, handler_wrapper_t && handler) + template + void do_recv_complete(std::shared_ptr message_wrapper, sc_wrapper_t && scw) { using traits_type = TraitsType; using message_type = typename traits_type::message_type; @@ -438,68 +439,47 @@ namespace ipc { std::size_t(traits_type::key), ec.message().c_str()); - return invoke_handler(std::move(handler), ec, {}); + return invoke_handler(std::move(scw), ec, {}); } // notify handler - return invoke_handler(std::move(handler), ec, std::move(message_wrapper->message)); + return invoke_handler(std::move(scw), ec, std::move(message_wrapper->message)); } /** Invoke the handler */ - template - void invoke_handler(handler_wrapper_t && handler, + template + void invoke_handler(sc_wrapper_t && scw, boost::system::error_code const & ec, all_message_types_variant_t && message) { - handler(recv_in_progress, ec, std::move(message)); + scw(strand.context(), recv_in_progress, ec, std::move(message)); } }; - /** - * An async operation that receives from the source channel until one of the requested - * message types arrives. Any unrequested message types are logged and discarded. 
- */ - template - class receive_one_of_op { - using type_tuple = std::tuple; - - public: - explicit receive_one_of_op(std::shared_ptr channel, CompletionHandler && handler) - : handler {std::forward(handler)}, channel(std::move(channel)) - { - } - - void operator()(const boost::system::error_code & ec, all_message_types_variant_t && msg_variant) - { - if (ec) { - handler(ec, std::variant {}); - return; - } + namespace detail { - std::visit([this](auto && msg) { this->try_message_filter(std::forward(msg)); }, - msg_variant); - } + template + struct try_message_filter_t { + using type_tuple = std::tuple...>; + using variant_type = std::variant...>; + using pair_type = std::pair; - private: - using handler_type = - async::completion_handler_ref_t>; - handler_type handler; - std::shared_ptr channel; + template + static constexpr std::optional filter(ReceivedType && value) + { + using received_type = std::decay_t; - template - void try_message_filter(MessageType && msg) - { - using T = std::decay_t; - if constexpr (boost::mp11::mp_contains::value) { - handler({}, std::variant(std::forward(msg))); + if constexpr (boost::mp11::mp_contains::value) { + return std::pair {boost::system::error_code {}, variant_type {std::forward(value)}}; + } + else { + LOG_DEBUG("Unexpected message [%s]", ipc::named_message_t::name.data()); + return {}; + } } - else { - LOG_DEBUG("Unexpected message [%s]", ipc::named_message_t::name.data()); + }; - channel->async_recv_message(std::move(*this)); - } - } - }; + } /** * Receive one of a subset of message types from a raw_ipc_channel_source_t. Will continuously @@ -513,16 +493,37 @@ namespace ipc { template auto async_receive_one_of(std::shared_ptr source, CompletionToken && token) { - return boost::asio::async_initiate)>( - [src = std::move(source)](auto && handler) { - using HandlerType = decltype(handler); - auto op = receive_one_of_op { - src, - std::forward(handler)}; - src->async_recv_message(std::move(op)); + using namespace async::continuations; + + using filter_type = detail::try_message_filter_t; + using optional_pair_type = std::optional; + using variant_type = typename filter_type::variant_type; + + return async_initiate_explicit)>( + [src = std::move(source)](auto && sc) { + submit( + start_with(optional_pair_type {}) // + | loop( + [](optional_pair_type && pair) { return start_with(!pair.has_value(), std::move(pair)); }, + [src](optional_pair_type const &) { + return src->async_recv_message(use_continuation) // + | then([](boost::system::error_code const & ec, + all_message_types_variant_t && msg_variant) -> optional_pair_type { + if (ec) { + return std::pair {ec, variant_type {}}; + } + + return std::visit( + [](auto && msg) { + return filter_type::filter(std::forward(msg)); + }, + std::move(msg_variant)); + }); + }) + | then([](optional_pair_type && value) { return *value; }) // + | unpack_tuple(), + std::forward(sc)); }, - token); + std::forward(token)); } } diff --git a/daemon/ipc/responses.h b/daemon/ipc/responses.h new file mode 100644 index 00000000..82bec672 --- /dev/null +++ b/daemon/ipc/responses.h @@ -0,0 +1,56 @@ +/* Copyright (C) 2022 by Arm Limited. All rights reserved. 
*/ +#pragma once + +#include "ISender.h" +#include "lib/Span.h" + +#include + +namespace ipc { + + enum class response_type : char { + unknown = 0, + // Actual values understood by streamline + xml = 1, + apc_data = 3, + ack = 4, + nak = 5, + current_config = 6, + error = '\xFF' + }; + + template + struct response_t { + static constexpr response_type key = Key; + using payload_type = Payload; + + payload_type payload; + }; + + using response_apc_data_t = response_t>; + using response_xml_t = response_t>; + using response_current_config_t = response_t>; + using response_error_t = response_t>; + using response_ack_t = response_t>; + using response_nak_t = response_t>; + + /** Traits object for response types */ + template + struct repsonse_traits_t { + static constexpr RepsonseType key = RepsonseType::RAW; + }; + + /** Traits object for response types derived from response_t */ + template + struct repsonse_traits_t> { + using message_type = response_t; + using payload_type = typename message_type::payload_type; + + static constexpr response_type key = message_type::key; + }; + + /** Helper trait to validate some response type */ + template + static constexpr bool is_response_message_type_v = (repsonse_traits_t::key + != response_type::unknown); +} diff --git a/daemon/lib/Assert.cpp b/daemon/lib/Assert.cpp index eedf0461..3987a442 100644 --- a/daemon/lib/Assert.cpp +++ b/daemon/lib/Assert.cpp @@ -1,21 +1,26 @@ -/* Copyright (C) 2016-2020 by Arm Limited. All rights reserved. */ +/* Copyright (C) 2016-2022 by Arm Limited. All rights reserved. */ #include "lib/Assert.h" -#if !defined(NDEBUG) +#if CONFIG_ASSERTIONS #include #include +#include -namespace lib { namespace _assert_internal { +namespace lib::_assert_internal { /** * Assertion helper; outputs error message and terminates */ void runtime_assert_terminate(const char * file, unsigned line, const char * func, const std::string & msg) { fprintf(stderr, "Assertion failed failure in '%s' @ [%s:%u]: %s\n", func, file, line, msg.c_str()); +#if defined(GATOR_UNIT_TESTS) && GATOR_UNIT_TESTS + throw std::runtime_error("failed in runtime_assert"); +#else abort(); +#endif } -}} +} -#endif /* !defined(NDEBUG) */ +#endif /* CONFIG_ASSERTIONS */ diff --git a/daemon/lib/Assert.h b/daemon/lib/Assert.h index 7399f85d..e87d6bc1 100644 --- a/daemon/lib/Assert.h +++ b/daemon/lib/Assert.h @@ -1,9 +1,11 @@ -/* Copyright (C) 2016-2020 by Arm Limited. All rights reserved. */ +/* Copyright (C) 2016-2022 by Arm Limited. All rights reserved. */ #ifndef INCLUDE_LIB_ASSERT_H #define INCLUDE_LIB_ASSERT_H -#if !defined(NDEBUG) +#include "Config.h" + +#if CONFIG_ASSERTIONS #include #include @@ -37,7 +39,7 @@ namespace lib { } while (0) } -#else /* !defined(NDEBUG) */ +#else /* CONFIG_ASSERTIONS */ /** assertion macro */ #define runtime_assert(test, msg) \ @@ -46,6 +48,6 @@ namespace lib { } \ } while (0) -#endif /* !defined(NDEBUG) */ +#endif /* CONFIG_ASSERTIONS */ #endif /* INCLUDE_LIB_ASSERT_H */ diff --git a/daemon/lib/Popen.cpp b/daemon/lib/Popen.cpp index 966a846f..9e26f563 100644 --- a/daemon/lib/Popen.cpp +++ b/daemon/lib/Popen.cpp @@ -1,4 +1,4 @@ -/* Copyright (C) 2018-2021 by Arm Limited. All rights reserved. */ +/* Copyright (C) 2018-2022 by Arm Limited. All rights reserved. 
*/ #include "lib/Popen.h" @@ -55,7 +55,7 @@ namespace lib { ::setvbuf(stderr, nullptr, _IONBF, 0); // get sighup if parent exits - ::prctl(PR_SET_PDEATHSIG, SIGHUP); + ::prctl(PR_SET_PDEATHSIG, SIGKILL); execvp(command_and_args[0], const_cast(command_and_args.data())); const int error = errno; diff --git a/daemon/lib/Span.h b/daemon/lib/Span.h index bb673aeb..1d1e439c 100644 --- a/daemon/lib/Span.h +++ b/daemon/lib/Span.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2018-2021 by Arm Limited. All rights reserved. */ +/* Copyright (C) 2018-2022 by Arm Limited. All rights reserved. */ #ifndef INCLUDE_LIB_SPAN_H #define INCLUDE_LIB_SPAN_H @@ -159,6 +159,11 @@ namespace lib { { return Span {array, Size}; } + + template + Span(C const &) -> Span; } #endif // INCLUDE_LIB_SPAN_H diff --git a/daemon/lib/Syscall.cpp b/daemon/lib/Syscall.cpp index 4134170b..18fd09ac 100644 --- a/daemon/lib/Syscall.cpp +++ b/daemon/lib/Syscall.cpp @@ -1,9 +1,11 @@ -/* Copyright (C) 2018-2021 by Arm Limited. All rights reserved. */ +/* Copyright (C) 2018-2022 by Arm Limited. All rights reserved. */ #include "Syscall.h" +#include +#include + #include -#include #include #include #include @@ -14,6 +16,7 @@ namespace lib { int close(int fd) { return ::close(fd); } int open(const char * path, int flag) { return ::open(path, flag); } + int open(const char * path, int flag, mode_t mode) { return ::open(path, flag, mode); }; int fcntl(int fd, int cmd, unsigned long arg) { return ::fcntl(fd, cmd, arg); } int ioctl(int fd, unsigned long int request, unsigned long arg) { return ::ioctl(fd, request, arg); } @@ -31,11 +34,13 @@ namespace lib { const int group_fd, const unsigned long flags) { + // NOLINTNEXTLINE(bugprone-narrowing-conversions) return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags); } int accept4(int sockfd, struct sockaddr * addr, socklen_t * addrlen, int flags) { + // NOLINTNEXTLINE(bugprone-narrowing-conversions) return syscall(__NR_accept4, sockfd, addr, addrlen, flags); } @@ -54,5 +59,15 @@ namespace lib { int access(const char * filename, int how) { return ::access(filename, how); } - void exit(int status) { ::exit(status); } + void exit(int status) + { + // NOLINTNEXTLINE(concurrency-mt-unsafe) + ::exit(status); + } + + int kill(pid_t pid, int signal) { return ::kill(pid, signal); } + + pid_t getppid() { return ::getppid(); } + pid_t getpid() { return ::getpid(); } + pid_t gettid() { return pid_t(syscall(__NR_gettid)); } } diff --git a/daemon/lib/Syscall.h b/daemon/lib/Syscall.h index 4882da63..607f6d5b 100644 --- a/daemon/lib/Syscall.h +++ b/daemon/lib/Syscall.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2018-2021 by Arm Limited. All rights reserved. */ +/* Copyright (C) 2018-2022 by Arm Limited. All rights reserved. */ #ifndef INCLUDE_LIB_SYSCALL_H #define INCLUDE_LIB_SYSCALL_H @@ -20,6 +20,8 @@ namespace lib { int open(const char * path, int flag); + int open(const char * path, int flag, mode_t mode); + int fcntl(int fd, int cmd, unsigned long arg = 0); int ioctl(int fd, unsigned long request, unsigned long arg); @@ -45,6 +47,12 @@ namespace lib { int poll(struct pollfd * __fds, nfds_t __nfds, int __timeout); int access(const char * filename, int how); void exit(int status); + + int kill(pid_t pid, int signal); + + pid_t getppid(); + pid_t getpid(); + pid_t gettid(); } #endif // INCLUDE_LIB_SYSCALL_H diff --git a/daemon/lib/Utils.cpp b/daemon/lib/Utils.cpp index b35a009c..04ee63ba 100644 --- a/daemon/lib/Utils.cpp +++ b/daemon/lib/Utils.cpp @@ -1,6 +1,7 @@ /* Copyright (C) 2018-2022 by Arm Limited. 
All rights reserved. */ #include "lib/Utils.h" +#include "ExitStatus.h" #include "Logging.h" #include "lib/FsEntry.h" #include "lib/String.h" @@ -14,6 +15,7 @@ #include #include +#include #include #include @@ -23,15 +25,60 @@ #define ANDROID_SHELL_UID 2000 namespace lib { - int parseLinuxVersion(struct utsname & utsname) + namespace { + // NOLINTNEXTLINE(bugprone-easily-swappable-parameters) + bool get_uid_from_stat(const char * const username, const char * const tmpDir, uid_t & uid) + { + // Lookups may fail when using a different libc or a statically compiled executable + constexpr auto tmp_str_length = 32; + lib::printf_str_t gatorTemp {"%s/gator_temp", tmpDir}; + + const int fd = lib::open(gatorTemp, O_CREAT | O_CLOEXEC, S_IRUSR | S_IWUSR); + if (fd < 0) { + return false; + } + close(fd); + + constexpr auto cmd_str_length = 128; + lib::printf_str_t cmd {"chown %s %s || rm -f %s", + username, + gatorTemp.c_str(), + gatorTemp.c_str()}; + + const int pid = fork(); + if (pid < 0) { + LOG_ERROR("fork failed"); + return false; + } + + if (pid == 0) { + execlp("sh", "sh", "-c", cmd, nullptr); + lib::exit(COMMAND_FAILED_EXIT_CODE); + } + + while ((waitpid(pid, nullptr, 0) < 0) && (errno == EINTR)) { + } + + struct stat st; + if (stat(gatorTemp, &st) != 0) { + return false; + } + unlink(gatorTemp); + uid = st.st_uid; + return true; + } + } + + kernel_version_no_t parseLinuxVersion(struct utsname & utsname) { - int version[3] = {0, 0, 0}; + constexpr unsigned base = 10; + + std::array version {{0, 0, 0}}; int part = 0; char * ch = utsname.release; while (*ch >= '0' && *ch <= '9' && part < 3) { - version[part] = 10 * version[part] + *ch - '0'; - + version[part] = (base * version[part]) + (*ch - '0'); ++ch; if (*ch == '.') { ++part; @@ -217,4 +264,49 @@ namespace lib { const uint32_t uid = lib::geteuid(); return (uid == ROOT_UID || uid == ANDROID_SHELL_UID); } + + std::optional> resolve_uid_gid(char const * username) + { + uid_t uid = geteuid(); + gid_t gid = getegid(); + // if name is null then just use the current user + if (username != nullptr) { + // for non root. 
+ // Verify root permissions + auto is_root = (geteuid() == 0); + if (!is_root) { + LOG_ERROR("Unable to set user to %s for command because gatord is not running as root", username); + return {}; + } + + // Look up the username + // NOLINTNEXTLINE(concurrency-mt-unsafe) + struct passwd * const user = getpwnam(username); + if (user != nullptr) { + uid = user->pw_uid; + gid = user->pw_gid; + } + else { + // Unable to get the user without getpwnam, so create a unique uid by adding a fixed number to the pid + constexpr auto gid_random_constant = 0x484560f8; + gid = gid_random_constant + getpid(); + + std::string tmp_dir; + if (access("/tmp", W_OK) == 0) { + // We are on Linux + tmp_dir = "/tmp"; + } + else if (access("/data", W_OK) == 0) { + // We are on Android + tmp_dir = "/data"; + } + + if (tmp_dir.empty() || !get_uid_from_stat(username, tmp_dir.c_str(), uid)) { + LOG_ERROR("Unable to look up the user %s, please double check that the user exists", username); + return {}; + } + } + } + return {{uid, gid}}; + } } diff --git a/daemon/lib/Utils.h b/daemon/lib/Utils.h index 97458156..880e6b6e 100644 --- a/daemon/lib/Utils.h +++ b/daemon/lib/Utils.h @@ -4,14 +4,18 @@ #define INCLUDE_LIB_UTILS_H #include +#include #include #include +#include #include namespace lib { - int parseLinuxVersion(struct utsname & utsname); + using kernel_version_no_t = unsigned; + + kernel_version_no_t parseLinuxVersion(struct utsname & utsname); int readIntFromFile(const char * fullpath, int & value); int readInt64FromFile(const char * fullpath, int64_t & value); @@ -33,10 +37,33 @@ namespace lib { */ bool isRootOrShell(); + /** + * @brief gets the UID and GID for a certain user + * + */ + std::optional> resolve_uid_gid(char const * username); + + /** Takes any type and evaluates to false. + * + * This is used in static_asserts that you always want to fail in templated contexts, as you can't just put false + * in as the compiler will always trigger it even if wrapped in an if constexpr (false). So you use this with an + * input parameter type to force a conditional evaluation. + */ template struct always_false : std::false_type { }; + /** Use in a std::visit to allow for multiple Callables to handle variant types based on overloading. + * + * This is pretty much a copy of the overloaded type used in the example in + * https://en.cppreference.com/w/cpp/utility/variant/visit. + */ + template + struct visitor_overloader : Ts... { + explicit visitor_overloader(Ts &&... ts) : Ts {std::forward(ts)}... {} + + using Ts::operator()...; + }; } #endif // INCLUDE_LIB_UTILS_H diff --git a/daemon/lib/error_code_or.hpp b/daemon/lib/error_code_or.hpp new file mode 100644 index 00000000..f5bc0ec7 --- /dev/null +++ b/daemon/lib/error_code_or.hpp @@ -0,0 +1,72 @@ +/* Copyright (C) 2022 by Arm Limited. All rights reserved. 
*/ + +#pragma once + +#include +#include + +#include + +namespace lib { + /** An error code, or some value */ + template + using error_code_or_t = std::variant; + + /** @return The error code, or nullptr if no error */ + template + constexpr auto * get_error(error_code_or_t const & eot) + { + return std::get_if(&eot); + } + + /** @return The value, must previously have been checked for no error */ + template + constexpr T & get_value(error_code_or_t & eot) + { + return std::get(eot); + } + + /** @return The value, must previously have been checked for no error */ + template + constexpr T const & get_value(error_code_or_t const & eot) + { + return std::get(eot); + } + + /** @return The value, must previously have been checked for no error */ + template + constexpr T && get_value(error_code_or_t && eot) + { + return std::move(std::get(std::move(eot))); + } + + /** Copy either the error or the value into one of the provided arguments. + * @return True if the value was extracted, false if the error was + */ + template + constexpr bool get_error_or_value(error_code_or_t const & eot, T & value, E & error) + { + if (auto const * e = get_error(eot)) { + error = *e; + return false; + } + + value = get_value(eot); + return true; + } + + /** Move either the error or the value into one of the provided arguments. + * @return True if the value was extracted, false if the error was + */ + template + constexpr bool get_error_or_value(error_code_or_t && eot, T & value, E & error) + { + if (auto const * e = get_error(eot)) { + error = *e; + return false; + } + + value = get_value(std::move(eot)); + return true; + } +} diff --git a/daemon/lib/forked_process.cpp b/daemon/lib/forked_process.cpp new file mode 100644 index 00000000..166ccbf0 --- /dev/null +++ b/daemon/lib/forked_process.cpp @@ -0,0 +1,255 @@ +/* Copyright (C) 2022 by Arm Limited. All rights reserved. */ + +#include "lib/forked_process.h" + +#include "ExitStatus.h" +#include "lib/Assert.h" +#include "lib/AutoClosingFd.h" +#include "lib/Syscall.h" +#include "lib/error_code_or.hpp" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define CHILD_LOG_ERROR_FD(fd, format, ...) dprintf((fd), (format), ##__VA_ARGS__) +#define CHILD_LOG_ERROR(format, ...) CHILD_LOG_ERROR_FD(STDERR_FILENO, (format), ##__VA_ARGS__) + +namespace lib { + + namespace { + [[noreturn]] void kill_self() + { + kill(0, SIGKILL); + _exit(COMMAND_FAILED_EXIT_CODE); + } + } + + // NOLINTNEXTLINE(readability-function-cognitive-complexity) + error_code_or_t forked_process_t::fork_process( + bool prepend_command, + std::string const & cmd, + lib::Span args, + boost::filesystem::path const & cwd, + std::optional> const & uid_gid, + stdio_fds_t stdio_fds) + { + prepend_command |= args.empty(); + + // the current uid/gid and the requested uid/gid + uid_t const c_uid = geteuid(); + gid_t const c_gid = getegid(); + uid_t const r_uid = (uid_gid ? uid_gid->first : -1); + gid_t const r_gid = (uid_gid ? 
uid_gid->second : -1); + + // log the operation + LOG_DEBUG("Forking exe '%s' with prepend_command=%u, cwd='%s', uid_gid={%d, %d} vs {%d, %d}", + cmd.c_str(), + prepend_command, + cwd.c_str(), + r_uid, + r_gid, + c_uid, + c_gid); + for (auto const & a : args) { + LOG_DEBUG(" ARG: '%s'", a.c_str()); + } + + // this pipe is used to trigger the exec or abort from the parent to the child + auto exec_abort_or_error = pipe_pair_t::create(O_CLOEXEC); + auto const * error = get_error(exec_abort_or_error); + if (error != nullptr) { + return *error; + } + auto exec_abort = get_value(std::move(exec_abort_or_error)); + + // create null terminated args vector (before fork to avoid allocating in child in multithreaded environment) + std::vector args_null_term_list {}; + args_null_term_list.reserve(args.size() + (prepend_command ? 2 : 1)); + if (prepend_command) { + args_null_term_list.push_back(const_cast(cmd.c_str())); + } + for (const auto & arg : args) { + args_null_term_list.push_back(const_cast(arg.c_str())); + } + args_null_term_list.push_back(nullptr); + + char * const * const args_null_term = args_null_term_list.data(); + + // right, let's start the child + auto pid = ::fork(); + + if (pid < 0) { + LOG_DEBUG("fork failed with %d", errno); + return boost::system::errc::make_error_code(boost::system::errc::errc_t(errno)); + } + + if (pid != 0) { + // parent + return forked_process_t(std::move(stdio_fds.stdin_write), + std::move(stdio_fds.stdout_read), + std::move(stdio_fds.stderr_read), + std::move(exec_abort.write), + pid); + } + + // child + + // clear any signal handlers + signal(SIGINT, SIG_DFL); + signal(SIGTERM, SIG_DFL); + signal(SIGABRT, SIG_DFL); + signal(SIGALRM, SIG_DFL); + signal(SIGCHLD, SIG_DFL); + signal(SIGHUP, SIG_DFL); + + // Need to change the PGID so that all children of this process will have this process's PID as their PGID. + setpgid(pid, pid); + + prctl(PR_SET_NAME, reinterpret_cast(&"gatord-command"), 0, 0, 0); + + // Close the unused fd's + stdio_fds.stdin_write.close(); + stdio_fds.stdout_read.close(); + stdio_fds.stderr_read.close(); + exec_abort.write.close(); + + if (dup2(stdio_fds.stdin_read.get(), STDIN_FILENO) < 0) { + CHILD_LOG_ERROR_FD(stdio_fds.stderr_write.get(), "dup2(stdin) failed"); + kill_self(); + } + if (dup2(stdio_fds.stdout_write.get(), STDOUT_FILENO) < 0) { + CHILD_LOG_ERROR_FD(stdio_fds.stderr_write.get(), "dup2(stdout) failed"); + kill_self(); + } + if (dup2(stdio_fds.stderr_write.get(), STDERR_FILENO) < 0) { + CHILD_LOG_ERROR_FD(stdio_fds.stderr_write.get(), "dup2(stderr) failed"); + kill_self(); + } + + // disable buffering + ::setvbuf(stdout, nullptr, _IONBF, 0); + ::setvbuf(stderr, nullptr, _IONBF, 0); + + // get SIGKILL if the parent exits + if (prctl(PR_SET_PDEATHSIG, SIGKILL) != 0) { + CHILD_LOG_ERROR("prctl(PR_SET_PDEATHSIG, SIGKILL) failed with errno %d", errno); + kill_self(); + } + + if (setpriority(PRIO_PROCESS, lib::gettid(), 0) == -1) { + CHILD_LOG_ERROR("setpriority failed with errno %d", errno); + kill_self(); + } + + if (uid_gid) { + // failure is only an error if c_uid == 0, i.e. 
we are root + if ((setgroups(1, &r_gid) != 0) && (c_uid == 0)) { + CHILD_LOG_ERROR("setgroups failed, GID %d, with errno %d", r_gid, errno); + kill_self(); + } + if ((setresgid(r_gid, r_gid, r_gid) != 0) && (c_uid == 0)) { + CHILD_LOG_ERROR("setresgid failed, GID %d, with errno %d", r_gid, errno); + kill_self(); + } + if ((setresuid(r_uid, r_uid, r_uid) != 0) && (c_uid == 0)) { + CHILD_LOG_ERROR("setresuid failed, UID %d, with errno %d", r_uid, errno); + kill_self(); + } + } + + // change cwd + if (!cwd.empty()) { + const char * const path = cwd.c_str(); + if (chdir(path) != 0) { + CHILD_LOG_ERROR("chdir(\"%s\") failed; aborting.", path); + kill_self(); + } + } + + // Wait for exec or abort command. + forked_process_t::exec_state_t fail_or_exec = forked_process_t::exec_state_t::abort; + while (read(exec_abort.read.get(), &fail_or_exec, sizeof(fail_or_exec)) < 0) { + if (errno != EINTR) { + CHILD_LOG_ERROR("error while reading exec_abort pipe, with errno %d", errno); + kill_self(); + } + } + + if (fail_or_exec == forked_process_t::exec_state_t::abort) { + CHILD_LOG_ERROR("received exec command abort"); + kill_self(); + } + + prctl(PR_SET_NAME, reinterpret_cast(args_null_term[0]), 0, 0, 0); + execvp(cmd.c_str(), args_null_term); + + // execvp returns only if there is an error + CHILD_LOG_ERROR("execvp for command failed"); + _exit(errno == ENOENT ? failure_exec_not_found : failure_exec_invalid); + } + + void forked_process_t::abort() + { + AutoClosingFd exec_abort_write {std::move(this->exec_abort_write)}; + + if (exec_abort_write) { + forked_process_t::exec_state_t abort = forked_process_t::exec_state_t::abort; + while (lib::write(exec_abort_write.get(), &abort, sizeof(char)) < 1) { + if (errno != EINTR) { + LOG_DEBUG("abort... write failed with %d", errno); + break; + } + } + } + + auto pid = std::exchange(this->pid, 0); + if (pid > 0) { + if (lib::kill(-pid, SIGTERM) == -1) { + LOG_DEBUG("abort... kill failed with %d", errno); + } + } + }; + + [[nodiscard]] bool forked_process_t::exec() + { + AutoClosingFd exec_abort_write {std::move(this->exec_abort_write)}; + + if (!exec_abort_write) { + return false; + } + + forked_process_t::exec_state_t exec = forked_process_t::exec_state_t::go; + size_t write_result; + while (write_result = lib::write(exec_abort_write.get(), &exec, sizeof(char)), write_result < 1) { + if (errno != EINTR) { + if (write_result == 0) { + LOG_DEBUG("exec write failed, forked process has already exited"); + return false; + } + + // write_result < 0 + LOG_DEBUG("exec write failed with %d", errno); + return false; + } + } + + return true; + }; +} diff --git a/daemon/lib/forked_process.h b/daemon/lib/forked_process.h new file mode 100644 index 00000000..4a146119 --- /dev/null +++ b/daemon/lib/forked_process.h @@ -0,0 +1,115 @@ +/* Copyright (C) 2022 by Arm Limited. All rights reserved. 
*/ +#pragma once + +#include "Logging.h" +#include "Span.h" +#include "lib/AutoClosingFd.h" +#include "lib/error_code_or.hpp" +#include "lib/forked_process_utils.h" + +#include +#include +#include +#include + +#include +#include + +#include + +namespace lib { + + /** + * Represents a forked process that may subsequently be exec'd + */ + class forked_process_t { + public: + enum class exec_state_t : char { abort, go }; + + // these match the exit codes that the shell would use for invalid exe and file not found + static constexpr int failure_exec_invalid = 126; + static constexpr int failure_exec_not_found = 127; + + forked_process_t() = default; + forked_process_t(forked_process_t const &) = delete; + forked_process_t & operator=(forked_process_t const &) = delete; + + forked_process_t(forked_process_t && that) noexcept + : stdin_write(std::move(that.stdin_write)), + stdout_read(std::move(that.stdout_read)), + stderr_read(std::move(that.stderr_read)), + exec_abort_write(std::move(that.exec_abort_write)), + pid(std::exchange(that.pid, 0)) + { + } + + forked_process_t & operator=(forked_process_t && that) noexcept + { + if (this != &that) { + forked_process_t tmp {std::move(that)}; + std::swap(this->stdin_write, tmp.stdin_write); + std::swap(this->stdout_read, tmp.stdout_read); + std::swap(this->stderr_read, tmp.stderr_read); + std::swap(this->exec_abort_write, tmp.exec_abort_write); + std::swap(this->pid, tmp.pid); + } + return *this; + } + + ~forked_process_t() noexcept { abort(); } + + /** @return True if the process was constructed successfully */ + [[nodiscard]] explicit operator bool() const { return (pid != 0); } + + /** Abort the forked command, sending SIGTERM to the command and any children */ + void abort(); + + /** Will make the forked child process stop waiting and exec the command */ + [[nodiscard]] bool exec(); + + /** @return the write end of the process's stdin (may be closed if not reading stdin, or moved out for use elsewhere) */ + [[nodiscard]] AutoClosingFd & get_stdin_write() { return stdin_write; } + + /** @return the read end of the process's stdout (may be closed if redirected to a file, or moved out for use elsewhere) */ + [[nodiscard]] AutoClosingFd & get_stdout_read() { return stdout_read; } + + /** @return the read end of the process's stderr (may be closed if redirected to a file, or moved out for use elsewhere) */ + [[nodiscard]] AutoClosingFd & get_stderr_read() { return stderr_read; } + + /** @return The pid of the forked process */ + [[nodiscard]] pid_t get_pid() const { return pid; } + + /** + * Forks a process and returns the forked_process_t if created without any error. Returns errno in case of an error. + The forked child process will wait for a notification from the caller to start the command. + This is done by calling exec() on the forked_process_t created. 
+ */ + static error_code_or_t fork_process(bool prepend_command, + std::string const & cmd, + lib::Span args, + boost::filesystem::path const & cwd, + std::optional> const & uid_gid, + stdio_fds_t stdio_fds); + + /** Constructor */ + forked_process_t(AutoClosingFd && stdin_write, + AutoClosingFd && stdout_read, + AutoClosingFd && stderr_read, + AutoClosingFd && exec_abort_write, + pid_t pid) + : stdin_write(std::move(stdin_write)), + stdout_read(std::move(stdout_read)), + stderr_read(std::move(stderr_read)), + exec_abort_write(std::move(exec_abort_write)), + pid(pid) + { + } + + private: + AutoClosingFd stdin_write; + AutoClosingFd stdout_read; + AutoClosingFd stderr_read; + AutoClosingFd exec_abort_write; + pid_t pid {0}; + }; +} diff --git a/daemon/lib/forked_process_utils.cpp b/daemon/lib/forked_process_utils.cpp new file mode 100644 index 00000000..eb3e87ab --- /dev/null +++ b/daemon/lib/forked_process_utils.cpp @@ -0,0 +1,83 @@ +/* Copyright (C) 2022 by Arm Limited. All rights reserved. */ + +#include "lib/forked_process_utils.h" + +#include "Logging.h" + +#include +#include +#include +#include + +namespace lib { + + error_code_or_t pipe_pair_t::create(int flags) + { + std::array fds {{-1, -1}}; + + if (pipe2(fds, flags) != 0) { + auto const e = errno; + LOG_DEBUG("pipe2 failed with %d", errno); + return boost::system::errc::make_error_code(boost::system::errc::errc_t(e)); + } + + return pipe_pair_t {AutoClosingFd {fds[0]}, AutoClosingFd {fds[1]}}; + } + + error_code_or_t pipe_pair_t::from_file(char const * path) + { + // NOLINTNEXTLINE(android-cloexec-open) - cloexec is not appropriate for fork/exec redirection :-) + AutoClosingFd fd {::open(path, O_RDONLY)}; + if (!fd) { + auto const e = errno; + LOG_DEBUG("open failed with %d", errno); + return boost::system::errc::make_error_code(boost::system::errc::errc_t(e)); + } + + return pipe_pair_t {std::move(fd), {}}; + } + + error_code_or_t pipe_pair_t::to_file(char const * path, bool truncate, int mode) + { + // NOLINTNEXTLINE(hicpp-signed-bitwise,android-cloexec-open) + AutoClosingFd fd {::open(path, O_WRONLY | O_CREAT | (truncate ? O_TRUNC : 0), mode)}; + if (!fd) { + auto const e = errno; + LOG_DEBUG("open failed with %d", errno); + return boost::system::errc::make_error_code(boost::system::errc::errc_t(e)); + } + + return pipe_pair_t {{}, std::move(fd)}; + } + + error_code_or_t stdio_fds_t::create_pipes() + { + return create_from(pipe_pair_t::create(0), pipe_pair_t::create(0), pipe_pair_t::create(0)); + } + + error_code_or_t stdio_fds_t::create_from(error_code_or_t stdin_pair, + error_code_or_t stdout_pair, + error_code_or_t stderr_pair) + { + auto const * error = get_error(stdin_pair); + if (error != nullptr) { + return *error; + } + + error = get_error(stdout_pair); + if (error != nullptr) { + return *error; + } + + error = get_error(stderr_pair); + if (error != nullptr) { + return *error; + } + + return stdio_fds_t { + get_value(std::move(stdin_pair)), + get_value(std::move(stdout_pair)), + get_value(std::move(stderr_pair)), + }; + } +} diff --git a/daemon/lib/forked_process_utils.h b/daemon/lib/forked_process_utils.h new file mode 100644 index 00000000..3299c6e4 --- /dev/null +++ b/daemon/lib/forked_process_utils.h @@ -0,0 +1,69 @@ +/* Copyright (C) 2022 by Arm Limited. All rights reserved. */ +#pragma once + +#include "lib/AutoClosingFd.h" +#include "lib/error_code_or.hpp" + +#include + +namespace lib { + /** Represents a pair of file descriptors that represent the read and write end of a pipe. 
+ * For cases where io is redirected to/from a file, then either the read/write end of the pair may be invalid fd */ + struct pipe_pair_t { + // NOLINTNEXTLINE(hicpp-signed-bitwise) - 0644 + static constexpr int default_mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH; + + AutoClosingFd read; + AutoClosingFd write; + + static error_code_or_t create(int flags); + static error_code_or_t from_file(char const * path); + static error_code_or_t to_file(char const * path, bool truncate = true, int mode = default_mode); + }; + + /** + * The set of all fds required for a forked process, being stdin, stdout, and stderr. + */ + struct stdio_fds_t { + AutoClosingFd stdin_read; + AutoClosingFd stdin_write; + AutoClosingFd stdout_read; + AutoClosingFd stdout_write; + AutoClosingFd stderr_read; + AutoClosingFd stderr_write; + + constexpr stdio_fds_t() = default; + + stdio_fds_t(AutoClosingFd stdin_read, + AutoClosingFd stdin_write, + AutoClosingFd stdout_read, + AutoClosingFd stdout_write, + AutoClosingFd stderr_read, + AutoClosingFd stderr_write) + : stdin_read(std::move(stdin_read)), + stdin_write(std::move(stdin_write)), + stdout_read(std::move(stdout_read)), + stdout_write(std::move(stdout_write)), + stderr_read(std::move(stderr_read)), + stderr_write(std::move(stderr_write)) + { + } + + stdio_fds_t(pipe_pair_t stdin_pair, pipe_pair_t stdout_pair, pipe_pair_t stderr_pair) + : stdin_read(std::move(stdin_pair.read)), + stdin_write(std::move(stdin_pair.write)), + stdout_read(std::move(stdout_pair.read)), + stdout_write(std::move(stdout_pair.write)), + stderr_read(std::move(stderr_pair.read)), + stderr_write(std::move(stderr_pair.write)) + { + } + + /** Create all io fds from pipes */ + static error_code_or_t create_pipes(); + /** Create from the provided pairs */ + static error_code_or_t create_from(error_code_or_t stdin_pair, + error_code_or_t stdout_pair, + error_code_or_t stderr_pair); + }; +} diff --git a/daemon/lib/memory_pool.cpp b/daemon/lib/memory_pool.cpp deleted file mode 100644 index a88b6c83..00000000 --- a/daemon/lib/memory_pool.cpp +++ /dev/null @@ -1,83 +0,0 @@ -/* Copyright (C) 2021 by Arm Limited. All rights reserved. 
*/ - -#include "lib/memory_pool.h" - -#include -#include -#include - -#include - -using lib::Span; - -namespace { - using use_list_type = std::vector; - - constexpr std::size_t use_list_reserve_count = 100; - - void deallocate(use_list_type & use_list, Span chunk) - { - auto it = std::find_if(use_list.begin(), use_list.end(), [&](auto c) { return c.data() == chunk.data(); }); - if (it != use_list.end()) { - use_list.erase(it); - } - } -} - -namespace lib::alloc { - - memory_pool_t::memory_pool_t(std::size_t size) - { - mem.resize(size); - // Minimise allocations - use_list.reserve(use_list_reserve_count); - } - - memory_pool_t::pointer_type memory_pool_t::alloc(std::size_t size) - { - const auto add_chunk = [&](pointer_type::element_type chunk) { - use_list.push_back(chunk); - std::sort(use_list.begin(), use_list.end(), [](auto a, auto b) { return a.data() < b.data(); }); - - return pointer_type {chunk, [this, chunk]() { deallocate(use_list, chunk); }}; - }; - - const auto gap_checker = [&](pointer_type::element_type prev_span, pointer_type::element_type this_span) { - const auto gap = static_cast(this_span.data() - prev_span.end()); - return gap >= size; - }; - - // If the list is empty, just allocate from the start - if (use_list.empty()) { - if (size > mem.size()) { - return {}; - } - return add_chunk({mem.data(), size}); - } - - // Check the gap preceding the first chunk (if any), this is done outside of - // the loop so there's fewer conditionals in it - if (gap_checker({mem.data(), 0}, use_list.front())) { - return add_chunk({mem.data(), size}); - } - - // Check the gaps between any used elements are big enough - for (auto i = 1U; i < use_list.size(); ++i) { - auto prev_span = use_list[i - 1]; - auto this_span = use_list[i]; - - if (gap_checker(prev_span, this_span)) { - return add_chunk({prev_span.end(), size}); - } - } - - // ... And check the gap after the last chunk (if any) - if (gap_checker(use_list.back(), {mem.end().base(), 0})) { - return add_chunk({use_list.back().end(), size}); - } - - // There's no gaps big enough! - return {}; - }; - -} diff --git a/daemon/lib/memory_pool.h b/daemon/lib/memory_pool.h deleted file mode 100644 index 75f5ae55..00000000 --- a/daemon/lib/memory_pool.h +++ /dev/null @@ -1,105 +0,0 @@ -/* Copyright (C) 2021 by Arm Limited. All rights reserved. */ - -#pragma once - -#include "lib/Span.h" - -#include -#include -#include - -namespace lib::alloc { - /** Fixed size memory pool, used to minimise heap allocations. - * - * Ring buffers can't be contiguous (because they wrap) which prevents more - * efficient copying when using the STL - their iterators don't conform to the - * std::contiguous_iterator concept. - * - * So for the intermediary buffer I've gone for a memory pool pattern instead. - * Fragmentation may be an issue though, hard to know without testing it. - * - * This class can be moved but not copied. This class is not thread-safe. - */ - class memory_pool_t { - public: - /** Unique pointer that releases the allocation on destruction. 
*/ - class pointer_type { - friend class memory_pool_t; - - public: - using element_type = lib::Span; - - pointer_type() = default; - - pointer_type(std::nullptr_t) : pointer_type {} {} - - pointer_type(pointer_type &&) = default; - pointer_type & operator=(pointer_type &&) = default; - - pointer_type(const pointer_type &) = delete; - pointer_type & operator=(const pointer_type &) = delete; - - ~pointer_type() - { - if (dealloc_) { - dealloc_(); - } - } - - element_type * operator->() noexcept { return &span_; } - - element_type operator*() const noexcept { return span_; } - - operator bool() const noexcept { return span_.size(); } - - bool operator==(const pointer_type & other) const noexcept { return span_.data() == other.span_.data(); } - - bool operator!=(const pointer_type & other) const noexcept { return !(*this == other); } - - void reset() - { - span_ = element_type {}; - if (dealloc_) { - dealloc_(); - } - } - - private: - pointer_type(element_type span, std::function dealloc) : span_ {span}, dealloc_ {std::move(dealloc)} - { - } - - element_type span_; - std::function dealloc_; - }; - - /** Constructor. - * - * The heap memory is allocated at once upon construction. - * @param size Capacity of memory pool - */ - explicit memory_pool_t(std::size_t size); - - memory_pool_t(memory_pool_t &&) = default; - memory_pool_t & operator=(memory_pool_t &&) = default; - memory_pool_t(const memory_pool_t &) = delete; - memory_pool_t & operator=(const memory_pool_t &) = delete; - - /** Allocate @a size contiguous bytes from the pool. - * - * @param size Number of contiguous bytes to allocate - * @return Managed span defining the memory, will be a nullptr if not - * enough contiguous free space is available - */ - [[nodiscard]] pointer_type alloc(std::size_t size); - - [[nodiscard]] std::size_t size() const { return mem.size(); } - - private: - using use_list_type = std::vector; - - std::vector mem; - use_list_type use_list; - }; - -} diff --git a/daemon/linux/PerCoreIdentificationThread.cpp b/daemon/linux/PerCoreIdentificationThread.cpp index d95dca3d..dd0fe610 100644 --- a/daemon/linux/PerCoreIdentificationThread.cpp +++ b/daemon/linux/PerCoreIdentificationThread.cpp @@ -5,6 +5,7 @@ #include "Logging.h" #include "lib/Assert.h" #include "lib/String.h" +#include "lib/Syscall.h" #include "lib/Utils.h" #include "linux/CoreOnliner.h" @@ -50,7 +51,7 @@ bool PerCoreIdentificationThread::configureAffinity() // the maximum number of times we will attempt to affine to the core before bailing static constexpr unsigned AFFINE_LOOP_COUNT = 65535; - const pid_t tid = syscall(__NR_gettid); + const pid_t tid = lib::gettid(); cpu_set_t cpuset; CPU_ZERO(&cpuset); diff --git a/daemon/Tracepoints.cpp b/daemon/linux/Tracepoints.cpp similarity index 98% rename from daemon/Tracepoints.cpp rename to daemon/linux/Tracepoints.cpp index 07794b56..1d2bad76 100644 --- a/daemon/Tracepoints.cpp +++ b/daemon/linux/Tracepoints.cpp @@ -1,6 +1,6 @@ -/* Copyright (C) 2013-2021 by Arm Limited. All rights reserved. */ +/* Copyright (C) 2013-2022 by Arm Limited. All rights reserved. */ -#include "Tracepoints.h" +#include "linux/Tracepoints.h" #include "Config.h" #include "DynBuf.h" diff --git a/daemon/Tracepoints.h b/daemon/linux/Tracepoints.h similarity index 97% rename from daemon/Tracepoints.h rename to daemon/linux/Tracepoints.h index 1b3eb312..5d04880b 100644 --- a/daemon/Tracepoints.h +++ b/daemon/linux/Tracepoints.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2013-2021 by Arm Limited. All rights reserved. 
*/ +/* Copyright (C) 2013-2022 by Arm Limited. All rights reserved. */ #ifndef TRACEPOINTS_H #define TRACEPOINTS_H diff --git a/daemon/linux/perf/IPerfBufferConsumer.h b/daemon/linux/perf/IPerfBufferConsumer.h deleted file mode 100644 index cf20b74b..00000000 --- a/daemon/linux/perf/IPerfBufferConsumer.h +++ /dev/null @@ -1,68 +0,0 @@ -/* Copyright (C) 2013-2020 by Arm Limited. All rights reserved. */ - -#pragma once - -#include "Config.h" -#include "lib/Span.h" - -#include -#include -#include - -class IPerfBufferConsumer { -public: - virtual ~IPerfBufferConsumer() = default; - - using data_word_t = std::uint64_t; - - /** - * A chunk of a perf aux record - */ - struct AuxRecordChunk { - /** The pointer to the first byte of the record */ - const char * chunkPointer; - /** The number of bytes in the record */ - std::size_t byteCount; - }; - - /** - * A chunk of a perf data record - */ - struct DataRecordChunk { - /** The pointer to the first word of the record (where each word is a U64) */ - const data_word_t * chunkPointer; - /** The number of U64 words (not bytes) in the record */ - std::size_t wordCount; - }; - - /** - * A tuple of {@link DataRecordChunk}s where the first chunk is required and the second is optional. - * Each chunk specifies a sequence of words that make up the record. - * - * The second chunk is used when the record is split across the end of the ring-buffer. When it is - * not used, it will have its length set to zero. - */ - struct DataRecordChunkTuple { - DataRecordChunk firstChunk; - DataRecordChunk optionalSecondChunk; - }; - - /** - * Consume a chunk of aux data - * - * @param cpu The CPU the data came from - * @param auxTailValue The Initial 'tail' value for the aux data - * @param recordChunks The span of chunks that contains the data - */ - virtual void consumePerfAuxRecord(int cpu, - std::uint64_t auxTailValue, - lib::Span recordChunks) = 0; - - /** - * Consume a sequence of perf data record chunks - * - * @param cpu The CPU the records came from - * @param recordChunks The sequence of chunk-tuples - */ - virtual void consumePerfDataRecord(int cpu, lib::Span recordChunks) = 0; -}; diff --git a/daemon/linux/perf/PerfBuffer.cpp b/daemon/linux/perf/PerfBuffer.cpp deleted file mode 100644 index 97e36e5a..00000000 --- a/daemon/linux/perf/PerfBuffer.cpp +++ /dev/null @@ -1,388 +0,0 @@ -/* Copyright (C) 2013-2022 by Arm Limited. All rights reserved. 
*/ - -#include "linux/perf/PerfBuffer.h" - -#include "BufferUtils.h" -#include "ISender.h" -#include "Logging.h" -#include "PerfUtils.h" -#include "Protocol.h" -#include "k/perf_event.h" -#include "lib/String.h" -#include "lib/Syscall.h" -#include "lib/Utils.h" - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -template -static T readOnceAtomicRelaxed(const T & val) -{ - return __atomic_load_n(static_cast(&val), __ATOMIC_RELAXED); -} - -void validate(const perf_ringbuffer_config_t & config) -{ - if (((config.pageSize - 1) & config.pageSize) != 0) { - LOG_ERROR("perf_ringbuffer_config_t.pageSize (%zu) must be a power of 2", config.pageSize); - handleException(); - } - if (((config.dataBufferSize - 1) & config.dataBufferSize) != 0) { - LOG_ERROR("perf_ringbuffer_config_t.dataBufferSize (%zu) must be a power of 2", config.dataBufferSize); - handleException(); - } - if (config.dataBufferSize < config.pageSize) { - LOG_ERROR("perf_ringbuffer_config_t.dataBufferSize (%zu) must be a multiple of " - "perf_ringbuffer_config_t.pageSize (%zu)", - config.dataBufferSize, - config.pageSize); - handleException(); - } - - if (((config.auxBufferSize - 1) & config.auxBufferSize) != 0) { - LOG_ERROR("perf_ringbuffer_config_t.auxBufferSize (%zu) must be a power of 2", config.auxBufferSize); - handleException(); - } - if ((config.auxBufferSize < config.pageSize) && (config.auxBufferSize != 0)) { - LOG_ERROR("perf_ringbuffer_config_t.auxBufferSize (%zu) must be a multiple of " - "perf_ringbuffer_config_t.pageSize (%zu)", - config.auxBufferSize, - config.pageSize); - handleException(); - } -} - -static std::size_t getDataMMapLength(const perf_ringbuffer_config_t & config) -{ - return config.pageSize + config.dataBufferSize; -} - -std::size_t PerfBuffer::getDataBufferLength() const -{ - return mConfig.dataBufferSize; -} - -std::size_t PerfBuffer::getAuxBufferLength() const -{ - return mConfig.auxBufferSize; -} - -PerfBuffer::PerfBuffer(perf_ringbuffer_config_t config) : mConfig(config) -{ - validate(mConfig); -} - -PerfBuffer::~PerfBuffer() -{ - for (auto cpuAndBuf : mBuffers) { - lib::munmap(cpuAndBuf.second.data_buffer, getDataMMapLength(mConfig)); - if (cpuAndBuf.second.aux_buffer != nullptr) { - lib::munmap(cpuAndBuf.second.aux_buffer, getAuxBufferLength()); - } - } -} - -//TODO remove additional debug logging -//NOLINTNEXTLINE( readability-function-cognitive-complexity) -bool PerfBuffer::useFd(const int fd, int cpu, bool collectAuxTrace) -{ - auto mmap = [this, cpu](size_t length, size_t offset, int fd) { - void * const buf = lib::mmap(nullptr, length, PROT_READ | PROT_WRITE, MAP_SHARED, fd, offset); - - if (buf == MAP_FAILED) { - LOG_DEBUG("mmap failed for fd %i (errno=%d, %s, mmapLength=%zu, offset=%zu)", - fd, - errno, - strerror(errno), - length, - offset); - if ((errno == ENOMEM) || ((errno == EPERM) && (geteuid() != 0))) { - //NOLINTNEXTLINE(concurrency-mt-unsafe) - LOG_ERROR("Could not mmap perf buffer on cpu %d, '%s' (errno: %d) returned.\n" - "This may be caused by too small limit in /proc/sys/kernel/perf_event_mlock_kb\n" - "Try again with a smaller value of --mmap-pages\n" - "Usually a value of ((perf_event_mlock_kb * 1024 / page_size) - 1) or lower will work.\n" - "The current value effective value for --mmap-pages is %zu", - cpu, - strerror(errno), - errno, - mConfig.dataBufferSize / mConfig.pageSize); - const std::size_t max_size = 128; - lib::printf_str_t buffer {"/sys/devices/system/cpu/cpu%u/online", cpu}; - int64_t online_status = 0; - 
lib::readInt64FromFile(buffer, online_status); - LOG_DEBUG("Online status for cpu%d is %" PRId64, cpu, online_status); - - std::optional fileValue = perf_utils::readPerfEventMlockKb(); - if (fileValue.has_value()) { - LOG_DEBUG(" Perf MlockKb Value is %" PRId64, fileValue.value()); - } - else { - LOG_DEBUG("reading Perf MlockKb returned null"); - } - } - else { - LOG_DEBUG("mmap failed for a different reason"); - } - } - else { - LOG_DEBUG("mmap passed for fd %i (mmapLength=%zu, offset=%zu)", fd, length, offset); - } - return buf; - }; - - auto buffer = mBuffers.find(cpu); - if (buffer != mBuffers.end()) { - if (lib::ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, buffer->second.fd) < 0) { - LOG_DEBUG("ioctl failed for fd %i (errno=%d, %s)", fd, errno, strerror(errno)); - return false; - } - } - else { - void * buf = mmap(getDataMMapLength(mConfig), 0, fd); - if (buf == MAP_FAILED) { - return false; - } - - mBuffers[cpu] = Buffer {buf, nullptr, fd, -1}; - - struct perf_event_mmap_page & pemp = *static_cast(buf); - // Check the version - const uint32_t compat_version = pemp.compat_version; - if (compat_version != 0) { - LOG_DEBUG("Incompatible perf_event_mmap_page compat_version (%i) for fd %i", compat_version, fd); - return false; - } - } - - if (collectAuxTrace) { - auto & buffer = mBuffers[cpu]; - if (buffer.aux_buffer == nullptr) { - const size_t offset = getDataMMapLength(mConfig); - const size_t length = getAuxBufferLength(); - - struct perf_event_mmap_page & pemp = *static_cast(buffer.data_buffer); - pemp.aux_offset = offset; - pemp.aux_size = length; - - void * buf = mmap(length, offset, fd); - if (buf == MAP_FAILED) { - return false; - } - - buffer.aux_buffer = buf; - if (buffer.aux_fd >= 0) { - LOG_DEBUG("Multiple aux fds"); - return false; - } - buffer.aux_fd = fd; - } - } - - return true; -} - -void PerfBuffer::discard(int cpu) -{ - auto it = mBuffers.find(cpu); - if (it != mBuffers.end()) { - it->second.aux_fd = -1; - mDiscard.insert(cpu); - } -} - -bool PerfBuffer::isFull() -{ - for (auto cpuAndBuf : mBuffers) { - // Take a snapshot of the positions - auto * pemp = static_cast(cpuAndBuf.second.data_buffer); - const uint64_t dataHead = readOnceAtomicRelaxed(pemp->data_head); - - if ((dataHead + 2000) >= getDataBufferLength()) { - return true; - } - - if (cpuAndBuf.second.aux_buffer != nullptr) { - const uint64_t auxHead = readOnceAtomicRelaxed(pemp->aux_head); - if ((auxHead + 2000) >= getAuxBufferLength()) { - return true; - } - } - } - - return false; -} - -static void sendAuxFrame(IPerfBufferConsumer & bufferConsumer, - int cpu, - uint64_t headerTail, - uint64_t headerHead, - const char * buffer, - std::size_t length) -{ - const std::size_t bufferMask = length - 1; - - // will be 'length' at most otherwise somehow wrapped many times - const std::size_t totalDataSize = std::min(headerHead - headerTail, length); - const std::uint64_t head = headerHead; - // will either be the same as 'tail' or will be > if somehow wrapped multiple times - const std::uint64_t tail = (headerHead - totalDataSize); - - const std::size_t tailMasked = (tail & bufferMask); - const std::size_t headMasked = (head & bufferMask); - - const bool haveWrapped = headMasked < tailMasked; - - const std::size_t firstSize = (haveWrapped ? (length - tailMasked) : totalDataSize); - const std::size_t secondSize = (haveWrapped ? 
headMasked : 0); - - const IPerfBufferConsumer::AuxRecordChunk chunks[2] = {{buffer + tailMasked, firstSize}, {buffer, secondSize}}; - - bufferConsumer.consumePerfAuxRecord(cpu, tail, chunks); -} - -template -static inline const T * ringBufferPtr(const char * base, std::size_t positionMasked) -{ - return reinterpret_cast(base + positionMasked); -} - -template -static inline const T * ringBufferPtr(const char * base, std::uint64_t position, std::size_t sizeMask) -{ - return ringBufferPtr(base, (position & sizeMask)); -} - -static void sendDataFrame(IPerfBufferConsumer & bufferConsumer, - int cpu, - uint64_t head, - uint64_t tail, - const char * b, - std::size_t length) -{ - static constexpr std::size_t CHUNK_BUFFER_SIZE = 256; // arbitrary, roughly 4k size stack allocation on 64-bit - static constexpr std::size_t CHUNK_WORD_SIZE = sizeof(IPerfBufferConsumer::data_word_t); - - const std::size_t bufferMask = length - 1; - - std::size_t numChunksInBuffer = 0; - IPerfBufferConsumer::DataRecordChunkTuple chunkBuffer[CHUNK_BUFFER_SIZE]; - - while (head > tail) { - // write the chunks we have so far, so we can reuse the buffer - if (numChunksInBuffer == CHUNK_BUFFER_SIZE) { - bufferConsumer.consumePerfDataRecord(cpu, {chunkBuffer, numChunksInBuffer}); - numChunksInBuffer = 0; - } - - // create the next chunk - const auto * recordHeader = ringBufferPtr(b, tail, bufferMask); - const auto recordSize = (recordHeader->size + CHUNK_WORD_SIZE - 1) & ~(CHUNK_WORD_SIZE - 1); - const auto recordEnd = tail + recordSize; - const std::size_t baseMasked = (tail & bufferMask); - const std::size_t endMasked = (recordEnd & bufferMask); - - const bool haveWrapped = endMasked < baseMasked; - - const std::size_t firstSize = (haveWrapped ? (length - baseMasked) : recordSize); - const std::size_t secondSize = (haveWrapped ? 
endMasked : 0); - - // set chunk - chunkBuffer[numChunksInBuffer].firstChunk.chunkPointer = - ringBufferPtr(b, baseMasked); - chunkBuffer[numChunksInBuffer].firstChunk.wordCount = firstSize / CHUNK_WORD_SIZE; - chunkBuffer[numChunksInBuffer].optionalSecondChunk.chunkPointer = - ringBufferPtr(b, 0); - chunkBuffer[numChunksInBuffer].optionalSecondChunk.wordCount = secondSize / CHUNK_WORD_SIZE; - - numChunksInBuffer += 1; - tail = recordEnd; - } - - // write the remaining chunks - if (numChunksInBuffer > 0) { - bufferConsumer.consumePerfDataRecord(cpu, {chunkBuffer, numChunksInBuffer}); - } -} - -bool PerfBuffer::send(IPerfBufferConsumer & bufferConsumer) -{ - const std::size_t dataBufferLength = getDataBufferLength(); - const std::size_t auxBufferLength = getAuxBufferLength(); - - for (auto cpuAndBufIt = mBuffers.begin(); cpuAndBufIt != mBuffers.end();) { - const int cpu = cpuAndBufIt->first; - - // Take a snapshot of the data buffer positions - // We read the data buffer positions before we read the aux buffer positions - // so that we never send records more recent than the aux - void * const dataBuf = cpuAndBufIt->second.data_buffer; - auto * pemp = static_cast(dataBuf); - const uint64_t dataHead = __atomic_load_n(&pemp->data_head, __ATOMIC_ACQUIRE); - // Only we write this so no atomic load needed - const uint64_t dataTail = pemp->data_tail; - - auto discard = mDiscard.find(cpu); - const bool shouldDiscard = (discard != mDiscard.end()); - - // Now send the aux data before the records to ensure the consumer never receives - // a PERF_RECORD_AUX without already having received the aux data - void * const auxBuf = cpuAndBufIt->second.aux_buffer; - if (auxBuf != nullptr) { - const uint64_t auxHead = __atomic_load_n(&pemp->aux_head, __ATOMIC_ACQUIRE); - // Only we write this so no atomic load needed - const uint64_t auxTail = pemp->aux_tail; - - if (auxHead > auxTail) { - const char * const b = static_cast(auxBuf); - - sendAuxFrame(bufferConsumer, cpu, auxTail, auxHead, b, auxBufferLength); - - // Update tail with the aux read and synchronize with the buffer writer - __atomic_store_n(&pemp->aux_tail, auxHead, __ATOMIC_RELEASE); - - // The AUX buffer event will be disabled if the AUX buffer fills before we read it. - // Since we cannot easily tell that without parsing the data MMAP (which we currently don't do) - // Just call enable again here after updating the tail pointer. 
That way, if the event was - // disabled, it will be reenabled now so more data can be received - if ((!shouldDiscard) && (cpuAndBufIt->second.aux_fd >= 0)) { - if (lib::ioctl(cpuAndBufIt->second.aux_fd, PERF_EVENT_IOC_ENABLE, 0) != 0) { - LOG_ERROR("Unable to enable a perf event"); - } - } - } - } - - if (dataHead > dataTail) { - const char * const b = static_cast(dataBuf) + mConfig.pageSize; - - sendDataFrame(bufferConsumer, cpu, dataHead, dataTail, b, dataBufferLength); - - // Update tail with the data read and synchronize with the buffer writer - __atomic_store_n(&pemp->data_tail, dataHead, __ATOMIC_RELEASE); - } - - if (shouldDiscard) { - lib::munmap(dataBuf, getDataMMapLength(mConfig)); - if (auxBuf != nullptr) { - lib::munmap(auxBuf, auxBufferLength); - } - mDiscard.erase(discard); - LOG_DEBUG("Unmapped cpu %i", cpu); - cpuAndBufIt = mBuffers.erase(cpuAndBufIt); - } - else { - ++cpuAndBufIt; - } - } - - return true; -} diff --git a/daemon/linux/perf/PerfBuffer.h b/daemon/linux/perf/PerfBuffer.h deleted file mode 100644 index 73a7b587..00000000 --- a/daemon/linux/perf/PerfBuffer.h +++ /dev/null @@ -1,62 +0,0 @@ -/* Copyright (C) 2013-2022 by Arm Limited. All rights reserved. */ - -#ifndef PERF_BUFFER -#define PERF_BUFFER - -#include "Config.h" -#include "lib/Span.h" -#include "linux/perf/IPerfBufferConsumer.h" - -#include -#include -#include - -struct perf_ringbuffer_config_t { - /// must be power of 2 - size_t pageSize; - /// must be power of 2 multiple of pageSize - size_t dataBufferSize; - /// must be power of 2 multiple of pageSize (or 0) - size_t auxBufferSize; -}; - -class PerfBuffer { -public: - explicit PerfBuffer(perf_ringbuffer_config_t config); - ~PerfBuffer(); - - // Intentionally undefined - PerfBuffer(const PerfBuffer &) = delete; - PerfBuffer & operator=(const PerfBuffer &) = delete; - PerfBuffer(PerfBuffer &&) = delete; - PerfBuffer & operator=(PerfBuffer &&) = delete; - - bool useFd(int fd, int cpu, bool collectAuxTrace = false); - void discard(int cpu); - bool isFull(); - bool send(IPerfBufferConsumer & bufferConsumer); - - std::size_t getDataBufferLength() const; - std::size_t getAuxBufferLength() const; - -private: - perf_ringbuffer_config_t mConfig; - - struct Buffer { - void * data_buffer; - void * aux_buffer; // may be null - int fd; - int aux_fd; - }; - - std::map mBuffers {}; - // After the buffer is flushed it should be unmapped - std::set mDiscard {}; -}; - -/** - * Validates that config has allowable values and throws exception if not. 
- */ -void validate(perf_ringbuffer_config_t const & config); - -#endif // PERF_BUFFER diff --git a/daemon/linux/perf/PerfConfig.h b/daemon/linux/perf/PerfConfig.h index 9b851054..55a4e90d 100644 --- a/daemon/linux/perf/PerfConfig.h +++ b/daemon/linux/perf/PerfConfig.h @@ -4,15 +4,16 @@ #define PERFCONFIG_H struct PerfConfig { - bool has_fd_cloexec = false; // >=3.14 - bool has_count_sw_dummy = false; // >=3.12 - bool has_sample_identifier = false; // >= 3.12 - bool has_attr_comm_exec = false; // >= 3.16 - bool has_attr_mmap2 = false; // >=3.16 - bool has_attr_clockid_support = false; // >= 4.1 - bool has_attr_context_switch = false; // >= 4.3 - bool has_ioctl_read_id = false; // >= 3.12 - bool has_aux_support = false; // >= 4.1 + bool has_fd_cloexec = false; // >=3.14 + bool has_count_sw_dummy = false; // >=3.12 + bool has_sample_identifier = false; // >= 3.12 + bool has_attr_comm_exec = false; // >= 3.16 + bool has_attr_mmap2 = false; // >=3.16 + bool has_attr_clockid_support = false; // >= 4.1 + bool has_attr_context_switch = false; // >= 4.3 + bool has_ioctl_read_id = false; // >= 3.12 + bool has_aux_support = false; // >= 4.1 + bool has_exclude_callchain_kernel = false; // >= 3.7 bool is_system_wide = false; bool exclude_kernel = false; diff --git a/daemon/linux/perf/PerfCpuOnlineMonitor.cpp b/daemon/linux/perf/PerfCpuOnlineMonitor.cpp deleted file mode 100644 index 3d59df28..00000000 --- a/daemon/linux/perf/PerfCpuOnlineMonitor.cpp +++ /dev/null @@ -1,94 +0,0 @@ -/* Copyright (C) 2019-2021 by Arm Limited. All rights reserved. */ - -#include "linux/perf/PerfCpuOnlineMonitor.h" - -#include "lib/FsEntry.h" - -#include -#include - -#include -#include - -PerfCpuOnlineMonitor::PerfCpuOnlineMonitor(NotificationCallback callback) - : callback(std::move(callback)), thread(launch, this) -{ -} - -PerfCpuOnlineMonitor::~PerfCpuOnlineMonitor() -{ - if (!terminated.load(std::memory_order_relaxed)) { - terminate(); - } -} - -void PerfCpuOnlineMonitor::terminate() -{ - terminated.store(true, std::memory_order_release); - thread.join(); -} - -void PerfCpuOnlineMonitor::launch(PerfCpuOnlineMonitor * _this) noexcept -{ - _this->run(); -} - -void PerfCpuOnlineMonitor::run() noexcept -{ - // rename thread - prctl(PR_SET_NAME, reinterpret_cast("gatord-cpumon"), 0, 0, 0); - - // monitor filesystem - bool firstPass = true; - const lib::FsEntry sysFsCpuRootPath = lib::FsEntry::create("/sys/devices/system/cpu"); - while (!terminated.load(std::memory_order_acquire)) { - // loop through files - std::optional child; - lib::FsEntryDirectoryIterator iterator = sysFsCpuRootPath.children(); - - bool anyOffline = false; - while (!!(child = iterator.next())) { - const auto & name = child->name(); - if ((name.length() > 3) && (name.find("cpu") == 0)) { - // find a CPU node - const unsigned cpu = strtoul(name.c_str() + 3, nullptr, 10); - // read its online state - const lib::FsEntry onlineFsEntry = lib::FsEntry::create(*child, "online"); - const std::string contents = onlineFsEntry.readFileContentsSingleLine(); - if (!contents.empty()) { - const unsigned online = strtoul(contents.c_str(), nullptr, 0); - const bool isOnline = (online != 0); - anyOffline |= !isOnline; - - // process it - process(firstPass, cpu, isOnline); - } - } - } - - // sleep a little before checking again. - // sleep longer if they are all online, otherwise just sleep a short amount of time so as to not miss the core coming back online by too much - usleep(anyOffline ? 
200 : 1000); - - // not first pass any more - firstPass = false; - } -} - -void PerfCpuOnlineMonitor::process(bool first, unsigned cpu, bool online) -{ - if (online) { - const auto insertionResult = onlineCores.insert(cpu); - if (insertionResult.second && !first) { - // set was modified so state changed from offline->online - callback(cpu, true); - } - } - else { - const auto removalResult = onlineCores.erase(cpu); - if ((removalResult > 0) && !first) { - // set was modified so state changed from online->offline - callback(cpu, false); - } - } -} diff --git a/daemon/linux/perf/PerfCpuOnlineMonitor.h b/daemon/linux/perf/PerfCpuOnlineMonitor.h deleted file mode 100644 index 900b6d9e..00000000 --- a/daemon/linux/perf/PerfCpuOnlineMonitor.h +++ /dev/null @@ -1,48 +0,0 @@ -/* Copyright (C) 2019-2021 by Arm Limited. All rights reserved. */ - -#ifndef INCLUDE_LINUX_PERF_PERF_CPU_ONLINE_MONITOR_H -#define INCLUDE_LINUX_PERF_PERF_CPU_ONLINE_MONITOR_H - -#include -#include -#include -#include - -/** - * A thread that monitors CPU online / offline state (for when uevents are not available) - */ -class PerfCpuOnlineMonitor { -public: - /** Notification callback */ - using NotificationCallback = std::function; - - /** - * Constructor - * - * @param callback The function called when a state change occurs - */ - PerfCpuOnlineMonitor(NotificationCallback callback); - PerfCpuOnlineMonitor(const PerfCpuOnlineMonitor &) = delete; - PerfCpuOnlineMonitor(PerfCpuOnlineMonitor &&) = delete; - ~PerfCpuOnlineMonitor(); - PerfCpuOnlineMonitor & operator=(const PerfCpuOnlineMonitor &) = delete; - PerfCpuOnlineMonitor & operator=(PerfCpuOnlineMonitor &&) = delete; - - /** - * Terminate the thread - */ - void terminate(); - -private: - static void launch(PerfCpuOnlineMonitor *) noexcept; - - void run() noexcept; - void process(bool first, unsigned cpu, bool online); - - std::set onlineCores {}; - NotificationCallback callback; - std::atomic terminated {false}; - std::thread thread; -}; - -#endif /* INCLUDE_LINUX_PERF_PERF_CPU_ONLINE_MONITOR_H */ diff --git a/daemon/linux/perf/PerfDriver.cpp b/daemon/linux/perf/PerfDriver.cpp index f8c0e343..858ad3f4 100644 --- a/daemon/linux/perf/PerfDriver.cpp +++ b/daemon/linux/perf/PerfDriver.cpp @@ -10,7 +10,6 @@ #include "ISummaryConsumer.h" #include "Logging.h" #include "SessionData.h" -#include "Tracepoints.h" #include "agents/perf/perf_driver_summary.h" #include "k/perf_event.h" #include "lib/Assert.h" @@ -18,10 +17,10 @@ #include "lib/Time.h" #include "lib/Utils.h" #include "linux/SysfsSummaryInformation.h" +#include "linux/Tracepoints.h" #include "linux/perf/IPerfGroups.h" #include "linux/perf/PerfAttrsBuffer.h" #include "linux/perf/PerfEventGroupIdentifier.h" -#include "linux/perf/PerfSource.h" #include "xml/PmuXML.h" #include diff --git a/daemon/linux/perf/PerfDriver.h b/daemon/linux/perf/PerfDriver.h index 8915313e..50df3590 100644 --- a/daemon/linux/perf/PerfDriver.h +++ b/daemon/linux/perf/PerfDriver.h @@ -5,10 +5,12 @@ #include "IPerfGroups.h" #include "SimpleDriver.h" +#include "agents/agent_workers_process.h" #include "agents/perf/capture_configuration.h" +#include "agents/perf/source_adapter.h" +#include "linux/Tracepoints.h" #include "linux/perf/PerfConfig.h" #include "linux/perf/PerfDriverConfiguration.h" -#include "linux/perf/PerfSource.h" #include #include @@ -24,6 +26,7 @@ static constexpr const char * GATOR_BOOKMARK = "gator/gator_bookmark"; static constexpr const char * GATOR_COUNTER = "gator/gator_counter"; static constexpr const char * GATOR_TEXT = 
"gator/gator_text"; +class Child; class ISummaryConsumer; class GatorCpu; class IPerfGroups; @@ -31,8 +34,8 @@ class IPerfAttrsConsumer; class PerfTracepoint; class UncorePmu; class ICpuInfo; +class FtraceDriver; struct TraceFsConstants; -class PerfSource; static const char * MALI_MMU_IN_USE = "Mali: MMU address space in use"; static const char * MALI_PM_STATUS = "Mali: PM Status"; @@ -78,12 +81,17 @@ class PerfDriver : public SimpleDriver { const TraceFsConstants & getTraceFsConstants() const { return traceFsConstants; }; - std::unique_ptr create_source(sem_t & senderSem, - std::function profilingStartedCallback, - std::set appTids, - FtraceDriver & ftraceDriver, - bool enableOnCommandExec, - ICpuInfo & cpuInfo); + std::unique_ptr create_source(sem_t & senderSem, + ISender & sender, + std::function session_ended_callback, + std::function exec_target_app_callback, + std::function profilingStartedCallback, + const std::set & appTids, + FtraceDriver & ftraceDriver, + bool enableOnCommandExec, + ICpuInfo & cpuInfo, + lib::Span uncore_pmus, + agents::agent_workers_process_t & agent_workers_process); private: const TraceFsConstants & traceFsConstants; @@ -102,6 +110,19 @@ class PerfDriver : public SimpleDriver { std::vector get_cpu_cluster_keys_for_cpu_frequency_counter(); + + std::unique_ptr create_source_adapter( + agents::agent_workers_process_t & agent_workers_process, + sem_t & senderSem, + ISender & sender, + std::function session_ended_callback, + std::function exec_target_app_callback, + std::function profiling_started_callback, + const std::set & app_tids, + lib::Span uncore_pmus, + const perf_groups_configurer_state_t & perf_groups, + const agents::perf::buffer_config_t & ringbuffer_config, + bool enable_on_exec); }; #endif // PERFDRIVER_H diff --git a/daemon/linux/perf/PerfDriverConfiguration.cpp b/daemon/linux/perf/PerfDriverConfiguration.cpp index 07126cca..6b4358d6 100644 --- a/daemon/linux/perf/PerfDriverConfiguration.cpp +++ b/daemon/linux/perf/PerfDriverConfiguration.cpp @@ -125,7 +125,7 @@ std::unique_ptr PerfDriverConfiguration::detect(bool sy LOG_DEBUG("Kernel version: %s", utsname.release); // Check the kernel version - const int kernelVersion = lib::parseLinuxVersion(utsname); + auto const kernelVersion = lib::parseLinuxVersion(utsname); const bool hasArmv7PmuDriver = beginsWith(utsname.machine, "armv7") || FsEntry::create("/sys/bus/event_source/devices").hasChildWithNamePrefix("armv7"); @@ -141,7 +141,7 @@ std::unique_ptr PerfDriverConfiguration::detect(bool sy const bool use_64bit_register_set = (sizeof(void *) == 8) || has_64bit_uname; - if (kernelVersion < KERNEL_VERSION(3, 4, 0)) { + if (kernelVersion < KERNEL_VERSION(3U, 4U, 0U)) { const char error[] = "Unsupported kernel version\nPlease upgrade to 3.4 or later"; LOG_SETUP(error); LOG_ERROR(error); @@ -221,7 +221,7 @@ std::unique_ptr PerfDriverConfiguration::detect(bool sy } // Must have tracepoints or perf_event_attr.context_switch for sched switch info - if (systemWide && (!can_access_raw_tracepoints) && (kernelVersion < KERNEL_VERSION(4, 3, 0))) { + if (systemWide && (!can_access_raw_tracepoints) && (kernelVersion < KERNEL_VERSION(4U, 3U, 0U))) { if (can_access_tracepoints) { LOG_SETUP("System wide tracing\nperf_event_paranoid > -1 is not supported for system-wide non-root"); LOG_ERROR("perf_event_open: perf_event_paranoid > -1 is not supported for system-wide non-root.\n" @@ -263,15 +263,16 @@ std::unique_ptr PerfDriverConfiguration::detect(bool sy // create the configuration object, from this point on perf is 
supported std::unique_ptr configuration {new PerfDriverConfiguration()}; - configuration->config.has_fd_cloexec = (kernelVersion >= KERNEL_VERSION(3, 14, 0)); - configuration->config.has_count_sw_dummy = (kernelVersion >= KERNEL_VERSION(3, 12, 0)); - configuration->config.has_sample_identifier = (kernelVersion >= KERNEL_VERSION(3, 12, 0)); - configuration->config.has_attr_comm_exec = (kernelVersion >= KERNEL_VERSION(3, 16, 0)); - configuration->config.has_attr_mmap2 = (kernelVersion >= KERNEL_VERSION(3, 16, 0)); - configuration->config.has_attr_clockid_support = (kernelVersion >= KERNEL_VERSION(4, 1, 0)); - configuration->config.has_attr_context_switch = (kernelVersion >= KERNEL_VERSION(4, 3, 0)); - configuration->config.has_ioctl_read_id = (kernelVersion >= KERNEL_VERSION(3, 12, 0)); - configuration->config.has_aux_support = (kernelVersion >= KERNEL_VERSION(4, 1, 0)); + configuration->config.has_fd_cloexec = (kernelVersion >= KERNEL_VERSION(3U, 14U, 0U)); + configuration->config.has_count_sw_dummy = (kernelVersion >= KERNEL_VERSION(3U, 12U, 0U)); + configuration->config.has_sample_identifier = (kernelVersion >= KERNEL_VERSION(3U, 12U, 0U)); + configuration->config.has_attr_comm_exec = (kernelVersion >= KERNEL_VERSION(3U, 16U, 0U)); + configuration->config.has_attr_mmap2 = (kernelVersion >= KERNEL_VERSION(3U, 16U, 0U)); + configuration->config.has_attr_clockid_support = (kernelVersion >= KERNEL_VERSION(4U, 1U, 0U)); + configuration->config.has_attr_context_switch = (kernelVersion >= KERNEL_VERSION(4U, 3U, 0U)); + configuration->config.has_ioctl_read_id = (kernelVersion >= KERNEL_VERSION(3U, 12U, 0U)); + configuration->config.has_aux_support = (kernelVersion >= KERNEL_VERSION(4U, 1U, 0U)); + configuration->config.has_exclude_callchain_kernel = (kernelVersion >= KERNEL_VERSION(3U, 7U, 0U)); configuration->config.is_system_wide = systemWide; configuration->config.exclude_kernel = exclude_kernel; diff --git a/daemon/linux/perf/PerfDriverCreateSource.cpp b/daemon/linux/perf/PerfDriverCreateSource.cpp index faa665d6..8812a1cd 100644 --- a/daemon/linux/perf/PerfDriverCreateSource.cpp +++ b/daemon/linux/perf/PerfDriverCreateSource.cpp @@ -1,20 +1,93 @@ /* Copyright (C) 2022 by Arm Limited. All rights reserved. */ -#include "Command.h" +#include "Child.h" +#include "DynBuf.h" #include "ICpuInfo.h" +#include "ISender.h" #include "Logging.h" +#include "Proc.h" #include "SessionData.h" +#include "Source.h" +#include "agents/agent_workers_process.h" +#include "agents/perf/capture_configuration.h" +#include "ipc/messages.h" +#include "lib/Utils.h" +#include "linux/perf/PerfAttrsBuffer.h" #include "linux/perf/PerfDriver.h" -#include "linux/perf/PerfSource.h" +#include "linux/perf/PerfDriverConfiguration.h" +#include "linux/perf/PerfGroups.h" +#include "xml/PmuXML.h" + +#include +#include +#include +#include + +#include + +namespace { + constexpr std::size_t MEGABYTES = 1024UL * 1024UL; + constexpr std::size_t AUX_MULTIPLIER = 64UL; // size multiplier for session buffer size to aux buffer size + + // this used to be done in PerfSource::run but is not part of the new perf agent. + // it's placed here as a stop-gap measure until the ftrace agent has been written. 
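Note (illustration only, not part of the patch): the PerfDriverConfiguration.cpp hunk above derives each PerfConfig feature flag from the running kernel version, now spelled with unsigned literals in the KERNEL_VERSION comparisons. The sketch below is a self-contained stand-in that assumes the conventional linux/version.h style packing; gatord's actual KERNEL_VERSION macro and lib::parseLinuxVersion are not shown in this diff, so treat the helper name and definition as hypothetical.

#include <cstdint>

// Hypothetical stand-in for KERNEL_VERSION: packs major.minor.patch into one comparable
// integer (the real macro may clamp the patch component; assumed packing only).
constexpr std::uint32_t kernel_version(std::uint32_t major, std::uint32_t minor, std::uint32_t patch)
{
    return (major << 16U) | (minor << 8U) | patch;
}

// A 4.14 kernel passes both of the gates used above; a 3.10 kernel passes only the older one.
static_assert(kernel_version(4U, 14U, 0U) >= kernel_version(3U, 7U, 0U)); // has_exclude_callchain_kernel
static_assert(kernel_version(4U, 14U, 0U) >= kernel_version(4U, 1U, 0U)); // has_aux_support
static_assert(kernel_version(3U, 10U, 0U) < kernel_version(4U, 1U, 0U));  // no aux support on 3.10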
+ void send_tracepoint_formats(FtraceDriver & driver, PerfAttrsBuffer & buffer, bool system_wide) + { + DynBuf printb; + DynBuf b1; + + if (!readProcSysDependencies(buffer, &printb, &b1, driver)) { + if (system_wide) { + LOG_ERROR("readProcSysDependencies failed"); + handleException(); + } + else { + LOG_DEBUG("readProcSysDependencies failed"); + } + } + + buffer.flush(); + } + + std::map collect_pmu_type_to_name_map(PerfDriverConfiguration & config) + { + std::map type_to_name; + + for (const auto & uncore : config.uncores) { + type_to_name[static_cast(uncore.pmu_type)] = uncore.uncore_pmu.getId(); + } + + return type_to_name; + } + + agents::perf::buffer_config_t create_perf_buffer_config() + { + return { + static_cast(gSessionData.mPageSize), + (gSessionData.mPerfMmapSizeInPages > 0 + ? static_cast(gSessionData.mPageSize * gSessionData.mPerfMmapSizeInPages) + : static_cast(gSessionData.mTotalBufferSize) * MEGABYTES), + (gSessionData.mPerfMmapSizeInPages > 0 + ? static_cast(gSessionData.mPageSize * gSessionData.mPerfMmapSizeInPages) + : static_cast(gSessionData.mTotalBufferSize) * MEGABYTES * AUX_MULTIPLIER), + }; + } + +} /// this method is extracted so that it can be excluded from the unit tests as it brings deps on PerfSource... -std::unique_ptr PerfDriver::create_source(sem_t & senderSem, - std::function profilingStartedCallback, - std::set appTids, - FtraceDriver & ftraceDriver, - bool enableOnCommandExec, - ICpuInfo & cpuInfo) +std::unique_ptr PerfDriver::create_source(sem_t & senderSem, + ISender & sender, + std::function session_ended_callback, + std::function exec_target_app_callback, + std::function profilingStartedCallback, + const std::set & appTids, + FtraceDriver & ftraceDriver, + bool enableOnCommandExec, + ICpuInfo & cpuInfo, + lib::Span uncore_pmus, + agents::agent_workers_process_t & agent_workers_process) { auto attrs_buffer = std::make_unique(gSessionData.mTotalBufferSize * MEGABYTES, senderSem); @@ -23,7 +96,7 @@ std::unique_ptr PerfDriver::create_source(sem_t & senderSem, cpuInfo.getClusters(), cpuInfo.getClusterIds(), mConfig.config.exclude_kernel || gSessionData.mExcludeKernelEvents, - PerfSource::createPerfBufferConfig(), + create_perf_buffer_config(), getTracepointId(traceFsConstants, SCHED_SWITCH), // We disable periodic sampling if we have at least one EBS counter // it should probably be independent of EBS though @@ -44,34 +117,175 @@ std::unique_ptr PerfDriver::create_source(sem_t & senderSem, } { - perf_groups_configurer_t groups_builder {event_configurer_config, event_configurer_state}; attr_to_key_mapping_tracker_t wrapper {*attrs_buffer}; + perf_groups_configurer_t groups_builder {wrapper, event_configurer_config, event_configurer_state}; if (!enable(groups_builder, wrapper)) { LOG_DEBUG("perf setup failed, are you running Linux 3.4 or later?"); return {}; } - // TODO: async send mapping tracked items } + // write directly to the sender attrs_buffer->flush(); + attrs_buffer->write(sender); + + // add the tracepoint formats + send_tracepoint_formats(ftraceDriver, *attrs_buffer, mConfig.config.is_system_wide); + // write directly to the sender + attrs_buffer->flush(); + attrs_buffer->write(sender); + + return create_source_adapter(agent_workers_process, + senderSem, + sender, + std::move(session_ended_callback), + std::move(exec_target_app_callback), + std::move(profilingStartedCallback), + appTids, + uncore_pmus, + event_configurer_state, + create_perf_buffer_config(), + enableOnCommandExec); +} - auto result = std::make_unique( - event_configurer_config, 
- perf_groups_activator_state_t::convert_from(std::move(event_configurer_state)), - std::move(attrs_buffer), +[[nodiscard]] static bool wait_for_ready(std::optional const & ready_worker, + std::optional const & ready_agent, + bool session_ended) +{ + // wait, if no worker value received + if (!ready_worker) { + return true; + } + + // stop waiting if worker failed + if (!*ready_worker) { + return false; + } + + // stop if the session ended + if (session_ended) { + return false; + } + + // worker is started successfully, wait for agent + return (!ready_agent); +} + +std::unique_ptr PerfDriver::create_source_adapter( + agents::agent_workers_process_t & agent_workers_process, + sem_t & senderSem, + ISender & sender, + std::function session_ended_callback, // NOLINT(performance-unnecessary-value-param) + std::function exec_target_app_callback, + std::function profiling_started_callback, + const std::set & app_tids, + lib::Span uncore_pmus, + const perf_groups_configurer_state_t & perf_groups, + const agents::perf::buffer_config_t & ringbuffer_config, + bool enable_on_exec) +{ + auto cluster_keys_for_freq_counter = get_cpu_cluster_keys_for_cpu_frequency_counter(); + + std::vector gator_cpus; + for (const auto & cpu : mConfig.cpus) { + gator_cpus.push_back(cpu.gator_cpu); + } + + std::map cpu_to_spe_type; + for (auto & e : mConfig.cpuNumberToSpeType) { + cpu_to_spe_type[e.first] = static_cast(e.second); + } + + auto type_to_name_map = collect_pmu_type_to_name_map(mConfig); + + ipc::msg_capture_configuration_t config_msg = agents::perf::create_capture_configuration_msg( + gSessionData, + mConfig.config, + mCpuInfo, + cpu_to_spe_type, + cluster_keys_for_freq_counter, + uncore_pmus, + mPmuXml.cpus, + perf_groups, + ringbuffer_config, + type_to_name_map, + enable_on_exec, + // only use SIGSTOP pause when waiting for newly launched Android package + (gSessionData.mAndroidPackage != nullptr)); + + { + auto uid_gid = lib::resolve_uid_gid(gSessionData.mCaptureUser); + if (uid_gid && (!gSessionData.mCaptureCommand.empty())) { + agents::perf::add_command(config_msg, + gSessionData.mCaptureCommand, + gSessionData.mCaptureWorkingDir, + uid_gid->first, + uid_gid->second); + } + } + agents::perf::add_pids(config_msg, app_tids); + agents::perf::add_wait_for_process(config_msg, gSessionData.mWaitForProcessCommand); + + // start the agent worker and tell it to communicate with the source adapter + struct wait_state_t { + std::mutex ready_mutex {}; + std::condition_variable condition {}; + std::optional ready_worker {}; + std::optional ready_agent {}; + }; + + auto wait_state = std::make_shared(); + + auto source = std::make_unique( senderSem, - std::move(profilingStartedCallback), - [this](auto & a, auto b) { return summary(a, b); }, - [this](auto & a, auto b) { return coreName(a, b); }, - [this](auto & a, auto b) { return read(a, b); }, - std::move(appTids), - ftraceDriver, - enableOnCommandExec, - cpuInfo); - - if (!result->prepare()) { - return {}; + sender, + [wait_state](bool success) { + LOG_DEBUG("Received agent-ready notification, success=%u", success); + { + auto lock = std::unique_lock(wait_state->ready_mutex); + wait_state->ready_agent = success; + } + wait_state->condition.notify_one(); + }, + std::move(exec_target_app_callback), + std::move(profiling_started_callback)); + + agent_workers_process.async_add_perf_source(*source, std::move(config_msg), [wait_state](bool success) { + LOG_DEBUG("Received worker-ready notification, success=%u", success); + { + auto lock = 
std::unique_lock(wait_state->ready_mutex); + wait_state->ready_worker = success; + } + wait_state->condition.notify_one(); + }); + + LOG_DEBUG("Waiting for perf agent worker and agent to start"); + + { + constexpr std::chrono::milliseconds poll_session_ended_timeout {100}; + + auto lock = std::unique_lock(wait_state->ready_mutex); + while (wait_for_ready(wait_state->ready_worker, wait_state->ready_agent, session_ended_callback())) { + wait_state->condition.wait_for(lock, poll_session_ended_timeout); + } + + if ((!wait_state->ready_worker) || (!*(wait_state->ready_worker))) { + LOG_ERROR("Failed to start perf agent worker"); + handleException(); + } + + if (session_ended_callback()) { + // this is not an error; just Child.cpp will shutdown properly + LOG_DEBUG("Perf agent worker started, but agent not ready by time session ended"); + return source; + } + + if ((!wait_state->ready_agent) || (!*(wait_state->ready_agent))) { + LOG_ERROR("Failed to start perf agent"); + handleException(); + } } - return result; + LOG_DEBUG("Perf agent worker started"); + return source; } diff --git a/daemon/linux/perf/PerfEventGroup.cpp b/daemon/linux/perf/PerfEventGroup.cpp index 4a809a0b..ec45e75d 100644 --- a/daemon/linux/perf/PerfEventGroup.cpp +++ b/daemon/linux/perf/PerfEventGroup.cpp @@ -5,6 +5,7 @@ #include "DynBuf.h" #include "Logging.h" #include "SessionData.h" +#include "k/perf_event.h" #include "lib/Format.h" #include "lib/Syscall.h" #include "linux/perf/PerfUtils.h" @@ -13,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -23,102 +25,70 @@ namespace { constexpr unsigned long NANO_SECONDS_IN_ONE_SECOND = 1000000000UL; constexpr unsigned long NANO_SECONDS_IN_100_MS = 100000000UL; - - int sys_perf_event_open(struct perf_event_attr * const attr, - const pid_t pid, - const int cpu, - const int group_fd, - const unsigned long flags) + constexpr std::uint32_t MAX_SPE_WATERMARK = 2048U * 1024U; + constexpr std::uint32_t MIN_SPE_WATERMARK = 4096U; + + /** + * Dynamically adjust the aux_matermark value based on the sample frequency so that + * we collect data every 1/nth of a second, applying some sensible limits with respect + * to data size / processing cost on Streamline side. 
+ * + * @param mmap_size The size of the aux mmap + * @param count The sampling frequency + * @return The aux_watermark value + */ + // NOLINTNEXTLINE(bugprone-easily-swappable-parameters) + constexpr std::uint32_t calculate_aux_watermark(std::size_t mmap_size, std::uint64_t count) { - int fd = lib::perf_event_open(attr, pid, cpu, group_fd, flags); - if (fd < 0) { - return -1; - } - int fdf = lib::fcntl(fd, F_GETFD); - if ((fdf == -1) || (lib::fcntl(fd, F_SETFD, fdf | FD_CLOEXEC) != 0)) { - lib::close(fd); - return -1; - } - return fd; + constexpr std::uint64_t fraction_of_second = 10; // 1/10s + + auto const frequency = std::max(NANO_SECONDS_IN_ONE_SECOND / count, 1); + auto const bps = (24 * frequency); // assume an average of 24 bytes per sample + + // wake up after ~(1/fraction) seconds worth of data, or 50% of buffer is full + auto const pref_watermark = std::min(mmap_size / 2, bps / fraction_of_second); + + // but ensure that the watermark is not too large as may be the case with high sample rate and large buffer + // as this can be problematic for Streamline in system-wide mode + return std::max(std::min(pref_watermark, MAX_SPE_WATERMARK), MIN_SPE_WATERMARK); } - bool readAndSend(id_to_key_mapping_tracker_t & mapping_tracker, - const struct perf_event_attr & attr, - const int fd, - const int keyCount, - const int * const keys) + /** + * Decode whether or no to set exclude_kernel (et al) + * + * @param type The attribute type + * @param config The attribute config + * @param exclude_requested Whether or not exclude was requested (either by perf_paranoid or by cli argument) + * @return The exclude_kernel bit value + */ + // NOLINTNEXTLINE(bugprone-easily-swappable-parameters) + constexpr bool should_exclude_kernel(std::uint32_t type, std::uint64_t config, bool exclude_requested) { - for (int retry = 0; retry < 10; ++retry) { - char buf[1024]; - ssize_t bytes = lib::read(fd, buf, sizeof(buf)); - if (bytes < 0) { - LOG_DEBUG("read failed"); - return false; - } - - if (bytes == 0) { - /* pinning failed, retry */ - usleep(1); - continue; - } + // don't need to exclude if it wasn't requested + if (!exclude_requested) { + return false; + } - mapping_tracker(keyCount, keys, bytes, buf); - return true; + // but should also not exclude for certain events + if (type == PERF_TYPE_SOFTWARE) { + return (config != PERF_COUNT_SW_CONTEXT_SWITCHES); } - /* not able to pin even, log and return true, data is skipped */ - LOG_ERROR("Could not pin event %u:0x%llx, skipping", attr.type, attr.config); return true; } - - std::string perfAttrToString(const perf_event_attr & attr, - const char * typeLabel, - const char * indentation, - const char * separator) - { - return (lib::Format() << indentation << "type: " << attr.type << " (" - << (typeLabel != nullptr ? typeLabel : "") << ")" << separator // - << indentation << "config: " << attr.config << separator // - << indentation << "config1: " << attr.config1 << separator // - << indentation << "config2: " << attr.config2 << separator // - << indentation << "sample: " << attr.sample_period << separator << std::hex // - << indentation << "sample_type: 0x" << attr.sample_type << separator // - << indentation << "read_format: 0x" << attr.read_format << separator << std::dec // - << indentation << "pinned: " << (attr.pinned ? "true" : "false") << separator // - << indentation << "mmap: " << (attr.mmap ? "true" : "false") << separator // - << indentation << "comm: " << (attr.comm ? "true" : "false") << separator // - << indentation << "freq: " << (attr.freq ? 
"true" : "false") << separator // - << indentation << "task: " << (attr.task ? "true" : "false") << separator // - << indentation << "exclude_kernel: " << (attr.exclude_kernel ? "true" : "false") - << separator // - << indentation << "enable_on_exec: " << (attr.enable_on_exec ? "true" : "false") - << separator // - << indentation << "inherit: " << (attr.inherit ? "true" : "false") << separator // - << indentation << "sample_id_all: " << (attr.sample_id_all ? "true" : "false") - << separator // - << indentation << "sample_regs_user: 0x" << std::hex << attr.sample_regs_user << separator - << std::dec // - << indentation << "aux_watermark: " << attr.aux_watermark << separator); - } } -bool perf_event_group_configurer_t::addEvent(const bool leader, - attr_to_key_mapping_tracker_t & mapping_tracker, - const int key, - const IPerfGroups::Attr & attr, - bool hasAuxData) +bool perf_event_group_configurer_t::initEvent(perf_event_group_configurer_config_t & config, + perf_event_t & event, + bool is_header, + bool requires_leader, + PerfEventGroupIdentifier::Type type, + const bool leader, + attr_to_key_mapping_tracker_t & mapping_tracker, + const int key, + const IPerfGroups::Attr & attr, + bool hasAuxData) { - if (leader && !state.common.events.empty()) { - assert(false && "Cannot set leader for non-empty group"); - return false; - } - if (state.common.events.size() >= INT_MAX) { - return false; - } - - state.common.events.emplace_back(); - perf_event_t & event = state.common.events.back(); - event.attr.size = sizeof(event.attr); /* Emit time, read_format below, group leader id, and raw tracepoint info */ const uint64_t sampleReadMask = @@ -158,28 +128,32 @@ bool perf_event_group_configurer_t::addEvent(const bool leader, #endif // make sure all new children are counted too - const bool use_inherit = (config.perfConfig.is_system_wide ? 0 : 1); + const bool use_inherit = !(config.perfConfig.is_system_wide || is_header); // group doesn't require a leader (so all events are stand alone) - const bool every_attribute_in_own_group = use_inherit || (!requiresLeader()); + const bool every_attribute_in_own_group = use_inherit || (!requires_leader) || is_header; // use READ_FORMAT_GROUP; only when the leader and not a stand alone event - const bool use_read_format_group = leader && (!use_inherit) && (!every_attribute_in_own_group); + const bool use_read_format_group = leader && (!use_inherit) && (!every_attribute_in_own_group) && (!is_header); + + // filter kernel events? + const bool exclude_kernel = should_exclude_kernel(attr.type, attr.config, config.excludeKernelEvents); // when running in application mode, inherit must always be set, in system wide mode, inherit must always be clear event.attr.inherit = use_inherit; event.attr.inherit_stat = event.attr.inherit; /* Emit emit value in group format */ // Unfortunately PERF_FORMAT_GROUP is not allowed with inherit - event.attr.read_format = PERF_FORMAT_ID | (use_read_format_group ? PERF_FORMAT_GROUP : 0); + event.attr.read_format = (use_read_format_group ? PERF_FORMAT_ID | PERF_FORMAT_GROUP // + : PERF_FORMAT_ID); // Always be on the CPU but only a perf_event_open group leader can be pinned // We can only use perf_event_open groups if PERF_FORMAT_GROUP is used to sample group members // If the group has no leader, then all members are in separate perf_event_open groups (and hence their own leader) - event.attr.pinned = ((leader || every_attribute_in_own_group) ? 1 : 0); + event.attr.pinned = ((leader || every_attribute_in_own_group || is_header) ? 
1 : 0); // group leader must start disabled, all others enabled event.attr.disabled = event.attr.pinned; /* have a sampling interrupt happen when we cross the wakeup_watermark boundary */ event.attr.watermark = 1; /* Be conservative in flush size as only one buffer set is monitored */ - event.attr.wakeup_watermark = config.ringbuffer_config.dataBufferSize / 2; + event.attr.wakeup_watermark = config.ringbuffer_config.data_buffer_size / 2; /* Use the monotonic raw clock if possible */ event.attr.use_clockid = config.perfConfig.has_attr_clockid_support ? 1 : 0; event.attr.clockid = config.perfConfig.has_attr_clockid_support ? CLOCK_MONOTONIC_RAW : 0; @@ -190,22 +164,27 @@ bool perf_event_group_configurer_t::addEvent(const bool leader, event.attr.sample_period = attr.periodOrFreq; event.attr.mmap = attr.mmap; event.attr.comm = attr.comm; + event.attr.comm_exec = attr.comm && config.perfConfig.has_attr_comm_exec; event.attr.freq = attr.freq; event.attr.task = attr.task; /* sample_id_all should always be set (or should always match pinned); it is required for any non-grouped event, for grouped events it is ignored for anything but the leader */ event.attr.sample_id_all = 1; event.attr.context_switch = attr.context_switch; - event.attr.exclude_kernel = (config.excludeKernelEvents ? 1 : 0); - event.attr.exclude_hv = (config.excludeKernelEvents ? 1 : 0); - event.attr.exclude_idle = (config.excludeKernelEvents ? 1 : 0); - event.attr.aux_watermark = hasAuxData ? config.ringbuffer_config.auxBufferSize / 2 : 0; + event.attr.exclude_kernel = (exclude_kernel ? 1 : 0); + event.attr.exclude_hv = (exclude_kernel ? 1 : 0); + event.attr.exclude_idle = (exclude_kernel ? 1 : 0); + event.attr.exclude_callchain_kernel = + (config.excludeKernelEvents && config.perfConfig.has_exclude_callchain_kernel ? 1 : 0); + event.attr.aux_watermark = (hasAuxData ? calculate_aux_watermark(config.ringbuffer_config.aux_buffer_size, // + event.attr.sample_period) // + : 0); event.key = key; // [SDDAP-10625] - trace context switch information for SPE attributes. // it is required (particularly in system-wide mode) to be able to see // the boundarys of SPE data, as it is not guaranteed to get PERF_RECORD_ITRACE_START // between two processes if they are sampled by the same SPE attribute. - if (identifier.getType() == PerfEventGroupIdentifier::Type::SPE) { + if (type == PerfEventGroupIdentifier::Type::SPE) { if (!config.perfConfig.has_attr_context_switch) { LOG_ERROR("SPE requires context switch information"); return false; @@ -219,6 +198,35 @@ bool perf_event_group_configurer_t::addEvent(const bool leader, return true; } +bool perf_event_group_configurer_t::addEvent(const bool leader, + attr_to_key_mapping_tracker_t & mapping_tracker, + const int key, + const IPerfGroups::Attr & attr, + bool hasAuxData) +{ + if (leader && !state.events.empty()) { + assert(false && "Cannot set leader for non-empty group"); + return false; + } + if (state.events.size() >= INT_MAX) { + return false; + } + + state.events.emplace_back(); + perf_event_t & event = state.events.back(); + + return initEvent(config, + event, + false, + requiresLeader(), + identifier.getType(), + leader, + mapping_tracker, + key, + attr, + hasAuxData); +} + bool perf_event_group_configurer_t::createGroupLeader(attr_to_key_mapping_tracker_t & mapping_tracker) { switch (identifier.getType()) { @@ -283,7 +291,7 @@ bool perf_event_group_configurer_t::createCpuGroupLeader(attr_to_key_mapping_tra PERF_SAMPLE_TID | PERF_SAMPLE_IP | PERF_SAMPLE_READ | (enableCallChain ? 
PERF_SAMPLE_CALLCHAIN : 0); } } - else if (!config.excludeKernelEvents) { + else if (!config.perfConfig.exclude_kernel) { // use context switches as leader. this should give us 'switch-out' events attr.config = PERF_COUNT_SW_CONTEXT_SWITCHES; attr.periodOrFreq = 1; @@ -348,493 +356,7 @@ bool perf_event_group_configurer_t::createUncoreGroupLeader(attr_to_key_mapping_ return addEvent(true, mapping_tracker, nextDummyKey(), attr, false); } -int perf_event_group_configurer_t::nextDummyKey() +int perf_event_group_configurer_t::nextDummyKey(perf_event_group_configurer_config_t & config) { return config.dummyKeyCounter--; } - -static const char * selectTypeLabel(const char * groupLabel, std::uint32_t type) -{ - switch (type) { - case PERF_TYPE_HARDWARE: - return "cpu"; - case PERF_TYPE_BREAKPOINT: - return "breakpoint"; - case PERF_TYPE_HW_CACHE: - return "hw-cache"; - case PERF_TYPE_RAW: - return groupLabel; - case PERF_TYPE_SOFTWARE: - return "software"; - case PERF_TYPE_TRACEPOINT: - return "tracepoint"; - default: - return (type < PERF_TYPE_MAX ? "?" : groupLabel); - } -} - -std::pair perf_event_group_activator_t::onlineCPU( - int cpu, - std::set & tids, - OnlineEnabledState enabledState, - id_to_key_mapping_tracker_t & mapping_tracker, - const std::function & addToMonitor, - const std::function & addToBuffer) -{ - if (state.common.events.empty()) { - return std::make_pair(OnlineResult::SUCCESS, ""); - } - - const GatorCpu & cpuCluster = config.clusters[config.clusterIds[cpu]]; - const GatorCpu * cluster = identifier.getCluster(); - const UncorePmu * uncorePmu = identifier.getUncorePmu(); - const std::map * cpuNumberToType = identifier.getSpeTypeMap(); - - const char * groupLabel = "?"; - const char * deviceInstance = nullptr; - bool perCpu = false; - - // validate cpu - uint32_t replaceType = 0; - switch (identifier.getType()) { - case PerfEventGroupIdentifier::Type::PER_CLUSTER_CPU: { - groupLabel = cluster->getCoreName(); - perCpu = true; - if (!(*cluster == cpuCluster)) { - return std::make_pair(OnlineResult::SUCCESS, ""); - } - break; - } - - case PerfEventGroupIdentifier::Type::UNCORE_PMU: { - groupLabel = uncorePmu->getCoreName(); - deviceInstance = uncorePmu->getDeviceInstance(); - const std::set cpuMask = perf_utils::readCpuMask(uncorePmu->getId()); - const bool currentCpuNotInMask = ((!cpuMask.empty()) && (cpuMask.count(cpu) == 0)); - const bool maskIsEmptyAndCpuNotDefault = (cpuMask.empty() && (cpu != 0)); - if (currentCpuNotInMask || maskIsEmptyAndCpuNotDefault) { - // SKIP this core without marking an error - return std::make_pair(OnlineResult::SUCCESS, ""); - } - break; - } - - case PerfEventGroupIdentifier::Type::SPE: { - groupLabel = "SPE"; - perCpu = true; - const auto & type = cpuNumberToType->find(cpu); - if (type == cpuNumberToType->end()) { - return std::make_pair(OnlineResult::SUCCESS, ""); - } - replaceType = type->second; - break; - } - - case PerfEventGroupIdentifier::Type::SPECIFIC_CPU: { - groupLabel = cpuCluster.getCoreName(); - perCpu = true; - if (cpu != identifier.getCpuNumber()) { - return std::make_pair(OnlineResult::SUCCESS, ""); - ; - } - break; - } - - case PerfEventGroupIdentifier::Type::GLOBAL: { - groupLabel = "Global"; - break; - } - - default: { - assert(false && "Unexpected group type"); - return std::make_pair(OnlineResult::OTHER_FAILURE, "Unexpected group type"); - } - } - - const bool enableNow = (enabledState == OnlineEnabledState::ENABLE_NOW); - const bool enableOnExec = (enabledState == OnlineEnabledState::ENABLE_ON_EXEC); - - std::map> 
eventIndexToTidToFdMap; - - const std::size_t numberOfEvents = state.common.events.size(); - for (std::size_t eventIndex = 0; eventIndex < numberOfEvents; ++eventIndex) { - perf_event_t & event = state.common.events[eventIndex]; - - if (state.cpuToEventIndexToTidToFdMap[cpu].count(eventIndex) != 0) { - std::string message("CPU already online or not correctly cleaned up"); - return std::make_pair(OnlineResult::FAILURE, message); - } - - const char * typeLabel = selectTypeLabel(groupLabel, event.attr.type); - - // Note we are modifying the attr after we have marshalled it - // but we are assuming enable_on_exec will be ignored by Streamline - event.attr.enable_on_exec = (event.attr.pinned && enableOnExec) ? 1 : 0; - if (replaceType > 0) { - event.attr.type = replaceType; - } - - LOG_DEBUG("Opening attribute:\n" - " cpu: %i\n" - " key: %i\n" - " cluster: %s\n" - " index: %" PRIuPTR "\n" - " -------------\n" - "%s", - cpu, - event.key, - (cluster != nullptr ? cluster->getId() : (uncorePmu != nullptr ? uncorePmu->getId() : "")), - eventIndex, - perfAttrToString(event.attr, typeLabel, " ", "\n").c_str()); - - auto open_perf_event = - [&event, cpu](const int tid, const int groupLeaderFd, bool excl_kernel, bool excl_hv, bool excl_idle) { - event.attr.exclude_kernel = excl_kernel; - event.attr.exclude_hv = excl_hv; - event.attr.exclude_idle = excl_idle; - return sys_perf_event_open(&event.attr, - tid, - cpu, - groupLeaderFd, - // This is "(broken since Linux 2.6.35)" so can possibly be removed - // we use PERF_EVENT_IOC_SET_OUTPUT anyway - PERF_FLAG_FD_OUTPUT); - }; - - for (auto tidsIterator = tids.begin(); tidsIterator != tids.end();) { - const int tid = *tidsIterator; - - // This assumes that group leader is added first - const int groupLeaderFd = event.attr.pinned ? -1 : *(eventIndexToTidToFdMap.at(0).at(tid)); - - lib::AutoClosingFd fd; - - int peo_errno = 0; - - if (config.excludeKernelEvents) { - fd = open_perf_event(tid, groupLeaderFd, true, true, false); - - peo_errno = errno; - } - else { - // try with exclude_kernel clear - // open event - fd = open_perf_event(tid, groupLeaderFd, false, false, false); - - // take a copy of errno so that logging calls etc don't overwrite it - peo_errno = errno; - - // retry with just exclude_kernel set - if ((!fd) && (peo_errno == EACCES)) { - LOG_DEBUG("Failed when exclude_kernel == 0, retrying with exclude_kernel = 1"); - - // open event - fd = open_perf_event(tid, groupLeaderFd, true, false, false); - - // retry with exclude_kernel and all set - if ((!fd) && (peo_errno == EACCES)) { - LOG_DEBUG("Failed when exclude_kernel == 1, exclude_hv == 0, exclude_idle == 0, retrying " - "with all exclusions enabled"); - - // open event - fd = open_perf_event(tid, groupLeaderFd, true, true, true); - - // take a new copy of the errno if it failed - peo_errno = errno; - } - } - } - - LOG_DEBUG("perf_event_open: tid: %i, leader = %i -> fd = %i", tid, groupLeaderFd, *fd); - - if (!fd) { - LOG_DEBUG("failed (%d) %s", peo_errno, strerror(peo_errno)); - - if (peo_errno == ENODEV) { - // The core is offline - return std::make_pair(OnlineResult::CPU_OFFLINE, - "The event involves a feature not supported by the current CPU."); - } - if (peo_errno == ESRCH) { - // thread exited before we had chance to open event - tidsIterator = tids.erase(tidsIterator); - continue; - } - if ((peo_errno == ENOENT) && (!event.attr.pinned)) { - // This event doesn't apply to this CPU but should apply to a different one, e.g. 
bigLittle - goto skipOtherTids; - } - std::ostringstream stringStream; - - stringStream << "perf_event_open failed to online counter for " << typeLabel; - if (deviceInstance != nullptr) { - stringStream << " (" << deviceInstance << ")"; - } - stringStream << " with config=0x" << std::hex << event.attr.config << std::dec; - if (perCpu) { - stringStream << " on CPU " << cpu; - } - stringStream << ". Failure given was errno=" << peo_errno << " (" << strerror(peo_errno) << ")."; - - if (config.perfConfig.is_system_wide) { - if (peo_errno == EINVAL) { - switch (event.attr.type) { - case PERF_TYPE_BREAKPOINT: - case PERF_TYPE_SOFTWARE: - case PERF_TYPE_TRACEPOINT: - break; - case PERF_TYPE_HARDWARE: - case PERF_TYPE_HW_CACHE: - case PERF_TYPE_RAW: - default: - stringStream - << "\n\nAnother process may be using the PMU counter, or the combination requested " - "may not be supported by the hardware. Try removing some events."; - break; - } - } - return std::make_pair(OnlineResult::FAILURE, stringStream.str()); - } - LOG_WARNING("%s", stringStream.str().c_str()); - } - else if (!addToBuffer(*fd, cpu, event.attr.aux_watermark != 0)) { - std::string message("PerfBuffer::useFd failed"); - if (config.perfConfig.is_system_wide) { - return std::make_pair(OnlineResult::FAILURE, message.c_str()); - } - LOG_DEBUG("PerfBuffer::useFd failed"); - } - else if (!addToMonitor(*fd)) { - std::string message("Monitor::add failed"); - return std::make_pair(OnlineResult::FAILURE, message.c_str()); - } - else { - eventIndexToTidToFdMap[eventIndex][tid] = std::move(fd); - } - - ++tidsIterator; - } - skipOtherTids:; - } - - if (config.perfConfig.has_ioctl_read_id) { - bool addedEvents = false; - std::vector coreKeys; - std::vector ids; - - for (const auto & eventIndexToTidToFdPair : eventIndexToTidToFdMap) { - const int eventIndex = eventIndexToTidToFdPair.first; - const perf_event_t & event = state.common.events.at(eventIndex); - const int key = event.key; - - for (const auto & tidToFdPair : eventIndexToTidToFdPair.second) { - const auto & fd = tidToFdPair.second; - - // get the id - uint64_t id = 0; - if (lib::ioctl(*fd, PERF_EVENT_IOC_ID, reinterpret_cast(&id)) != 0 && - // Workaround for running 32-bit gatord on 64-bit systems, kernel patch in the works - lib::ioctl(*fd, - (PERF_EVENT_IOC_ID & ~IOCSIZE_MASK) | (8 << _IOC_SIZESHIFT), - reinterpret_cast(&id)) - != 0) { - std::string message("ioctl failed"); - LOG_DEBUG("%s", message.c_str()); - return std::make_pair(OnlineResult::OTHER_FAILURE, message.c_str()); - } - - // store it - coreKeys.push_back(key); - ids.emplace_back(id); - - // log it - LOG_DEBUG("Perf id for key : %i, fd : %i --> %" PRIu64, key, *fd, id); - - addedEvents = true; - } - } - - if (!addedEvents) { - LOG_DEBUG("no events came online"); - } - - mapping_tracker(ids.size(), ids.data(), coreKeys.data()); - } - else { - std::vector keysInGroup; - - // send the ungrouped attributes, collect keys for grouped attributes - for (const auto & eventIndexToTidToFdPair : eventIndexToTidToFdMap) { - const int eventIndex = eventIndexToTidToFdPair.first; - const perf_event_t & event = state.common.events.at(eventIndex); - const bool isLeader = requiresLeader() && (eventIndex == 0); - - if (event.attr.pinned && !isLeader) { - for (const auto & tidToFdPair : eventIndexToTidToFdPair.second) { - const auto & fd = tidToFdPair.second; - if (!readAndSend(mapping_tracker, event.attr, *fd, 1, &event.key)) { - return std::make_pair(OnlineResult::OTHER_FAILURE, "read failed"); - } - } - } - else { - 
keysInGroup.push_back(event.key); - } - } - - assert((requiresLeader() || keysInGroup.empty()) && "Cannot read group items without leader"); - - // send the grouped attributes and their keys - if (!keysInGroup.empty()) { - const auto & event = state.common.events.at(0); - const auto & tidToFdMap = eventIndexToTidToFdMap.at(0); - for (const auto & tidToFdPair : tidToFdMap) { - const auto & fd = tidToFdPair.second; - if (!readAndSend(mapping_tracker, event.attr, *fd, keysInGroup.size(), keysInGroup.data())) { - return std::make_pair(OnlineResult::OTHER_FAILURE, "read failed"); - } - } - } - } - - if (enableNow) { - if (!enable(eventIndexToTidToFdMap) || !checkEnabled(eventIndexToTidToFdMap)) { - return std::make_pair(OnlineResult::OTHER_FAILURE, "Unable to enable a perf event"); - } - } - - // everything enabled successfully, move into map - state.cpuToEventIndexToTidToFdMap[cpu] = std::move(eventIndexToTidToFdMap); - - return std::make_pair(OnlineResult::SUCCESS, ""); -} - -bool perf_event_group_activator_t::offlineCPU(int cpu) -{ - auto & eventIndexToTidToFdMap = state.cpuToEventIndexToTidToFdMap[cpu]; - - // we disable in the opposite order that we enabled for some reason - const auto eventIndexToTidToFdRend = eventIndexToTidToFdMap.rend(); - for (auto eventIndexToTidToFdIt = eventIndexToTidToFdMap.rbegin(); eventIndexToTidToFdIt != eventIndexToTidToFdRend; - ++eventIndexToTidToFdIt) { - const auto & tidToFdMap = eventIndexToTidToFdIt->second; - const auto tidToFdRend = tidToFdMap.rend(); - for (auto tidToFdIt = tidToFdMap.rbegin(); tidToFdIt != tidToFdRend; ++tidToFdIt) { - const auto & fd = tidToFdIt->second; - if (lib::ioctl(*fd, PERF_EVENT_IOC_DISABLE, 0) != 0) { - LOG_DEBUG("ioctl failed"); - return false; - } - } - } - - // close all the fds - eventIndexToTidToFdMap.clear(); - - return true; -} - -bool perf_event_group_activator_t::enable( - const std::map> & eventIndexToTidToFdMap) -{ - // Enable group leaders, others should be enabled by default - for (const auto & eventIndexToTidToFdPair : eventIndexToTidToFdMap) { - const int eventIndex = eventIndexToTidToFdPair.first; - const perf_event_t & event = state.common.events.at(eventIndex); - - for (const auto & tidToFdPair : eventIndexToTidToFdPair.second) { - const auto & fd = tidToFdPair.second; - - if (event.attr.pinned && (lib::ioctl(*fd, PERF_EVENT_IOC_ENABLE, 0) != 0)) { - LOG_ERROR("Unable to enable a perf event"); - return false; - } - } - } - return true; -} - -bool perf_event_group_activator_t::checkEnabled( - const std::map> & eventIndexToTidToFdMap) -{ - // Try reading from all the group leaders to ensure that the event isn't disabled - char buf[1 << 10]; - int readResultCount = 0; - - // Enable group leaders, others should be enabled by default - for (const auto & eventIndexToTidToFdPair : eventIndexToTidToFdMap) { - const int eventIndex = eventIndexToTidToFdPair.first; - const perf_event_t & event = state.common.events.at(eventIndex); - - for (const auto & tidToFdPair : eventIndexToTidToFdPair.second) { - const auto tid = tidToFdPair.first; - const auto & fd = tidToFdPair.second; - - if (event.attr.pinned) { - const auto readResult = lib::read(*fd, buf, sizeof(buf)); - if (readResult < 0) { - LOG_ERROR("Unable to read all perf groups, perhaps too many events were enabled (%d, %s)", - errno, - strerror(errno)); - return false; - } - if (readResult == 0) { - ++readResultCount; - - LOG_WARNING("Unable to enable a perf group, pinned group marked as in disabled due to conflict " - "or insufficient resources. 
(%d: tid = %d, fd = %d, attr = \n%s)", - eventIndex, - tid, - *fd, - perfAttrToString(event.attr, nullptr, " ", "\n").c_str()); - } - } - } - } - - // log an error message on the console to the user telling them that some items were disabled. - if (readResultCount > 0) { - LOG_ERROR("Unable to enable %d perf groups due to them being reported as being disabled due to conflict or " - "insufficient resources.\n" - "Another process may be using one or more perf counters.\n" - "Use `lsof|grep perf_event` (if available) to find other processes that may be using perf counters.\n" - "Not all event data may be available in the capture.\n" - "See debug log for more information.", - readResultCount); - } - - return true; -} - -void perf_event_group_activator_t::start() -{ - // Enable everything before checking to avoid losing data - for (const auto & cpuToEventIndexToTidToFdPair : state.cpuToEventIndexToTidToFdMap) { - if (!enable(cpuToEventIndexToTidToFdPair.second)) { - handleException(); - } - } - for (const auto & cpuToEventIndexToTidToFdPair : state.cpuToEventIndexToTidToFdMap) { - if (!checkEnabled(cpuToEventIndexToTidToFdPair.second)) { - handleException(); - } - } -} - -void perf_event_group_activator_t::stop() -{ - for (const auto & cpuToEventIndexToTidToFdPair : state.cpuToEventIndexToTidToFdMap) { - const auto & eventIndexToTidToFdMap = cpuToEventIndexToTidToFdPair.second; - const auto eventIndexToTidToFdRend = eventIndexToTidToFdMap.rend(); - for (auto eventIndexToTidToFdIt = eventIndexToTidToFdMap.rbegin(); - eventIndexToTidToFdIt != eventIndexToTidToFdRend; - ++eventIndexToTidToFdIt) { - const auto & tidToFdMap = eventIndexToTidToFdIt->second; - const auto tidToFdRend = tidToFdMap.rend(); - for (auto tidToFdIt = tidToFdMap.rbegin(); tidToFdIt != tidToFdRend; ++tidToFdIt) { - const auto & fd = tidToFdIt->second; - lib::ioctl(*fd, PERF_EVENT_IOC_DISABLE, 0); - } - } - } -} diff --git a/daemon/linux/perf/PerfEventGroup.h b/daemon/linux/perf/PerfEventGroup.h index 0bea495a..6c8d9443 100644 --- a/daemon/linux/perf/PerfEventGroup.h +++ b/daemon/linux/perf/PerfEventGroup.h @@ -3,16 +3,15 @@ #ifndef INCLUDE_LINUX_PERF_PERF_EVENT_GROUP_H #define INCLUDE_LINUX_PERF_PERF_EVENT_GROUP_H -#include "Tracepoints.h" +#include "agents/perf/record_types.h" #include "k/perf_event.h" // Use a snapshot of perf_event.h as it may be more recent than what is on the target and if not newer features won't be supported anyways #include "lib/AutoClosingFd.h" #include "lib/Span.h" +#include "linux/Tracepoints.h" #include "linux/perf/IPerfGroups.h" -#include "linux/perf/PerfBuffer.h" #include "linux/perf/PerfConfig.h" #include "linux/perf/PerfEventGroupIdentifier.h" #include "linux/perf/attr_to_key_mapping_tracker.h" -#include "linux/perf/id_to_key_mapping_tracker.h" #include #include @@ -22,59 +21,45 @@ #include #include -class IPerfmapping_tracker; class GatorCpu; -enum class OnlineResult { - SUCCESS, - FAILURE, - CPU_OFFLINE, - OTHER_FAILURE, -}; - -enum class OnlineEnabledState { ENABLE_NOW, ENABLE_ON_EXEC, NOT_ENABLED }; - /** Configuration common to both the activator and configurer */ -struct perf_event_group_common_config_t { +struct perf_event_group_configurer_config_t { const PerfConfig & perfConfig; lib::Span clusters; lib::Span clusterIds; + agents::perf::buffer_config_t ringbuffer_config; + /// tracepoint ID for sched_switch or UNKNOWN_TRACE_POINT_ID + int64_t schedSwitchId; + int schedSwitchKey = std::numeric_limits::max(); + int dummyKeyCounter = std::numeric_limits::max() - 1; + int backtraceDepth; + 
int sampleRate; bool excludeKernelEvents; -}; + bool enablePeriodicSampling; -/** State used by the configurator class */ -struct perf_event_group_configurer_config_t : perf_event_group_common_config_t { inline perf_event_group_configurer_config_t(PerfConfig const & perfConfig, lib::Span clusters, lib::Span clusterIds, bool excludeKernelEvents, - perf_ringbuffer_config_t const & ringbuffer_config, + agents::perf::buffer_config_t const & ringbuffer_config, int64_t schedSwitchId, int backtraceDepth, int sampleRate, bool enablePeriodicSampling) - : perf_event_group_common_config_t {perfConfig, clusters, clusterIds, excludeKernelEvents}, + : perfConfig(perfConfig), + clusters(clusters), + clusterIds(clusterIds), ringbuffer_config(ringbuffer_config), schedSwitchId(schedSwitchId), backtraceDepth(backtraceDepth), sampleRate(sampleRate), + excludeKernelEvents(excludeKernelEvents), enablePeriodicSampling(enablePeriodicSampling) { } - - perf_ringbuffer_config_t ringbuffer_config; - /// tracepoint ID for sched_switch or UNKNOWN_TRACE_POINT_ID - int64_t schedSwitchId; - int schedSwitchKey = std::numeric_limits::max(); - int dummyKeyCounter = std::numeric_limits::max() - 1; - int backtraceDepth; - int sampleRate; - bool enablePeriodicSampling; }; -/** State used by the activator class */ -using perf_event_group_activator_config_t = perf_event_group_common_config_t; - /** The tuple of attr + gator key representing one event that is part of the capture */ struct perf_event_t { struct perf_event_attr attr; @@ -82,38 +67,18 @@ struct perf_event_t { }; /** The common state data for the activator and configurer; only this part gets serialized */ -struct perf_event_group_common_state_t { +struct perf_event_group_configurer_state_t { // list of events associated with the group, where the first must be the group leader // the list is held externally std::vector events {}; }; -/** The state data specific to the configurer */ -struct perf_event_group_configurer_state_t { - //just the common state, wrapped to make it the same as the activator state - perf_event_group_common_state_t common {}; -}; - -/** The state data specific to the activator */ -struct perf_event_group_activator_state_t { - // the common state - perf_event_group_common_state_t common {}; - // map from cpu -> (map from mEvents index -> (map from tid -> file descriptor)) - std::map>> cpuToEventIndexToTidToFdMap {}; - - perf_event_group_activator_state_t() = default; - - // allow construction from just the serialized part - explicit perf_event_group_activator_state_t(perf_event_group_common_state_t && common) : common(std::move(common)) - { - } -}; - -/** Common base for both PerfEventGroup and PerfEventGroupActivator */ -template -class perf_event_group_base_t { +/** Like perf_groups_configurer_t, anages the construction / specification of the set of perf event attributes required for some capture */ +class perf_event_group_configurer_t { public: - perf_event_group_base_t(ConfigType & config, PerfEventGroupIdentifier const & identifier, StateType & state) + perf_event_group_configurer_t(perf_event_group_configurer_config_t & config, + PerfEventGroupIdentifier const & identifier, + perf_event_group_configurer_state_t & state) : config(config), identifier(identifier), state(state) { } @@ -134,27 +99,20 @@ class perf_event_group_base_t { } } - [[nodiscard]] bool hasLeader() const { return requiresLeader() && (!state.common.events.empty()); } + [[nodiscard]] bool hasLeader() const { return requiresLeader() && (!state.events.empty()); } -protected: - 
ConfigType & config; - PerfEventGroupIdentifier const & identifier; - StateType & state; -}; + [[nodiscard]] static bool initEvent(perf_event_group_configurer_config_t & config, + perf_event_t & event, + bool is_header, + bool requires_leader, + PerfEventGroupIdentifier::Type type, + bool leader, + attr_to_key_mapping_tracker_t & mapping_tracker, + int key, + const IPerfGroups::Attr & attr, + bool hasAuxData); -/** Like perf_groups_configurer_t, anages the construction / specification of the set of perf event attributes required for some capture */ -class perf_event_group_configurer_t - : private perf_event_group_base_t { -public: - perf_event_group_configurer_t(perf_event_group_configurer_config_t & config, - PerfEventGroupIdentifier const & identifier, - perf_event_group_configurer_state_t & state) - : perf_event_group_base_t(config, identifier, state) - { - } - - using perf_event_group_base_t::hasLeader; - using perf_event_group_base_t::requiresLeader; + [[nodiscard]] static int nextDummyKey(perf_event_group_configurer_config_t & config); [[nodiscard]] bool addEvent(bool leader, attr_to_key_mapping_tracker_t & mapping_tracker, @@ -164,36 +122,13 @@ class perf_event_group_configurer_t [[nodiscard]] bool createGroupLeader(attr_to_key_mapping_tracker_t & mapping_tracker); private: + perf_event_group_configurer_config_t & config; + PerfEventGroupIdentifier const & identifier; + perf_event_group_configurer_state_t & state; + [[nodiscard]] bool createCpuGroupLeader(attr_to_key_mapping_tracker_t & mapping_tracker); [[nodiscard]] bool createUncoreGroupLeader(attr_to_key_mapping_tracker_t & mapping_tracker); - [[nodiscard]] int nextDummyKey(); -}; - -/** Like perf_groups_activator_t, provides the necessary functionality to activate the perf event attributes for some capture */ -class perf_event_group_activator_t - : private perf_event_group_base_t { -public: - perf_event_group_activator_t(perf_event_group_activator_config_t & config, - PerfEventGroupIdentifier const & identifier, - perf_event_group_activator_state_t & state) - : perf_event_group_base_t(config, identifier, state) - { - } - - std::pair onlineCPU(int cpu, - std::set & tids, - OnlineEnabledState enabledState, - id_to_key_mapping_tracker_t & mapping_tracker, - const std::function & addToMonitor, - const std::function & addToBuffer); - - bool offlineCPU(int cpu); - void start(); - void stop(); - -private: - bool enable(const std::map> & eventIndexToTidToFdMap); - bool checkEnabled(const std::map> & eventIndexToTidToFdMap); + [[nodiscard]] int nextDummyKey() { return nextDummyKey(config); } }; #endif /* INCLUDE_LINUX_PERF_PERF_EVENT_GROUP_H */ diff --git a/daemon/linux/perf/PerfGroups.cpp b/daemon/linux/perf/PerfGroups.cpp index 78491bbe..9ce264dd 100644 --- a/daemon/linux/perf/PerfGroups.cpp +++ b/daemon/linux/perf/PerfGroups.cpp @@ -3,6 +3,8 @@ #include "linux/perf/PerfGroups.h" #include "Logging.h" +#include "k/perf_event.h" +#include "lib/Assert.h" #include "linux/perf/PerfEventGroup.h" #include @@ -31,13 +33,38 @@ perf_event_group_configurer_t perf_groups_configurer_t::getGroup(attr_to_key_map LOG_DEBUG(" Group leader not created"); } else { - state.numberOfEventsAdded += it->second.common.events.size(); + state.numberOfEventsAdded += it->second.events.size(); } } return eventGroup; } +void perf_groups_configurer_t::initHeader(attr_to_key_mapping_tracker_t & mapping_tracker) +{ + IPerfGroups::Attr attr {}; + attr.type = PERF_TYPE_SOFTWARE; + attr.config = (configuration.perfConfig.has_count_sw_dummy ? 
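// --- editorial aside (illustrative only, not part of this patch) ---
// PERF_COUNT_SW_DUMMY is a software event that counts nothing; it exists purely as a
// carrier for sideband records (mmap, comm, task), which is presumably why the header
// event below sets comm/task/mmap with periodOrFreq = 0. On kernels that predate
// PERF_COUNT_SW_DUMMY, PERF_COUNT_SW_CPU_CLOCK is used as the fallback carrier instead,
// hence the has_count_sw_dummy check here.
// --- end aside ---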
PERF_COUNT_SW_DUMMY : PERF_COUNT_SW_CPU_CLOCK); + attr.periodOrFreq = 0; + attr.sampleType = 0; + attr.comm = true; + attr.task = true; + attr.mmap = true; + + auto result = perf_event_group_configurer_t::initEvent(configuration, + state.header, + true, + false, + PerfEventGroupIdentifier::Type::GLOBAL, + true, + mapping_tracker, + perf_event_group_configurer_t::nextDummyKey(configuration), + attr, + false); + + runtime_assert(result, "Failed to init header event"); +} + bool perf_groups_configurer_t::add(attr_to_key_mapping_tracker_t & mapping_tracker, const PerfEventGroupIdentifier & groupIdentifier, const int key, @@ -88,123 +115,3 @@ bool perf_groups_configurer_t::add(attr_to_key_mapping_tracker_t & mapping_track return eventGroup.addEvent(false, mapping_tracker, key, newAttr, hasAuxData); } - -bool perf_groups_activator_t::hasSPE() const -{ - for (const auto & pair : state.perfEventGroupMap) { - if (pair.first.getType() == PerfEventGroupIdentifier::Type::SPE) { - return true; - } - } - - return false; -} - -std::size_t perf_groups_activator_t::getMaxFileDescriptors() -{ - // Get the maximum amount of file descriptors that can be opened. - struct rlimit rlim; - if (getrlimit(RLIMIT_NOFILE, &rlim) != 0) { - LOG_ERROR("getrlimit failed: %s", strerror(errno)); - handleException(); - } - - const rlim_t numberOfFdsReservedForGatord = 150; - // rlim_cur should've been set to rlim_max in main.cpp - if (rlim.rlim_cur < numberOfFdsReservedForGatord) { - LOG_ERROR("Not enough file descriptors to run gatord. Must have a minimum of %" PRIuMAX - " (currently the limit is %" PRIuMAX ").", - static_cast(numberOfFdsReservedForGatord), - static_cast(rlim.rlim_cur)); - handleException(); - } - - return std::size_t(rlim.rlim_cur - numberOfFdsReservedForGatord); -} - -std::pair perf_groups_activator_t::onlineCPU( - int cpu, - const std::set & appPids, - OnlineEnabledState enabledState, - id_to_key_mapping_tracker_t & mapping_tracker, - const std::function & addToMonitor, - const std::function & addToBuffer, - const std::function(int)> & childTids) -{ - LOG_DEBUG("Onlining cpu %i", cpu); - if (!configuration.perfConfig.is_system_wide && appPids.empty()) { - std::string message("No task given for non-system-wide"); - return std::make_pair(OnlineResult::FAILURE, message.c_str()); - } - - std::set tids {}; - if (configuration.perfConfig.is_system_wide) { - tids.insert(-1); - } - else { - for (int appPid : appPids) { - for (int tid : childTids(appPid)) { - tids.insert(tid); - } - } - } - - // Check to see if there are too many events/ not enough fds - // This is an over estimation because not every event will be opened. 
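// --- editorial aside (illustrative only, not part of this patch) ---
// The budget this estimate is checked against (maxFiles) comes from getMaxFileDescriptors()
// above: the RLIMIT_NOFILE soft limit minus the 150 descriptors reserved for gatord's own
// use. The per-CPU figure below is tids.size() * numberOfEventsAdded, accumulated over every
// CPU onlined so far; e.g. 8 threads * 40 events * 8 CPUs would claim 2560 descriptors
// (numbers hypothetical).
// --- end aside ---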
- const unsigned int amountOfEventsAboutToOpen = tids.size() * state.numberOfEventsAdded; - eventsOpenedPerCpu[cpu] = amountOfEventsAboutToOpen; - const unsigned int currentAmountOfEvents = std::accumulate( - std::begin(eventsOpenedPerCpu), - std::end(eventsOpenedPerCpu), - 0, - [](unsigned int total, const std::map::value_type & p) { return total + p.second; }); - - if (maxFiles < currentAmountOfEvents) { - LOG_ERROR("Not enough file descriptors for the amount of events requested."); - handleException(); - } - - for (auto & pair : state.perfEventGroupMap) { - perf_event_group_activator_t activator {configuration, pair.first, pair.second}; - const auto result = activator.onlineCPU(cpu, tids, enabledState, mapping_tracker, addToMonitor, addToBuffer); - if (result.first != OnlineResult::SUCCESS) { - return result; - } - } - return std::make_pair(OnlineResult::SUCCESS, ""); -} - -bool perf_groups_activator_t::offlineCPU(int cpu, const std::function & removeFromBuffer) -{ - LOG_DEBUG("Offlining cpu %i", cpu); - - for (auto & pair : state.perfEventGroupMap) { - perf_event_group_activator_t activator {configuration, pair.first, pair.second}; - if (!activator.offlineCPU(cpu)) { - return false; - } - } - - // Mark the buffer so that it will be released next time it's read - removeFromBuffer(cpu); - - eventsOpenedPerCpu.erase(cpu); - - return true; -} - -void perf_groups_activator_t::start() -{ - for (auto & pair : state.perfEventGroupMap) { - perf_event_group_activator_t activator {configuration, pair.first, pair.second}; - activator.start(); - } -} - -void perf_groups_activator_t::stop() -{ - for (auto & pair : state.perfEventGroupMap) { - perf_event_group_activator_t activator {configuration, pair.first, pair.second}; - activator.stop(); - } -} diff --git a/daemon/linux/perf/PerfGroups.h b/daemon/linux/perf/PerfGroups.h index 169c606d..2657a77d 100644 --- a/daemon/linux/perf/PerfGroups.h +++ b/daemon/linux/perf/PerfGroups.h @@ -3,11 +3,11 @@ #ifndef PERF_GROUPS_H #define PERF_GROUPS_H +#include "k/perf_event.h" #include "linux/perf/IPerfGroups.h" #include "linux/perf/PerfEventGroup.h" #include "linux/perf/PerfEventGroupIdentifier.h" #include "linux/perf/attr_to_key_mapping_tracker.h" -#include "linux/perf/id_to_key_mapping_tracker.h" #include #include @@ -18,38 +18,21 @@ class GatorCpu; /** The part of the state data that gets serialized */ -template -struct perf_groups_common_serialized_state_t { - std::map perfEventGroupMap {}; +struct perf_groups_configurer_state_t { + perf_event_t header {}; + std::map perfEventGroupMap {}; std::size_t numberOfEventsAdded {0}; - - /** Helper for converting once state type to another */ - template - static inline perf_groups_common_serialized_state_t convert_from( - perf_groups_common_serialized_state_t && from_state) - { - perf_groups_common_serialized_state_t result {}; - - for (auto & entry : from_state.perfEventGroupMap) { - result.perfEventGroupMap.emplace(entry.first, StateType {std::move(entry.second.common)}); - } - - result.numberOfEventsAdded = from_state.numberOfEventsAdded; - - return result; - } }; -using perf_groups_activator_state_t = perf_groups_common_serialized_state_t; -using perf_groups_configurer_state_t = perf_groups_common_serialized_state_t; - /** Manages the construction / specification of the set of perf event attributes required for some capture */ class perf_groups_configurer_t : public IPerfGroups { public: - perf_groups_configurer_t(perf_event_group_configurer_config_t & configuration, + 
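// --- editorial aside (illustrative only, not part of this patch) ---
// The configurer now takes the mapping tracker at construction so that it can emit the
// header event up front via initHeader() (see the constructor body below). A minimal usage
// sketch, assuming the caller already owns the tracker/config/state objects (names are
// hypothetical, argument list of add() abridged):
//
//   perf_groups_configurer_t groups {tracker, groups_config, groups_state}; // header emitted here
//   bool ok = groups.add(tracker, group_identifier, key, attr /* ... */);   // then the real counters
// --- end aside ---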
perf_groups_configurer_t(attr_to_key_mapping_tracker_t & mapping_tracker, + perf_event_group_configurer_config_t & configuration, perf_groups_configurer_state_t & state) : configuration(configuration), state(state) { + initHeader(mapping_tracker); } bool add(attr_to_key_mapping_tracker_t & mapping_tracker, @@ -71,43 +54,8 @@ class perf_groups_configurer_t : public IPerfGroups { /// Get the group and create the group leader if needed perf_event_group_configurer_t getGroup(attr_to_key_mapping_tracker_t & mapping_tracker, const PerfEventGroupIdentifier & groupIdentifier); -}; -/** Provides the necessary functionality to activate the perf event attributes for some capture */ -class perf_groups_activator_t { -public: - static std::size_t getMaxFileDescriptors(); - - perf_groups_activator_t(perf_event_group_activator_config_t const & configuration, - perf_groups_activator_state_t & state, - std::size_t maxFiles = getMaxFileDescriptors()) - : configuration(configuration), state(state), maxFiles(maxFiles) - { - } - - [[nodiscard]] bool hasSPE() const; - - /** - * @param appPids ignored if system wide - * @note Not safe to call concurrently. - */ - std::pair onlineCPU(int cpu, - const std::set & appPids, - OnlineEnabledState enabledState, - id_to_key_mapping_tracker_t & mapping_tracker, - const std::function & addToMonitor, - const std::function & addToBuffer, - const std::function(int)> & childTids); - - bool offlineCPU(int cpu, const std::function & removeFromBuffer); - void start(); - void stop(); - -private: - perf_event_group_activator_config_t configuration; - perf_groups_activator_state_t & state; - std::map eventsOpenedPerCpu {}; - std::size_t maxFiles; + void initHeader(attr_to_key_mapping_tracker_t & mapping_tracker); }; #endif // PERF_GROUPS_H diff --git a/daemon/linux/perf/PerfSource.cpp b/daemon/linux/perf/PerfSource.cpp deleted file mode 100644 index caffeba6..00000000 --- a/daemon/linux/perf/PerfSource.cpp +++ /dev/null @@ -1,526 +0,0 @@ -/* Copyright (C) 2010-2022 by Arm Limited. All rights reserved. */ - -#define BUFFER_USE_SESSION_DATA - -#include "linux/perf/PerfSource.h" - -#include "Child.h" -#include "DynBuf.h" -#include "FtraceDriver.h" -#include "ICpuInfo.h" -#include "Logging.h" -#include "OlyUtility.h" -#include "Proc.h" -#include "Protocol.h" -#include "Sender.h" -#include "SessionData.h" -#include "lib/FileDescriptor.h" -#include "lib/Time.h" -#include "linux/perf/PerfAttrsBuffer.h" -#include "linux/perf/PerfCpuOnlineMonitor.h" -#include "linux/perf/PerfDriver.h" -#include "linux/perf/PerfSyncThreadBuffer.h" -#include "linux/perf/id_to_key_mapping_tracker.h" -#include "linux/proc/ProcessChildren.h" -#include "xml/PmuXML.h" - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#ifndef SCHED_RESET_ON_FORK -#define SCHED_RESET_ON_FORK 0x40000000 -#endif - -perf_ringbuffer_config_t PerfSource::createPerfBufferConfig() -{ - return { - static_cast(gSessionData.mPageSize), - static_cast(gSessionData.mPerfMmapSizeInPages > 0 - ? gSessionData.mPageSize * gSessionData.mPerfMmapSizeInPages - : gSessionData.mTotalBufferSize * MEGABYTES), - static_cast(gSessionData.mPerfMmapSizeInPages > 0 - ? 
gSessionData.mPageSize * gSessionData.mPerfMmapSizeInPages - : gSessionData.mTotalBufferSize * MEGABYTES * 64), - }; -} - -PerfSource::PerfSource( - perf_event_group_activator_config_t const & configuration, - perf_groups_activator_state_t && state, - std::unique_ptr && attrs_buffer, - sem_t & senderSem, - std::function profilingStartedCallback, - std::function(ISummaryConsumer &, std::function)> sendSummaryFn, - std::function coreNameFn, - std::function readCountersFn, - std::set appTids, - FtraceDriver & ftraceDriver, - bool enableOnCommandExec, - ICpuInfo & cpuInfo) - : mConfig(configuration.perfConfig), - mSummary(MEGABYTES, senderSem), - mMemoryBuffer(16 * MEGABYTES, senderSem), - mPerfToMemoryBuffer(mMemoryBuffer, gSessionData.mOneShot), - mCountersBuf(createPerfBufferConfig()), - mCountersGroupState(std::move(state)), - mCountersGroup(configuration, mCountersGroupState), - mAppTids(std::move(appTids)), - mSenderSem(senderSem), - mAttrsBuffer(std::move(attrs_buffer)), - mProcBuffer(gSessionData.mTotalBufferSize * MEGABYTES, mSenderSem), - mProfilingStartedCallback(std::move(profilingStartedCallback)), - mSendSummaryFn(std::move(sendSummaryFn)), - mCoreNameFn(std::move(coreNameFn)), - mReadCountersFn(std::move(readCountersFn)), - mFtraceDriver(ftraceDriver), - mCpuInfo(cpuInfo) -{ - if ((!mConfig.is_system_wide) && (!mConfig.has_attr_clockid_support)) { - LOG_DEBUG("Tracing gatord as well as target application as no clock_id support"); - mAppTids.insert(getpid()); - } - - // allow self profiling -#if (defined(GATOR_SELF_PROFILE) && (GATOR_SELF_PROFILE != 0)) - const bool profileGator = true; -#else - const bool profileGator = (mAppTids.erase(0) != 0); // user can set --pid 0 to dynamically enable this feature -#endif - if (profileGator) { - // track child and parent process - mAppTids.insert(getpid()); - mAppTids.insert(getppid()); - } - - // was !enableOnCommandExec but this causes us to miss the exec comm record associated with the - // enable on exec doesn't work for cpu-wide events. - // additionally, when profiling gator, must be turned off - this->enableOnCommandExec = (enableOnCommandExec && !mConfig.is_system_wide && mConfig.has_attr_clockid_support - && mConfig.has_attr_comm_exec && !profileGator); -} - -bool PerfSource::prepare() -{ - - if (!mMonitor.init()) { - LOG_DEBUG("monitor setup failed"); - return false; - } - - int pipefd[2]; - if (lib::pipe_cloexec(pipefd) != 0) { - LOG_ERROR("pipe failed"); - return false; - } - mInterruptWrite = pipefd[1]; - mInterruptRead = pipefd[0]; - - if (!mMonitor.add(*mInterruptRead)) { - LOG_ERROR("Monitor::add failed"); - return false; - } - - // always try uevents, event as non-root, but continue if not supported - if (mUEvent.init() && !mMonitor.add(mUEvent.getFd())) { - LOG_DEBUG("uevent setup failed"); - return false; - } - - // must do this after mDriver::enable because of the SPE check - mSyncThread = PerfSyncThreadBuffer::create(mConfig.has_attr_clockid_support, mCountersGroup.hasSPE(), mSenderSem); - - // online them later - const OnlineEnabledState onlineEnabledState = - (enableOnCommandExec ? 
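// --- editorial aside (illustrative only, not part of this patch) ---
// ENABLE_ON_EXEC presumably maps onto the perf attr's enable_on_exec behaviour: the events
// are created disabled and the kernel starts them automatically when the traced process
// calls exec(), so nothing is missed at startup. NOT_ENABLED leaves them disabled here and
// start() enables them explicitly later.
// --- end aside ---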
OnlineEnabledState::ENABLE_ON_EXEC : OnlineEnabledState::NOT_ENABLED); - int numOnlined = 0; - - for (size_t cpu = 0; cpu < mCpuInfo.getNumberOfCores(); ++cpu) { - using Result = OnlineResult; - id_to_key_mapping_tracker_t wrapper {*mAttrsBuffer}; - const std::pair result = mCountersGroup.onlineCPU( - cpu, - mAppTids, - onlineEnabledState, - wrapper, // - [this](int fd) -> bool { return mMonitor.add(fd); }, - [this](int fd, int cpu, bool hasAux) -> bool { return mCountersBuf.useFd(fd, cpu, hasAux); }, - &lnx::getChildTids); - switch (result.first) { - case Result::FAILURE: - LOG_ERROR("\n%s", result.second.c_str()); - handleException(); - break; - case Result::SUCCESS: - numOnlined++; - break; - default: - // do nothing - // why distinguish between FAILURE and OTHER_FAILURE? - break; - } - } - - if (numOnlined <= 0) { - LOG_DEBUG("PerfGroups::onlineCPU failed on all cores"); - } - - mAttrsBuffer->flush(); - - return true; -} - -std::optional PerfSource::sendSummary() -{ - // Send the summary right before the start so that the monotonic delta is close to the start time - auto montonicStart = mSendSummaryFn(mSummary, &getTime); - if (!montonicStart) { - LOG_ERROR("PerfDriver::summary failed"); - handleException(); - } - - return montonicStart; -} - -struct ProcThreadArgs { - PerfAttrsBuffer * mProcBuffer {nullptr}; - std::atomic_bool mIsDone {false}; -}; - -static void * procFunc(void * arg) -{ - const auto * const args = reinterpret_cast(arg); - - prctl(PR_SET_NAME, reinterpret_cast(&"gatord-proc"), 0, 0, 0); - - // Gator runs at a high priority, reset the priority to the default - if (setpriority(PRIO_PROCESS, syscall(__NR_gettid), 0) == -1) { - LOG_ERROR("setpriority failed"); - handleException(); - } - - if (!readProcMaps(*args->mProcBuffer)) { - LOG_ERROR("readProcMaps failed"); - handleException(); - } - - if (!readKallsyms(*args->mProcBuffer, args->mIsDone)) { - LOG_ERROR("readKallsyms failed"); - handleException(); - } - args->mProcBuffer->flush(); - - return nullptr; -} - -static const char CPU_DEVPATH[] = "/devices/system/cpu/cpu"; - -void PerfSource::run(std::uint64_t monotonicStart, std::function endSession) -{ - prctl(PR_SET_NAME, reinterpret_cast(&"gatord-perf"), 0, 0, 0); - - pthread_t procThread; - ProcThreadArgs procThreadArgs; - - { - DynBuf printb; - DynBuf b1; - - const uint64_t currTime = getTime() - monotonicStart; - LOG_DEBUG("run at current time: %" PRIu64, currTime); - - // Start events before reading proc to avoid race conditions - if (!enableOnCommandExec) { - mCountersGroup.start(); - } - - // This a bit fragile, we are assuming the driver will only write one counter per CPU - // which is true at the time of writing (just the cpu freq) - mAttrsBuffer->perfCounterHeader(currTime, mCpuInfo.getNumberOfCores()); - for (size_t cpu = 0; cpu < mCpuInfo.getNumberOfCores(); ++cpu) { - mReadCountersFn(*mAttrsBuffer, cpu); - } - mAttrsBuffer->perfCounterFooter(); - - if (!readProcSysDependencies(*mAttrsBuffer, &printb, &b1, mFtraceDriver)) { - if (mConfig.is_system_wide) { - LOG_ERROR("readProcSysDependencies failed"); - handleException(); - } - else { - LOG_DEBUG("readProcSysDependencies failed"); - } - } - mAttrsBuffer->flush(); - - // Postpone reading kallsyms as on android adb gets too backed up and data is lost - procThreadArgs.mProcBuffer = &mProcBuffer; - procThreadArgs.mIsDone = false; - if (pthread_create(&procThread, nullptr, procFunc, &procThreadArgs) != 0) { - LOG_ERROR("pthread_create failed"); - handleException(); - } - } - - // monitor online cores if no 
uevents - std::unique_ptr onlineMonitorThread; - if (!mUEvent.enabled()) { - onlineMonitorThread = std::make_unique([&](unsigned cpu, bool online) -> void { - LOG_DEBUG("CPU online state changed: %u -> %s", cpu, (online ? "online" : "offline")); - const uint64_t currTime = getTime() - monotonicStart; - if (online) { - handleCpuOnline(currTime, cpu); - } - else { - handleCpuOffline(currTime, cpu); - } - }); - } - - // start sync threads - if (mSyncThread != nullptr) { - mSyncThread->start(monotonicStart); - } - - // start profiling - mProfilingStartedCallback(); - - static constexpr uint64_t NO_RATE = ~0ULL; - const bool isLive = (gSessionData.mLiveRate > 0 && gSessionData.mSampleRate > 0); - const uint64_t rate = (isLive ? gSessionData.mLiveRate : NO_RATE); - int timeout = (rate != NO_RATE ? 0 : -1); - bool complete = false; - std::vector events; - while (!complete) { - // allocate enough space for all the FDs in the monitor - events.resize(std::min(2, mMonitor.size())); - - // wait for some events - const int ready = mMonitor.wait(events.data(), events.size(), timeout); - if (ready < 0) { - LOG_ERROR("Monitor::wait failed"); - handleException(); - } - - const uint64_t currTimeMonotonicDelta = (getTime() - monotonicStart); - - // validate the events - bool hasCoreData = false; - for (int i = 0; i < ready; ++i) { - if (events[i].data.fd == mUEvent.getFd()) { - if (!handleUEvent(currTimeMonotonicDelta)) { - LOG_ERROR("PerfSource::handleUEvent failed"); - handleException(); - } - } - else if (events[i].data.fd == *mInterruptRead) { - complete = true; - break; - } - else { - // at least one core has overflowed its watermark - hasCoreData |= ((events[i].events & EPOLLIN) == EPOLLIN); - - // remove error or expired items - if (((events[i].events & EPOLLHUP) == EPOLLHUP) || ((events[i].events & EPOLLERR) == EPOLLERR)) { - mMonitor.remove(events[i].data.fd); - } - } - } - - const bool liveTimedOut = (isLive && !hasCoreData); - - // send a notification that data is ready - // in live mode, we flush the perf ring buffer periodically so that the UI can - // show data in a timely manner. - // when complete, perform one final flush, regardless of whether or not the - // watermark is met - // otherwise just flush when a buffer watermark notification happens - if (liveTimedOut || complete || hasCoreData) { - if (!mCountersBuf.send(mPerfToMemoryBuffer)) { - LOG_ERROR("PerfBuffer::send failed"); - handleException(); - } - - if (isLive) { - mMemoryBuffer.flush(); - } - } - - // In one shot mode, stop collection once all the buffers are filled - if (!complete) { - if (gSessionData.mOneShot - && ((mSummary.bytesAvailable() <= 0) || (mAttrsBuffer->bytesAvailable() <= 0) - || (mProcBuffer.bytesAvailable() <= 0) || mPerfToMemoryBuffer.isFull())) { - LOG_DEBUG("One shot (perf)"); - endSession(); - } - - if (rate != NO_RATE) { - const auto nowMonotonicDelta = (getTime() - monotonicStart); - const auto nextExpectedMonotonicDelta = ((currTimeMonotonicDelta + rate - 1) / rate) * rate; - const auto nowMonotonicDeltaRoundedToRate = ((nowMonotonicDelta + rate - 1) / rate) * rate; - const auto nextMonotonicDelta = - (nextExpectedMonotonicDelta > nowMonotonicDelta ? 
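// --- editorial aside (illustrative only, not part of this patch) ---
// Both candidates in this ternary are "round up to the next multiple of rate" computations;
// for positive integers the idiom is:
//
//   round_up(t, rate) == ((t + rate - 1) / rate) * rate;  // e.g. round_up(7, 5) == 10, round_up(10, 5) == 10
//
// and the timeout computed just below is then the ceiling of the remaining nanoseconds
// converted to milliseconds.
// --- end aside ---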
nextExpectedMonotonicDelta - : nowMonotonicDeltaRoundedToRate); - - // + NS_PER_MS - 1 to ensure always rounding up - timeout = std::max(0, ((nextMonotonicDelta + NS_PER_MS - 1) - nowMonotonicDelta) / NS_PER_MS); - } - } - } - - if (onlineMonitorThread) { - onlineMonitorThread->terminate(); - } - - procThreadArgs.mIsDone = true; - pthread_join(procThread, nullptr); - - // stop all the perf events - mCountersGroup.stop(); - - // send any final remaining data now that the events are stopped - if (!mCountersBuf.send(mPerfToMemoryBuffer)) { - LOG_ERROR("PerfBuffer::send failed"); - handleException(); - } - - // terminate all remaining sync threads - if (mSyncThread != nullptr) { - mSyncThread->terminate(); - } - - // close off the buffer - mMemoryBuffer.flush(); - mPerfToMemoryBuffer.setDone(); - - mIsDone = true; - - // send a notification that data is ready - sem_post(&mSenderSem); -} - -bool PerfSource::handleUEvent(const uint64_t currTime) -{ - UEventResult result; - if (!mUEvent.read(&result)) { - LOG_DEBUG("UEvent::Read failed"); - return false; - } - - if (strcmp(result.mSubsystem, "cpu") == 0) { - if (strncmp(result.mDevPath, CPU_DEVPATH, sizeof(CPU_DEVPATH) - 1) != 0) { - LOG_DEBUG("Unexpected cpu DEVPATH format"); - return false; - } - int cpu; - if (!stringToInt(&cpu, result.mDevPath + sizeof(CPU_DEVPATH) - 1, 10)) { - LOG_DEBUG("stringToInt failed"); - return false; - } - - if (static_cast(cpu) >= mCpuInfo.getNumberOfCores()) { - LOG_ERROR("Only %zu cores are expected but core %i reports %s", - mCpuInfo.getNumberOfCores(), - cpu, - result.mAction); - handleException(); - } - - if (strcmp(result.mAction, "online") == 0) { - return handleCpuOnline(currTime, cpu); - } - if (strcmp(result.mAction, "offline") == 0) { - return handleCpuOffline(currTime, cpu); - } - } - - return true; -} - -bool PerfSource::handleCpuOnline(uint64_t currTime, unsigned cpu) -{ - bool ret; - - mAttrsBuffer->onlineCPU(currTime, cpu); - - id_to_key_mapping_tracker_t wrapper {*mAttrsBuffer}; - - const std::pair result = mCountersGroup.onlineCPU( - cpu, - mAppTids, - OnlineEnabledState::ENABLE_NOW, - wrapper, // - [this](int fd) -> bool { return mMonitor.add(fd); }, - [this](int fd, int cpu, bool hasAux) -> bool { return mCountersBuf.useFd(fd, cpu, hasAux); }, - &lnx::getChildTids); - - switch (result.first) { - case OnlineResult::SUCCESS: - // This a bit fragile, we are assuming the driver will only write one counter per CPU - // which is true at the time of writing (just the cpu freq) - mAttrsBuffer->perfCounterHeader(currTime, 1); - mReadCountersFn(*mAttrsBuffer, cpu); - mAttrsBuffer->perfCounterFooter(); - // fall through - /* no break */ - case OnlineResult::CPU_OFFLINE: - ret = true; - break; - default: - ret = false; - break; - } - - mAttrsBuffer->flush(); - - mCpuInfo.updateIds(true); - mCoreNameFn(mSummary, cpu); - mSummary.flush(); - return ret; -} - -bool PerfSource::handleCpuOffline(uint64_t currTime, unsigned cpu) -{ - const bool ret = mCountersGroup.offlineCPU(cpu, [this](int cpu) { mCountersBuf.discard(cpu); }); - mAttrsBuffer->offlineCPU(currTime, cpu); - return ret; -} - -void PerfSource::interrupt() -{ - int8_t c = 0; - // Write to the pipe to wake the monitor which will cause mSessionIsActive to be reread - if (::write(*mInterruptWrite, &c, sizeof(c)) != sizeof(c)) { - LOG_ERROR("write failed"); - handleException(); - } -} - -bool PerfSource::write(ISender & sender) -{ - // check mIsDone before we write so we guarantee the - // buffers won't have anymore added after we return - const bool done 
= mIsDone; - - mSummary.write(sender); - mAttrsBuffer->write(sender); - mProcBuffer.write(sender); - mPerfToMemoryBuffer.write(sender); - - // This is racey, unless we assume no one posts reader sem before profiling started - if (mSyncThread != nullptr) { - mSyncThread->send(sender); - } - - return done; -} diff --git a/daemon/linux/perf/PerfSource.h b/daemon/linux/perf/PerfSource.h deleted file mode 100644 index f8393c72..00000000 --- a/daemon/linux/perf/PerfSource.h +++ /dev/null @@ -1,91 +0,0 @@ -/* Copyright (C) 2010-2022 by Arm Limited. All rights reserved. */ - -#ifndef PERFSOURCE_H -#define PERFSOURCE_H - -#include "Buffer.h" -#include "Monitor.h" -#include "Source.h" -#include "SummaryBuffer.h" -#include "UEvent.h" -#include "lib/AutoClosingFd.h" -#include "linux/perf/PerfAttrsBuffer.h" -#include "linux/perf/PerfBuffer.h" -#include "linux/perf/PerfGroups.h" -#include "linux/perf/PerfSyncThreadBuffer.h" -#include "linux/perf/PerfToMemoryBuffer.h" - -#include -#include -#include -#include - -#include - -class ISender; -class FtraceDriver; -class ICpuInfo; - -static constexpr auto MEGABYTES = 1024 * 1024; - -class PerfSource : public PrimarySource { -public: - static perf_ringbuffer_config_t createPerfBufferConfig(); - - PerfSource(perf_event_group_activator_config_t const & configuration, - perf_groups_activator_state_t && state, - std::unique_ptr && attrs_buffer, - sem_t & senderSem, - std::function profilingStartedCallback, - std::function(ISummaryConsumer &, std::function)> sendSummaryFn, - std::function coreNameFn, - std::function readCountersFn, - std::set appTids, - FtraceDriver & ftraceDriver, - bool enableOnCommandExec, - ICpuInfo & cpuInfo); - - // Intentionally undefined - PerfSource(const PerfSource &) = delete; - PerfSource & operator=(const PerfSource &) = delete; - PerfSource(PerfSource &&) = delete; - PerfSource & operator=(PerfSource &&) = delete; - - bool prepare(); - std::optional sendSummary() override; - void run(std::uint64_t, std::function endSession) override; - void interrupt() override; - bool write(ISender & sender) override; - -private: - PerfConfig const & mConfig; - SummaryBuffer mSummary; - Buffer mMemoryBuffer; - PerfToMemoryBuffer mPerfToMemoryBuffer; - PerfBuffer mCountersBuf; - perf_groups_activator_state_t mCountersGroupState; - perf_groups_activator_t mCountersGroup; - Monitor mMonitor {}; - UEvent mUEvent {}; - std::set mAppTids; - sem_t & mSenderSem; - std::unique_ptr mAttrsBuffer; - PerfAttrsBuffer mProcBuffer; - std::function mProfilingStartedCallback; - std::function(ISummaryConsumer &, std::function)> mSendSummaryFn; - std::function mCoreNameFn; - std::function mReadCountersFn; - lib::AutoClosingFd mInterruptRead {}; - lib::AutoClosingFd mInterruptWrite {}; - std::atomic_bool mIsDone {false}; - FtraceDriver & mFtraceDriver; - ICpuInfo & mCpuInfo; - std::unique_ptr mSyncThread {}; - bool enableOnCommandExec {false}; - - bool handleUEvent(uint64_t currTime); - bool handleCpuOnline(uint64_t currTime, unsigned cpu); - bool handleCpuOffline(uint64_t currTime, unsigned cpu); -}; - -#endif // PERFSOURCE_H diff --git a/daemon/linux/perf/PerfSyncThread.cpp b/daemon/linux/perf/PerfSyncThread.cpp index 4d9a7203..76de1902 100644 --- a/daemon/linux/perf/PerfSyncThread.cpp +++ b/daemon/linux/perf/PerfSyncThread.cpp @@ -6,6 +6,7 @@ #include "lib/Assert.h" #include "lib/GenericTimer.h" #include "lib/String.h" +#include "lib/Syscall.h" #include #include @@ -100,7 +101,7 @@ void PerfSyncThread::run(std::uint64_t monotonicRawBase) noexcept { // get pid and tid 
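// --- editorial aside (illustrative only, not part of this patch) ---
// The raw syscall below is replaced by the lib::gettid() wrapper. A typical implementation
// of such a wrapper (a sketch only; the project's actual lib/Syscall.h may differ) would be:
//
//   #include <sys/syscall.h>
//   #include <unistd.h>
//   inline pid_t gettid_wrapper() { return static_cast<pid_t>(::syscall(__NR_gettid)); }
// --- end aside ---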
const pid_t pid = getpid(); - const pid_t tid = syscall(__NR_gettid); + const pid_t tid = lib::gettid(); // change thread priority { diff --git a/daemon/linux/perf/PerfSyncThreadBuffer.cpp b/daemon/linux/perf/PerfSyncThreadBuffer.cpp deleted file mode 100644 index 973a25ba..00000000 --- a/daemon/linux/perf/PerfSyncThreadBuffer.cpp +++ /dev/null @@ -1,81 +0,0 @@ -/* Copyright (C) 2018-2021 by Arm Limited. All rights reserved. */ - -#define BUFFER_USE_SESSION_DATA - -#include "linux/perf/PerfSyncThreadBuffer.h" - -#include "BufferUtils.h" -#include "ISender.h" -#include "SessionData.h" - -std::unique_ptr PerfSyncThreadBuffer::create(bool supportsClockId, - bool hasSPEConfiguration, - sem_t & senderSem) -{ - std::unique_ptr result; - - // fill result - if (hasSPEConfiguration || !supportsClockId) { - const bool enableSyncThreadMode = (!supportsClockId); - const bool readTimer = hasSPEConfiguration; - result = std::make_unique(enableSyncThreadMode, readTimer, senderSem); - } - - return result; -} - -PerfSyncThreadBuffer::PerfSyncThreadBuffer(bool enableSyncThreadMode, bool readTimer, sem_t & readerSem) - : buffer(1024 * 1024, readerSem), - thread(enableSyncThreadMode, - readTimer, - [this](pid_t p, pid_t t, std::uint64_t f, std::uint64_t cmr, std::uint64_t vcnt) { - write(p, t, cmr, vcnt, f); - }) -{ -} - -void PerfSyncThreadBuffer::start(std::uint64_t monotonicRawBase) -{ - thread.start(monotonicRawBase); -} - -void PerfSyncThreadBuffer::terminate() -{ - thread.terminate(); -} - -void PerfSyncThreadBuffer::write(pid_t pid, - pid_t tid, - std::uint64_t monotonicRaw, - std::uint64_t vcnt, - std::uint64_t freq) -{ - // make sure there is space for at least one more record - const int minBytesRequired = IRawFrameBuilder::MAX_FRAME_HEADER_SIZE + buffer_utils::MAXSIZE_PACK32 - + ((1 * buffer_utils::MAXSIZE_PACK64) + (3 * buffer_utils::MAXSIZE_PACK32)) - + (2 * buffer_utils::MAXSIZE_PACK64); - - // wait for write space - buffer.waitForSpace(minBytesRequired); - - buffer.beginFrame(FrameType::PERF_SYNC); - buffer.packInt(0); // just pass CPU == 0, Since Streamline 7.4 it is ignored anyway - - // write header - buffer.packInt(pid); - buffer.packInt(tid); - buffer.packInt64(freq); - - // write record - buffer.packInt64(monotonicRaw); - buffer.packInt64(vcnt); - - buffer.endFrame(); - // commit data (always do this so that the record is pushed to the host in live mode in a timely fashion) - buffer.flush(); -} - -void PerfSyncThreadBuffer::send(ISender & sender) -{ - buffer.write(sender); -} diff --git a/daemon/linux/perf/PerfSyncThreadBuffer.h b/daemon/linux/perf/PerfSyncThreadBuffer.h deleted file mode 100644 index cea5273b..00000000 --- a/daemon/linux/perf/PerfSyncThreadBuffer.h +++ /dev/null @@ -1,63 +0,0 @@ -/* Copyright (C) 2018-2021 by Arm Limited. All rights reserved. 
*/ - -#ifndef INCLUDE_LINUX_PERF_PERFSYNCTHREADBUFFER_H -#define INCLUDE_LINUX_PERF_PERFSYNCTHREADBUFFER_H - -#include "Buffer.h" -#include "linux/perf/PerfSyncThread.h" - -#include -#include -#include - -#include -#include - -class ISender; - -class PerfSyncThreadBuffer { -public: - /** - * Factory method, creates appropriate number of sync thread objects - * - * @param supportsClockId True if the kernel perf API supports configuring clock_id - * @param hasSPEConfiguration True if the user selected at least one SPE configuration - * @return The buffer object - */ - static std::unique_ptr create(bool supportsClockId, - bool hasSPEConfiguration, - sem_t & senderSem); - - /** - * Constructor - * - * @param enableSyncThreadMode True to enable 'gatord-sync' thread mode - * @param readTimer True to read the arch timer, false otherwise - * @param readerSem The buffer reader semaphore - */ - PerfSyncThreadBuffer(bool enableSyncThreadMode, bool readTimer, sem_t & readerSem); - - /** - * Start thread - * @param monotonicRawBase The monotonic raw value that equates to monotonic delta 0 - */ - void start(std::uint64_t monotonicRawBase); - - /** - * Stop and join thread - */ - void terminate(); - - /** - * Write buffer to sender - */ - void send(ISender & sender); - -private: - Buffer buffer; - PerfSyncThread thread; - - void write(pid_t pid, pid_t tid, std::uint64_t monotonicRaw, std::uint64_t vcnt, std::uint64_t freq); -}; - -#endif /* INCLUDE_LINUX_PERF_PERFSYNCTHREADBUFFER_H */ diff --git a/daemon/linux/perf/PerfToMemoryBuffer.cpp b/daemon/linux/perf/PerfToMemoryBuffer.cpp deleted file mode 100644 index 0d8d1f7d..00000000 --- a/daemon/linux/perf/PerfToMemoryBuffer.cpp +++ /dev/null @@ -1,175 +0,0 @@ -/* Copyright (C) 2020-2021 by Arm Limited. All rights reserved. 
*/ - -#include "linux/perf/PerfToMemoryBuffer.h" - -#include "BufferUtils.h" -#include "ISender.h" -#include "lib/Assert.h" - -PerfToMemoryBuffer::PerfToMemoryBuffer(IRawFrameBuilderWithDirectAccess & builder, - IBufferControl & controller, - bool oneShot) - : builder(builder), controller(controller), bufferSem(), full(false), done(false), oneShot(oneShot) -{ - sem_init(&bufferSem, 0, 0); -} - -bool PerfToMemoryBuffer::waitFor(std::size_t bytes) -{ - while (std::size_t(builder.bytesAvailable()) <= bytes) { - if (oneShot || done) { - full = true; - return false; - } - builder.flush(); - sem_wait(&bufferSem); - } - return true; -} - -bool PerfToMemoryBuffer::isFull() const -{ - return full || controller.isFull(); -} - -void PerfToMemoryBuffer::setDone() -{ - controller.setDone(); - done = true; - sem_post(&bufferSem); -} - -bool PerfToMemoryBuffer::write(ISender & sender) -{ - const auto result = controller.write(sender); - sem_post(&bufferSem); - return result; -} - -void PerfToMemoryBuffer::consumePerfAuxRecord(int cpu, - std::uint64_t auxTailValue, - lib::Span recordChunks) -{ - static constexpr int MAX_HEADER_SIZE = buffer_utils::MAXSIZE_PACK32 // frame type - + buffer_utils::MAXSIZE_PACK32 // cpu - + buffer_utils::MAXSIZE_PACK64 // tail - + buffer_utils::MAXSIZE_PACK32; // size - static constexpr int MAX_FRAME_SIZE = ISender::MAX_RESPONSE_LENGTH - MAX_HEADER_SIZE; - - // skip if complete - if (full) { - return; - } - - for (const auto & recordChunk : recordChunks) { - for (std::size_t offset = 0; offset < recordChunk.byteCount;) { - if (!waitFor(MAX_HEADER_SIZE)) { - return; - } - - const std::size_t bytesRemaining = recordChunk.byteCount - offset; - const int maxWriteLength = std::min(bytesRemaining, MAX_FRAME_SIZE); - const int actualWriteLength = std::min(maxWriteLength, builder.bytesAvailable() - MAX_HEADER_SIZE); - - if (actualWriteLength <= 0) { - runtime_assert(actualWriteLength == 0, "Negative write length???"); - continue; - } - - builder.beginFrame(FrameType::PERF_AUX); - builder.packInt(cpu); - builder.packInt64(auxTailValue); - builder.packInt(actualWriteLength); - builder.writeBytes(recordChunk.chunkPointer + offset, actualWriteLength); - builder.endFrame(); - - offset += actualWriteLength; - auxTailValue += actualWriteLength; - } - } -} - -void PerfToMemoryBuffer::consumePerfDataRecord(int cpu, lib::Span recordChunks) -{ - static constexpr int MAX_HEADER_SIZE = buffer_utils::MAXSIZE_PACK32 // frame type - + buffer_utils::MAXSIZE_PACK32 // cpu - + 4; // blob length - - // skip if complete - if (full) { - return; - } - - static_assert(sizeof(IPerfBufferConsumer::data_word_t) == 8, "Expected word size is 64-bit"); - - bool inFrame = false; - int lengthWriteIndex = 0; - std::uint32_t totalWrittenSinceFrameEnd = 0; - for (const auto & recordChunk : recordChunks) { - const std::size_t totalWordCount = - recordChunk.firstChunk.wordCount - + (recordChunk.optionalSecondChunk.chunkPointer != nullptr ? recordChunk.optionalSecondChunk.wordCount : 0); - const std::size_t requiredBytesForRecord = totalWordCount * buffer_utils::MAXSIZE_PACK64; - - // are we in a frame, is there space to push another record? 
- if (inFrame) { - if (std::size_t(builder.bytesAvailable()) >= requiredBytesForRecord) { - // yes, append the frame data and continue - totalWrittenSinceFrameEnd += appendData(recordChunk); - continue; - } - // no, just end the current frame - endDataFrame(lengthWriteIndex, totalWrittenSinceFrameEnd); - inFrame = false; - totalWrittenSinceFrameEnd = 0; - } - - const std::size_t totalRequiredBytes = MAX_HEADER_SIZE + requiredBytesForRecord; - if (!waitFor(totalRequiredBytes)) { - return; - } - - // write the header - builder.beginFrame(FrameType::PERF_DATA); - builder.packInt(cpu); - lengthWriteIndex = builder.getWriteIndex(); - builder.advanceWrite(4); // skip the length field for now - - // write the record - inFrame = true; - totalWrittenSinceFrameEnd = appendData(recordChunk); - } - - if (inFrame) { - endDataFrame(lengthWriteIndex, totalWrittenSinceFrameEnd); - } -} - -void PerfToMemoryBuffer::endDataFrame(int lengthWriteIndex, std::uint32_t totalWrittenSinceFrameEnd) -{ - const char lengthBuffer[4] = {char(totalWrittenSinceFrameEnd >> 0), - char(totalWrittenSinceFrameEnd >> 8), - char(totalWrittenSinceFrameEnd >> 16), - char(totalWrittenSinceFrameEnd >> 24)}; - - builder.writeDirect(lengthWriteIndex, lengthBuffer, 4); - builder.endFrame(); -} - -std::uint32_t PerfToMemoryBuffer::appendData(const DataRecordChunkTuple & recordChunk) -{ - return appendData(recordChunk.firstChunk) + appendData(recordChunk.optionalSecondChunk); -} - -std::uint32_t PerfToMemoryBuffer::appendData(const DataRecordChunk & recordChunk) -{ - std::uint32_t result = 0; - - if (recordChunk.chunkPointer != nullptr) { - for (std::size_t index = 0; index < recordChunk.wordCount; ++index) { - result += builder.packInt64(recordChunk.chunkPointer[index]); - } - } - - return result; -} diff --git a/daemon/linux/perf/PerfToMemoryBuffer.h b/daemon/linux/perf/PerfToMemoryBuffer.h deleted file mode 100644 index 3e7373a3..00000000 --- a/daemon/linux/perf/PerfToMemoryBuffer.h +++ /dev/null @@ -1,40 +0,0 @@ -/* Copyright (C) 2020-2021 by Arm Limited. All rights reserved. */ - -#pragma once - -#include "Buffer.h" -#include "IBufferControl.h" -#include "IRawFrameBuilder.h" -#include "linux/perf/IPerfBufferConsumer.h" - -#include - -#include - -class PerfToMemoryBuffer : public IPerfBufferConsumer, public IBufferControl { -public: - PerfToMemoryBuffer(Buffer & buffer, bool oneShot) : PerfToMemoryBuffer(buffer, buffer, oneShot) {} - PerfToMemoryBuffer(IRawFrameBuilderWithDirectAccess & builder, IBufferControl & controller, bool oneShot); - - void consumePerfAuxRecord(int cpu, - std::uint64_t auxTailValue, - lib::Span recordChunks) override; - void consumePerfDataRecord(int cpu, lib::Span recordChunks) override; - - bool write(ISender & sender) override; - bool isFull() const override; - void setDone() override; - -private: - IRawFrameBuilderWithDirectAccess & builder; - IBufferControl & controller; - sem_t bufferSem; - std::atomic full; - std::atomic done; - bool oneShot; - - bool waitFor(std::size_t bytes); - void endDataFrame(int lengthWriteIndex, std::uint32_t totalWrittenSinceFrameEnd); - std::uint32_t appendData(const DataRecordChunkTuple & recordChunk); - std::uint32_t appendData(const DataRecordChunk & recordChunk); -}; diff --git a/daemon/linux/perf/id_to_key_mapping_tracker.h b/daemon/linux/perf/id_to_key_mapping_tracker.h deleted file mode 100644 index 087b32f4..00000000 --- a/daemon/linux/perf/id_to_key_mapping_tracker.h +++ /dev/null @@ -1,24 +0,0 @@ -/* Copyright (C) 2022 by Arm Limited. All rights reserved. 
*/ - -#pragma once - -#include "k/perf_event.h" // Use a snapshot of perf_event.h as it may be more recent than what is on the target and if not newer features won't be supported anyways -#include "linux/perf/IPerfAttrsConsumer.h" - -/** - * Wrap the IPerfAttrsConsumer, calling marshalKeys/marshalKeysOld. Later this will just accumulate the values into a vector to allow being sent later via some async message. - */ -class id_to_key_mapping_tracker_t { -public: - explicit constexpr id_to_key_mapping_tracker_t(IPerfAttrsConsumer & consumer) : consumer(consumer) {} - - void operator()(int count, const uint64_t * ids, const int * keys) { consumer.marshalKeys(count, ids, keys); } - - void operator()(int keyCount, const int * keys, int bytes, const char * buf) - { - consumer.marshalKeysOld(keyCount, keys, bytes, buf); - } - -private: - IPerfAttrsConsumer & consumer; -}; diff --git a/daemon/linux/proc/ProcessChildren.cpp b/daemon/linux/proc/ProcessChildren.cpp index a821bb45..43202386 100644 --- a/daemon/linux/proc/ProcessChildren.cpp +++ b/daemon/linux/proc/ProcessChildren.cpp @@ -2,29 +2,39 @@ #include "linux/proc/ProcessChildren.h" +#include "Logging.h" #include "lib/String.h" +#include #include #include #include #include +#include namespace lnx { - static void addTidsRecursively(std::set & tids, int tid) + // NOLINTNEXTLINE(misc-no-recursion) + void addTidsRecursively(std::set & tids, int tid, bool including_children) { + constexpr std::size_t buffer_size = 64; // should be large enough for the proc path + auto result = tids.insert(tid); if (!result.second) { return; // we've already added this and its children } + lib::printf_str_t filename {}; + // try to get all children (forked processes), available since Linux 3.5 - lib::printf_str_t<64> filename {"/proc/%d/task/%d/children", tid, tid}; - std::ifstream children {filename, std::ios_base::in}; - if (children) { - int child; - while (children >> child) { - addTidsRecursively(tids, child); + if (including_children) { + filename.printf("/proc/%d/task/%d/children", tid, tid); + std::ifstream children {filename, std::ios_base::in}; + if (children) { + int child; + while (children >> child) { + addTidsRecursively(tids, child, true); + } } } @@ -41,17 +51,78 @@ namespace lnx { if (std::strcmp(taskEntry->d_name, ".") != 0 && std::strcmp(taskEntry->d_name, "..") != 0) { const auto child = std::strtol(taskEntry->d_name, nullptr, 10); if (child > 0) { - tids.insert(child); + tids.insert(pid_t(child)); } } } } } - std::set getChildTids(int tid) + // NOLINTNEXTLINE(bugprone-easily-swappable-parameters) + std::set stop_all_tids(std::set const & pids, + std::set const & filter_set, + std::map & paused_tids) { - std::set result; - addTidsRecursively(result, tid); + constexpr unsigned sleep_usecs = 100; + + std::set result {}; + bool modified {true}; + + // repeat until no new items detected + while (modified && !pids.empty()) { + // clear modified for next iteration + modified = false; + + // first find any children + std::set tids {}; + for (pid_t pid : pids) { + addTidsRecursively(tids, pid, true); + } + // then sigstop them all + for (pid_t tid : tids) { + // already stopped ? + if (paused_tids.count(tid) > 0) { + // record it in the result as it is still a tracked pid + result.insert(tid); + // but no need to stop it again + continue; + } + + // to be ignored ? + if (filter_set.count(tid) > 0) { + // just skip it + continue; + } + + // stop it? 
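// --- editorial aside (illustrative only, not part of this patch) ---
// kill() failing with errno == ESRCH means the thread has already exited, so in that case
// the tid is deliberately not kept in the result set; any other failure (e.g. EPERM) leaves
// the tid in the result so it continues to be tracked even though it could not be stopped.
// --- end aside ---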
+ if (lib::kill(tid, SIGSTOP) == -1) { + // error + auto const error = errno; + + // add it to the map with an empty entry so as not to poll it again, but dont set modified + LOG_DEBUG("Could not SIGSTOP %d due to errno=%d", tid, error); + paused_tids.emplace(tid, sig_continuer_t {}); + + // add it to 'result' if exited + if (error != ESRCH) { + result.insert(tid); + } + } + else { + LOG_DEBUG("Successfully stopped %d", tid); + // success + paused_tids.emplace(tid, sig_continuer_t {tid}); + result.insert(tid); + modified = true; + } + } + + // sleep some tiny amount of time so that the signals can propogate before checking again + if (modified) { + usleep(sleep_usecs); + } + } + return result; } } diff --git a/daemon/linux/proc/ProcessChildren.h b/daemon/linux/proc/ProcessChildren.h index 59210fe3..aa222597 100644 --- a/daemon/linux/proc/ProcessChildren.h +++ b/daemon/linux/proc/ProcessChildren.h @@ -1,17 +1,75 @@ -/* Copyright (C) 2018-2021 by Arm Limited. All rights reserved. */ +/* Copyright (C) 2018-2022 by Arm Limited. All rights reserved. */ #ifndef INCLUDE_LINUX_PROC_PROCESS_CHILDREN_H #define INCLUDE_LINUX_PROC_PROCESS_CHILDREN_H +#include "lib/Syscall.h" +#include "unistd.h" + +#include +#include #include +#include namespace lnx { + /** + * Inherently racey function to collect child tids because threads can be created and destroyed while this is running + */ + void addTidsRecursively(std::set & tids, int tid, bool including_children); + /** * Inherently racey function to collect child tids because threads can be created and destroyed while this is running * * @return as many of the known child tids (including child processes) */ - std::set getChildTids(int tid); + inline std::set getChildTids(int tid, bool including_children) + { + std::set result; + addTidsRecursively(result, tid, including_children); + return result; + } + + /** RAII object that sends SIGCONT to some pid on request or dtor */ + class sig_continuer_t { + public: + constexpr sig_continuer_t() = default; + explicit constexpr sig_continuer_t(pid_t pid) : pid(pid) {} + + // not copyable + sig_continuer_t(sig_continuer_t const &) = delete; + sig_continuer_t & operator=(sig_continuer_t const &) = delete; + + // only movable + sig_continuer_t(sig_continuer_t && that) noexcept : pid(std::exchange(that.pid, 0)) {} + sig_continuer_t & operator=(sig_continuer_t && that) noexcept + { + if (this != &that) { + sig_continuer_t tmp {std::move(that)}; + std::swap(pid, tmp.pid); + } + return *this; + } + + // destructor sends sigcont + ~sig_continuer_t() noexcept { signal(); } + + /** sent sigcont to the target pid */ + void signal() noexcept + { + pid_t pid {std::exchange(this->pid, 0)}; + if (pid != 0) { + lib::kill(pid, SIGCONT); + } + } + + private: + pid_t pid {0}; + }; + + /** Find all the tids associated with a set of pids and sigstop them (so long as the pid is not in the filter set) */ + [[nodiscard]] std::set stop_all_tids(std::set const & pids, + std::set const & filter_set, + std::map & paused_tids); } #endif diff --git a/daemon/logging/agent_log.cpp b/daemon/logging/agent_log.cpp index a0f85862..ab9135d2 100644 --- a/daemon/logging/agent_log.cpp +++ b/daemon/logging/agent_log.cpp @@ -136,17 +136,6 @@ namespace logging { return decode_str(const_cast(s.data()), const_cast(s.data() + s.size())); } - std::string_view find_end_of_line(std::string_view chars) - { - for (std::size_t n = 0; n < chars.size(); ++n) { - if (chars[n] == '\n') { - return chars.substr(0, n + 1); - } - } - - return {}; - } - constexpr std::size_t 
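// --- editorial aside (illustrative only, not part of this patch) ---
// Judging by the field_index_* constants and the decode further down, the seven fields
// carried by each encoded agent log line are: level, tid, file, line, seconds, nanoseconds
// and the message text.
// --- end aside ---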
expected_no_fields = 7; constexpr std::size_t field_index_level = 0; constexpr std::size_t field_index_tid = 1; @@ -289,26 +278,21 @@ namespace logging { } } - // NOLINTNEXTLINE(misc-no-recursion) void agent_log_reader_t::do_async_read() { - LOG_TRACE("(%p) entered do_async_read", this); - - // NOLINTNEXTLINE(misc-no-recursion) - boost::asio::async_read_until(in, buffer, '\n', [st = shared_from_this()](auto const & ec, auto n) { - // handle error - if (ec) { - LOG_ERROR_IF_NOT_EOF(ec, "(%p) read failed with %s", st.get(), ec.message().c_str()); - return; - } - - // process line of text - return st->do_process_next_line(n); - }); + using namespace async::continuations; + + spawn("agent-log-reader", + async::async_consume_all_lines( + line_reader, + [st = shared_from_this()](std::string_view line) { + // process line of text + st->do_process_next_line(line); + }, + use_continuation)); } - // NOLINTNEXTLINE(misc-no-recursion) - void agent_log_reader_t::do_process_next_line(std::size_t n) + void agent_log_reader_t::do_process_next_line(std::string_view line) { constexpr std::size_t expected_minimum_size = message_start_marker.size() // + 1 // level (int) @@ -326,61 +310,49 @@ namespace logging { + 0 // message (str) + message_end_marker.size(); - // assumes that data returns a single item - static_assert(std::is_same_v); - - // first find the substr containing up-to the first '\n' marker - auto input_area = buffer.data(); - auto read_area = - std::string_view(reinterpret_cast(input_area.data()), std::min(n, input_area.size())); - - auto message = find_end_of_line(read_area); - // empty substr means no marker, get more bytes - if (message.empty()) { + if (line.empty()) { LOG_TRACE("(%p) No end of line found", this); return do_async_read(); } - // the number of bytes to consume - auto n_to_consume = message.size(); - // remove trailing newline (turn it into a null terminator instead so that it can be used with printf) - const_cast(message.back()) = 0; - message.remove_suffix(1); + if (line.back() == '\n') { + const_cast(line.back()) = 0; + line.remove_suffix(1); + } // ignore empty lines - if (message.empty()) { + if (line.empty()) { LOG_TRACE("(%p) Ignoring empty line", this); - buffer.consume(n_to_consume); return do_async_read(); } // must have a minimum size - if (message.size() < expected_minimum_size) { - return do_unexpected_message(n_to_consume, message); + if (line.size() < expected_minimum_size) { + return do_unexpected_message(line); } // does it start with the marker? - if (message.substr(0, message_start_marker.size()) != message_start_marker) { + if (line.substr(0, message_start_marker.size()) != message_start_marker) { // no, just a normal line of text - return do_unexpected_message(n_to_consume, message); + return do_unexpected_message(line); } // does it end with the marker? 
- if (message.substr(message.size() - message_end_marker.size()) != message_end_marker) { + if (line.substr(line.size() - message_end_marker.size()) != message_end_marker) { // no, just a normal line of text - return do_unexpected_message(n_to_consume, message); + return do_unexpected_message(line); } // find the separators and split the fields - auto inner = message.substr(message_start_marker.size(), - message.size() - (message_start_marker.size() + message_end_marker.size())); + auto inner = line.substr(message_start_marker.size(), + line.size() - (message_start_marker.size() + message_end_marker.size())); auto fields_opt = split_fields(inner); if (!fields_opt) { // no, just a normal line of text - return do_unexpected_message(n_to_consume, message); + return do_unexpected_message(line); } // decode the fields @@ -395,50 +367,28 @@ namespace logging { // all fields must be valid if ((!level_num) || (!tid_num) || (!file) || (!line_num) || (!secs_num) || (!nsec_num) || (!text)) { - // safely null terminate before logging - LOG_TRACE("(%p) Invalid field encoding (%u, %u, %u, %u,%u, %u, %u) in '%s'", - this, - !!level_num, - !!tid_num, - !!file, - !!line_num, - !!secs_num, - !!nsec_num, - !!text, - message.data()); - return do_unexpected_message(n_to_consume, message); + return do_unexpected_message(line); } // a valid message - return do_expected_message(n_to_consume, - thread_id_t(*tid_num), + return do_expected_message(thread_id_t(*tid_num), log_level_t(*level_num), log_timestamp_t {*secs_num, *nsec_num}, source_loc_t {*file, unsigned(*line_num)}, *text); } - // NOLINTNEXTLINE(misc-no-recursion) - void agent_log_reader_t::do_unexpected_message(std::size_t n_to_consume, std::string_view msg) + void agent_log_reader_t::do_unexpected_message(std::string_view msg) { - do_expected_message(n_to_consume, - thread_id_t {0}, - log_level_t::error, - log_timestamp_t {}, - source_loc_t {}, - msg); + do_expected_message(thread_id_t {0}, log_level_t::error, log_timestamp_t {}, source_loc_t {}, msg); } - // NOLINTNEXTLINE(misc-no-recursion) - void agent_log_reader_t::do_expected_message(std::size_t n_to_consume, - thread_id_t tid, + void agent_log_reader_t::do_expected_message(thread_id_t tid, log_level_t level, log_timestamp_t timestamp, source_loc_t location, std::string_view message) { consumer(tid, level, timestamp, location, message); - buffer.consume(n_to_consume); - do_async_read(); } } diff --git a/daemon/logging/agent_log.h b/daemon/logging/agent_log.h index 4270b820..7c79779e 100644 --- a/daemon/logging/agent_log.h +++ b/daemon/logging/agent_log.h @@ -1,8 +1,9 @@ -/* Copyright (C) 2010-2021 by Arm Limited. All rights reserved. */ +/* Copyright (C) 2010-2022 by Arm Limited. All rights reserved. 
*/ #pragma once #include "Logging.h" +#include "async/async_line_reader.hpp" #include "lib/AutoClosingFd.h" #include @@ -14,7 +15,6 @@ #include #include -#include namespace logging { /** Implements log_sink_t for agent sub-processes that log out via the IPC channel */ @@ -67,27 +67,27 @@ namespace logging { } agent_log_reader_t(boost::asio::io_context & io_context, lib::AutoClosingFd && fd, consumer_fn_t consumer) - : in(io_context, fd.release()), consumer(std::move(consumer)) + : consumer(std::move(consumer)), + line_reader(std::make_shared( + boost::asio::posix::stream_descriptor {io_context, fd.release()})) { } private: - boost::asio::posix::stream_descriptor in; consumer_fn_t consumer; - boost::asio::streambuf buffer {}; + std::shared_ptr line_reader; /** Read the next line of data from the stream */ void do_async_read(); /** Process the received line */ - void do_process_next_line(std::size_t n); + void do_process_next_line(std::string_view line); /** Handle the line having an unexpected format */ - void do_unexpected_message(std::size_t n_to_consume, std::string_view msg); + void do_unexpected_message(std::string_view msg); /** Handle the decoded log item */ - void do_expected_message(std::size_t n_to_consume, - thread_id_t tid, + void do_expected_message(thread_id_t tid, log_level_t level, log_timestamp_t timestamp, source_loc_t location, diff --git a/daemon/logging/global_log.cpp b/daemon/logging/global_log.cpp index 8b46bbd8..a1b55442 100644 --- a/daemon/logging/global_log.cpp +++ b/daemon/logging/global_log.cpp @@ -2,9 +2,11 @@ #include "logging/global_log.h" +#include #include #include #include +#include namespace logging { namespace { @@ -31,6 +33,16 @@ namespace logging { } } + global_log_sink_t::global_log_sink_t() + { + // disable buffering of output + ::setvbuf(stdout, nullptr, _IONBF, 0); + ::setvbuf(stderr, nullptr, _IONBF, 0); + // make sure that everything goes to output immediately + std::cout << std::unitbuf; + std::cerr << std::unitbuf; + } + void global_log_sink_t::log_item(thread_id_t tid, log_level_t level, log_timestamp_t const & timestamp, @@ -75,6 +87,21 @@ namespace logging { last_error = std::string(message); output_item(output_debug, "FATAL:", tid, timestamp, location, message); break; + case log_level_t::child_stdout: + if (output_debug) { + output_item(output_debug, "STDOU:", tid, timestamp, location, message); + } + // always output to cout, regardless of whether the cerr log was also output + std::cout << message; + break; + case log_level_t::child_stderr: + if (output_debug) { + output_item(output_debug, "STDER:", tid, timestamp, location, message); + } + else { + std::cerr << message; + } + break; } } } diff --git a/daemon/logging/global_log.h b/daemon/logging/global_log.h index 13c69230..bf7627ed 100644 --- a/daemon/logging/global_log.h +++ b/daemon/logging/global_log.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2021 by Arm Limited. All rights reserved. */ +/* Copyright (C) 2010-2022 by Arm Limited. All rights reserved. 
*/ #pragma once @@ -12,6 +12,8 @@ namespace logging { /** Default log sink, prints to stdout / stderr depending on message type and configuration */ class global_log_sink_t : public log_sink_t { public: + global_log_sink_t(); + /** Toggle whether TRACE/DEBUG/SETUP messages are output to the console */ void set_debug_enabled(bool enabled) override { output_debug = enabled; } /** Store some log item to the log */ diff --git a/daemon/logging/logging.cpp b/daemon/logging/logging.cpp index 76aebaf3..bdc60e8a 100644 --- a/daemon/logging/logging.cpp +++ b/daemon/logging/logging.cpp @@ -69,6 +69,18 @@ namespace logging { // write it out log_item(level, location, std::string_view(buffer.get(), n)); } + + void do_log_item(log_level_t level, source_loc_t const & location, std::string_view msg) + { + // write it out + log_item(level, location, msg); + } + + void do_log_item(pid_t tid, log_level_t level, source_loc_t const & location, std::string_view msg) + { + // write it out + log_item(thread_id_t(tid), level, location, msg); + } } void log_item(log_level_t level, source_loc_t const & location, std::string_view message) @@ -76,7 +88,6 @@ namespace logging { std::shared_ptr sink = current_log_sink; if (sink != nullptr) { - struct timespec t; clock_gettime(CLOCK_MONOTONIC, &t); @@ -84,6 +95,18 @@ namespace logging { } } + void log_item(thread_id_t tid, log_level_t level, source_loc_t const & location, std::string_view message) + { + std::shared_ptr sink = current_log_sink; + + if (sink != nullptr) { + struct timespec t; + clock_gettime(CLOCK_MONOTONIC, &t); + + sink->log_item(tid, level, {t.tv_sec, t.tv_nsec}, location, message); + } + } + void log_item(thread_id_t tid, log_level_t level, log_timestamp_t timestamp, diff --git a/daemon/main.cpp b/daemon/main.cpp index 157d3278..a2293b6f 100644 --- a/daemon/main.cpp +++ b/daemon/main.cpp @@ -2,6 +2,7 @@ #include "GatorMain.h" #include "agents/ext_source/ext_source_agent_main.h" +#include "agents/perf/perf_agent_main.h" #include "agents/spawn_agent.h" #include @@ -9,8 +10,13 @@ int main(int argc, char ** argv) { // agent main ? - if ((argc > 1) && (std::string_view(argv[1]) == agents::agent_id_ext_source)) { - return agents::ext_agent_main(argv[0], lib::Span(argv + 2, argc - 2)); + if (argc > 1) { + if (std::string_view(argv[1]) == agents::agent_id_ext_source) { + return agents::ext_agent_main(argv[0], lib::Span(argv + 2, argc - 2)); + } + if (std::string_view(argv[1]) == agents::agent_id_perf) { + return agents::perf::perf_agent_main(argv[0], lib::Span(argv + 2, argc - 2)); + } } return gator_main(argc, argv); diff --git a/daemon/mali_userspace/MaliDevice.cpp b/daemon/mali_userspace/MaliDevice.cpp index 77b30bd4..0fc1703f 100644 --- a/daemon/mali_userspace/MaliDevice.cpp +++ b/daemon/mali_userspace/MaliDevice.cpp @@ -1,4 +1,4 @@ -/* Copyright (C) 2016-2021 by Arm Limited. All rights reserved. */ +/* Copyright (C) 2016-2022 by Arm Limited. All rights reserved. */ #include "mali_userspace/MaliDevice.h" @@ -6,6 +6,7 @@ #include "Logging.h" #include "lib/Assert.h" #include "mali_userspace/MaliHwCntrNames.h" +#include "mali_userspace/MaliHwCntrNamesBifrost.h" #include #include diff --git a/daemon/mali_userspace/MaliGPUClockPolledDriver.cpp b/daemon/mali_userspace/MaliGPUClockPolledDriver.cpp new file mode 100644 index 00000000..4e8ec368 --- /dev/null +++ b/daemon/mali_userspace/MaliGPUClockPolledDriver.cpp @@ -0,0 +1,82 @@ +/* Copyright (C) 2022 by Arm Limited. All rights reserved. 
*/ + +#include "MaliGPUClockPolledDriver.h" + +namespace mali_userspace { + + static constexpr uint64_t CLOCK_MULTIPLIER = 1000000ULL; + + MaliGPUClockPolledDriver::MaliGPUClockPolledDriver(std::string clockPath, unsigned deviceNumber) + : PolledDriver("MaliGPUClock"), mClockPath(std::move(clockPath)), deviceNumber(deviceNumber) + { + counterName = ARM_MALI_CLOCK.data() + std::to_string(deviceNumber); + LOG_DEBUG("GPU CLOCK POLLING '%s' for mali%d", mClockPath.c_str(), deviceNumber); + } + + void MaliGPUClockPolledDriver::readEvents(mxml_node_t * const /*root*/) + { + if (access(mClockPath.c_str(), R_OK) == 0) { + LOG_SETUP("Mali GPU counters\nAccess %s is OK. GPU frequency counters available.", mClockPath.c_str()); + setCounters( + new mali_userspace::MaliGPUClockPolledDriverCounter(getCounters(), counterName.c_str(), mClockValue)); + } + else { + LOG_SETUP("Mali GPU counters\nCannot access %s. GPU frequency counters not available.", mClockPath.c_str()); + } + } + + int MaliGPUClockPolledDriver::writeCounters(mxml_node_t * root) const + { + int count = 0; + if (access(mClockPath.c_str(), R_OK) == 0) { + mxml_node_t * node = mxmlNewElement(root, "counter"); + mxmlElementSetAttr(node, "name", counterName.c_str()); + count++; + } + else { + LOG_ERROR("Mali GPU counters\nCannot access %s. GPU frequency counters not available.", mClockPath.c_str()); + } + return count; + } + + void MaliGPUClockPolledDriver::read(IBlockCounterFrameBuilder & buffer) + { + if (!doRead()) { + LOG_ERROR("Unable to read GPU clock frequency for %s", mClockPath.c_str()); + handleException(); + } + PolledDriver::read(buffer); + } + + bool MaliGPUClockPolledDriver::doRead() + { + if (!countersEnabled()) { + return true; + } + + if (!mBuf.read(mClockPath.c_str())) { + return false; + } + + mClockValue = strtoull(mBuf.getBuf(), nullptr, 0) * CLOCK_MULTIPLIER; + return true; + } + + void MaliGPUClockPolledDriver::writeEvents(mxml_node_t * root) const + { + mxml_node_t * node = mxmlNewElement(root, "category"); + mxmlElementSetAttr(node, "name", "Mali Misc"); + mxmlElementSetAttr(node, "per_cpu", "no"); + + mxml_node_t * nodeEvent = mxmlNewElement(node, "event"); + mxmlElementSetAttr(nodeEvent, "counter", counterName.c_str()); + mxmlElementSetAttr(nodeEvent, "title", "Mali Clock"); + auto eventName = "Frequency (Device #" + std::to_string(deviceNumber) + ")"; + mxmlElementSetAttr(nodeEvent, "name", eventName.c_str()); + mxmlElementSetAttr(nodeEvent, "class", "absolute"); + mxmlElementSetAttr(nodeEvent, "rendering_type", "line"); + mxmlElementSetAttr(nodeEvent, "display", "maximum"); + mxmlElementSetAttr(nodeEvent, "description", "GPU clock frequency in Hz"); + mxmlElementSetAttr(nodeEvent, "units", "Hz"); + } +} diff --git a/daemon/mali_userspace/MaliGPUClockPolledDriver.h b/daemon/mali_userspace/MaliGPUClockPolledDriver.h index 8e679c69..7dca407b 100644 --- a/daemon/mali_userspace/MaliGPUClockPolledDriver.h +++ b/daemon/mali_userspace/MaliGPUClockPolledDriver.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2019-2021 by Arm Limited. All rights reserved. */ +/* Copyright (C) 2019-2022 by Arm Limited. All rights reserved. 
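[Illustrative note, not part of the patch] The doRead() body moved into MaliGPUClockPolledDriver.cpp above parses the clock file with strtoull and scales it by the new CLOCK_MULTIPLIER (1000000), which suggests the sysfs value is in MHz and the reported counter is in Hz. A self-contained sketch of that read-and-scale step; the file path and helper name are assumptions, the real path comes from MaliDevice::getClockPath() and the real read uses DynBuf:

    #include <cstdint>
    #include <cstdio>
    #include <cstdlib>
    #include <fstream>
    #include <string>

    // Hypothetical helper standing in for DynBuf::read + strtoull in the driver.
    static bool readClockHz(const std::string & clockPath, std::uint64_t & valueHz)
    {
        std::ifstream file {clockPath};
        std::string contents;
        if (!file || !std::getline(file, contents)) {
            return false; // mirrors doRead() returning false when the read fails
        }
        constexpr std::uint64_t CLOCK_MULTIPLIER = 1000000ULL; // MHz -> Hz (assumed)
        valueHz = std::strtoull(contents.c_str(), nullptr, 0) * CLOCK_MULTIPLIER;
        return true;
    }

    int main()
    {
        std::uint64_t hz = 0;
        // Illustrative path only.
        if (readClockHz("/sys/class/misc/mali0/device/clock", hz)) {
            std::printf("GPU clock: %llu Hz\n", static_cast<unsigned long long>(hz));
        }
        return 0;
    }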
*/ #ifndef MALI_USERSPACE_MALIGPUCLOCKPOLLEDDRIVER_H_ #define MALI_USERSPACE_MALIGPUCLOCKPOLLEDDRIVER_H_ @@ -7,9 +7,12 @@ #include "Logging.h" #include "MaliGPUClockPolledDriverCounter.h" #include "PolledDriver.h" +#include "mxml/mxml.h" #include #include +#include +#include #include #include @@ -17,14 +20,8 @@ namespace mali_userspace { class MaliGPUClockPolledDriver : public PolledDriver { - private: - using super = PolledDriver; - public: - MaliGPUClockPolledDriver(std::string clockPath) : PolledDriver("MaliGPUClock"), mClockPath(std::move(clockPath)) - { - LOG_DEBUG("GPU CLOCK POLLING '%s'", mClockPath.c_str()); - } + MaliGPUClockPolledDriver(std::string clockPath, unsigned deviceNumber); // Intentionally unimplemented MaliGPUClockPolledDriver(const MaliGPUClockPolledDriver &) = delete; @@ -32,49 +29,24 @@ namespace mali_userspace { MaliGPUClockPolledDriver(MaliGPUClockPolledDriver &&) = delete; MaliGPUClockPolledDriver & operator=(MaliGPUClockPolledDriver &&) = delete; - void readEvents(mxml_node_t * const /*root*/) override - { - if (access(mClockPath.c_str(), R_OK) == 0) { - LOG_SETUP("Mali GPU counters\nAccess %s is OK. GPU frequency counters available.", mClockPath.c_str()); - setCounters( - new mali_userspace::MaliGPUClockPolledDriverCounter(getCounters(), "ARM_Mali-clock", mClockValue)); - } - else { + void readEvents(mxml_node_t * const /*root*/) override; - LOG_SETUP("Mali GPU counters\nCannot access %s. GPU frequency counters not available.", - mClockPath.c_str()); - } - } + int writeCounters(mxml_node_t * root) const override; void start() override {} - - void read(IBlockCounterFrameBuilder & buffer) override - { - if (!doRead()) { - LOG_ERROR("Unable to read GPU clock frequency"); - handleException(); - } - super::read(buffer); - } + void read(IBlockCounterFrameBuilder & buffer) override; + void writeEvents(mxml_node_t * root) const override; private: + static constexpr std::string_view ARM_MALI_CLOCK = "ARM_Mali-clock-"; + std::string mClockPath; + unsigned deviceNumber; + std::string counterName; uint64_t mClockValue {0}; DynBuf mBuf {}; - bool doRead() - { - if (!countersEnabled()) { - return true; - } - - if (!mBuf.read(mClockPath.c_str())) { - return false; - } - - mClockValue = strtoull(mBuf.getBuf(), nullptr, 0) * 1000000ULL; - return true; - } + bool doRead(); }; } #endif /* MALI_USERSPACE_MALIGPUCLOCKPOLLEDDRIVER_H_ */ diff --git a/daemon/mali_userspace/MaliHwCntrDriver.cpp b/daemon/mali_userspace/MaliHwCntrDriver.cpp index a092c83c..cc9af440 100644 --- a/daemon/mali_userspace/MaliHwCntrDriver.cpp +++ b/daemon/mali_userspace/MaliHwCntrDriver.cpp @@ -1,4 +1,4 @@ -/* Copyright (C) 2016-2021 by Arm Limited. All rights reserved. */ +/* Copyright (C) 2016-2022 by Arm Limited. All rights reserved. */ #include "mali_userspace/MaliHwCntrDriver.h" @@ -66,7 +66,7 @@ namespace mali_userspace { const MaliDevice & device = *mDevice.second; if (!device.getClockPath().empty()) { mPolledDrivers[mDevice.first] = - std::unique_ptr(new MaliGPUClockPolledDriver(device.getClockPath())); + std::unique_ptr(new MaliGPUClockPolledDriver(device.getClockPath(), mDevice.first)); } else { LOG_SETUP("Mali GPU counters\nGPU frequency counters not available for GPU # %d.", mDevice.first); diff --git a/daemon/mali_userspace/MaliHwCntrNames.h b/daemon/mali_userspace/MaliHwCntrNames.h index 915bd016..ff5afd95 100644 --- a/daemon/mali_userspace/MaliHwCntrNames.h +++ b/daemon/mali_userspace/MaliHwCntrNames.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2016-2021 by Arm Limited. All rights reserved. 
*/ +/* Copyright (C) 2016-2022 by Arm Limited. All rights reserved. */ #ifndef NATIVE_GATOR_DAEMON_MALI_USERSPACE_MALIHWCNTRNAMES_H_ #define NATIVE_GATOR_DAEMON_MALI_USERSPACE_MALIHWCNTRNAMES_H_ @@ -2135,3730 +2135,6 @@ namespace mali_userspace { "T88x_L2_SNOOP_FULL", "T88x_L2_REPLAY_FULL"}; - static const char * const hardware_counters_mali_tHEx[] = { - /* Job Manager */ - "", - "", - "", - "", - "THEx_MESSAGES_SENT", - "THEx_MESSAGES_RECEIVED", - "THEx_GPU_ACTIVE", - "THEx_IRQ_ACTIVE", - "THEx_JS0_JOBS", - "THEx_JS0_TASKS", - "THEx_JS0_ACTIVE", - "", - "THEx_JS0_WAIT_READ", - "THEx_JS0_WAIT_ISSUE", - "THEx_JS0_WAIT_DEPEND", - "THEx_JS0_WAIT_FINISH", - "THEx_JS1_JOBS", - "THEx_JS1_TASKS", - "THEx_JS1_ACTIVE", - "", - "THEx_JS1_WAIT_READ", - "THEx_JS1_WAIT_ISSUE", - "THEx_JS1_WAIT_DEPEND", - "THEx_JS1_WAIT_FINISH", - "THEx_JS2_JOBS", - "THEx_JS2_TASKS", - "THEx_JS2_ACTIVE", - "", - "THEx_JS2_WAIT_READ", - "THEx_JS2_WAIT_ISSUE", - "THEx_JS2_WAIT_DEPEND", - "THEx_JS2_WAIT_FINISH", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - - /* Tiler */ - "", - "", - "", - "", - "THEx_TILER_ACTIVE", - "THEx_JOBS_PROCESSED", - "THEx_TRIANGLES", - "THEx_LINES", - "THEx_POINTS", - "THEx_FRONT_FACING", - "THEx_BACK_FACING", - "THEx_PRIM_VISIBLE", - "THEx_PRIM_CULLED", - "THEx_PRIM_CLIPPED", - "THEx_PRIM_SAT_CULLED", - "", - "", - "THEx_BUS_READ", - "", - "THEx_BUS_WRITE", - "THEx_LOADING_DESC", - "THEx_IDVS_POS_SHAD_REQ", - "THEx_IDVS_POS_SHAD_WAIT", - "THEx_IDVS_POS_SHAD_STALL", - "THEx_IDVS_POS_FIFO_FULL", - "THEx_PREFETCH_STALL", - "THEx_VCACHE_HIT", - "THEx_VCACHE_MISS", - "THEx_VCACHE_LINE_WAIT", - "THEx_VFETCH_POS_READ_WAIT", - "THEx_VFETCH_VERTEX_WAIT", - "THEx_VFETCH_STALL", - "THEx_PRIMASSY_STALL", - "THEx_BBOX_GEN_STALL", - "THEx_IDVS_VBU_HIT", - "THEx_IDVS_VBU_MISS", - "THEx_IDVS_VBU_LINE_DEALLOCATE", - "THEx_IDVS_VAR_SHAD_REQ", - "THEx_IDVS_VAR_SHAD_STALL", - "THEx_BINNER_STALL", - "THEx_ITER_STALL", - "THEx_COMPRESS_MISS", - "THEx_COMPRESS_STALL", - "THEx_PCACHE_HIT", - "THEx_PCACHE_MISS", - "THEx_PCACHE_MISS_STALL", - "THEx_PCACHE_EVICT_STALL", - "THEx_PMGR_PTR_WR_STALL", - "THEx_PMGR_PTR_RD_STALL", - "THEx_PMGR_CMD_WR_STALL", - "THEx_WRBUF_ACTIVE", - "THEx_WRBUF_HIT", - "THEx_WRBUF_MISS", - "THEx_WRBUF_NO_FREE_LINE_STALL", - "THEx_WRBUF_NO_AXI_ID_STALL", - "THEx_WRBUF_AXI_STALL", - "", - "", - "", - "THEx_UTLB_TRANS", - "THEx_UTLB_TRANS_HIT", - "THEx_UTLB_TRANS_STALL", - "THEx_UTLB_TRANS_MISS_DELAY", - "THEx_UTLB_MMU_REQ", - - /* Shader Core */ - "", - "", - "", - "", - "THEx_FRAG_ACTIVE", - "THEx_FRAG_PRIMITIVES", - "THEx_FRAG_PRIM_RAST", - "THEx_FRAG_FPK_ACTIVE", - "THEx_FRAG_STARVING", - "THEx_FRAG_WARPS", - "THEx_FRAG_PARTIAL_WARPS", - "THEx_FRAG_QUADS_RAST", - "THEx_FRAG_QUADS_EZS_TEST", - "THEx_FRAG_QUADS_EZS_UPDATE", - "THEx_FRAG_QUADS_EZS_KILL", - "THEx_FRAG_LZS_TEST", - "THEx_FRAG_LZS_KILL", - "", - "THEx_FRAG_PTILES", - "THEx_FRAG_TRANS_ELIM", - "THEx_QUAD_FPK_KILLER", - "", - "THEx_COMPUTE_ACTIVE", - "THEx_COMPUTE_TASKS", - "THEx_COMPUTE_WARPS", - "THEx_COMPUTE_STARVING", - "THEx_EXEC_CORE_ACTIVE", - "THEx_EXEC_ACTIVE", - "THEx_EXEC_INSTR_COUNT", - "THEx_EXEC_INSTR_DIVERGED", - "THEx_EXEC_INSTR_STARVING", - "THEx_ARITH_INSTR_SINGLE_FMA", - "THEx_ARITH_INSTR_DOUBLE", - "THEx_ARITH_INSTR_MSG", - "THEx_ARITH_INSTR_MSG_ONLY", - "THEx_TEX_INSTR", - "THEx_TEX_INSTR_MIPMAP", - "THEx_TEX_INSTR_COMPRESSED", - "THEx_TEX_INSTR_3D", - 
"THEx_TEX_INSTR_TRILINEAR", - "THEx_TEX_COORD_ISSUE", - "THEx_TEX_COORD_STALL", - "THEx_TEX_STARVE_CACHE", - "THEx_TEX_STARVE_FILTER", - "THEx_LS_MEM_READ_FULL", - "THEx_LS_MEM_READ_SHORT", - "THEx_LS_MEM_WRITE_FULL", - "THEx_LS_MEM_WRITE_SHORT", - "THEx_LS_MEM_ATOMIC", - "THEx_VARY_INSTR", - "THEx_VARY_SLOT_32", - "THEx_VARY_SLOT_16", - "THEx_ATTR_INSTR", - "THEx_ARITH_INSTR_FP_MUL", - "THEx_BEATS_RD_FTC", - "THEx_BEATS_RD_FTC_EXT", - "THEx_BEATS_RD_LSC", - "THEx_BEATS_RD_LSC_EXT", - "THEx_BEATS_RD_TEX", - "THEx_BEATS_RD_TEX_EXT", - "THEx_BEATS_RD_OTHER", - "THEx_BEATS_WR_LSC", - "THEx_BEATS_WR_TIB", - "", - - /* L2 and MMU */ - "", - "", - "", - "", - "THEx_MMU_REQUESTS", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "THEx_L2_RD_MSG_IN", - "THEx_L2_RD_MSG_IN_STALL", - "THEx_L2_WR_MSG_IN", - "THEx_L2_WR_MSG_IN_STALL", - "THEx_L2_SNP_MSG_IN", - "THEx_L2_SNP_MSG_IN_STALL", - "THEx_L2_RD_MSG_OUT", - "THEx_L2_RD_MSG_OUT_STALL", - "THEx_L2_WR_MSG_OUT", - "THEx_L2_ANY_LOOKUP", - "THEx_L2_READ_LOOKUP", - "THEx_L2_WRITE_LOOKUP", - "THEx_L2_EXT_SNOOP_LOOKUP", - "THEx_L2_EXT_READ", - "THEx_L2_EXT_READ_NOSNP", - "THEx_L2_EXT_READ_UNIQUE", - "THEx_L2_EXT_READ_BEATS", - "THEx_L2_EXT_AR_STALL", - "THEx_L2_EXT_AR_CNT_Q1", - "THEx_L2_EXT_AR_CNT_Q2", - "THEx_L2_EXT_AR_CNT_Q3", - "THEx_L2_EXT_RRESP_0_127", - "THEx_L2_EXT_RRESP_128_191", - "THEx_L2_EXT_RRESP_192_255", - "THEx_L2_EXT_RRESP_256_319", - "THEx_L2_EXT_RRESP_320_383", - "THEx_L2_EXT_WRITE", - "THEx_L2_EXT_WRITE_NOSNP_FULL", - "THEx_L2_EXT_WRITE_NOSNP_PTL", - "THEx_L2_EXT_WRITE_SNP_FULL", - "THEx_L2_EXT_WRITE_SNP_PTL", - "THEx_L2_EXT_WRITE_BEATS", - "THEx_L2_EXT_W_STALL", - "THEx_L2_EXT_AW_CNT_Q1", - "THEx_L2_EXT_AW_CNT_Q2", - "THEx_L2_EXT_AW_CNT_Q3", - "THEx_L2_EXT_SNOOP", - "THEx_L2_EXT_SNOOP_STALL", - "THEx_L2_EXT_SNOOP_RESP_CLEAN", - "THEx_L2_EXT_SNOOP_RESP_DATA", - "THEx_L2_EXT_SNOOP_INTERNAL", - "", - "", - "", - "", - "", - "", - "", - }; - - static const char * const hardware_counters_mali_tMIx[] = { - /* Job Manager */ - "", - "", - "", - "", - "TMIx_MESSAGES_SENT", - "TMIx_MESSAGES_RECEIVED", - "TMIx_GPU_ACTIVE", - "TMIx_IRQ_ACTIVE", - "TMIx_JS0_JOBS", - "TMIx_JS0_TASKS", - "TMIx_JS0_ACTIVE", - "", - "TMIx_JS0_WAIT_READ", - "TMIx_JS0_WAIT_ISSUE", - "TMIx_JS0_WAIT_DEPEND", - "TMIx_JS0_WAIT_FINISH", - "TMIx_JS1_JOBS", - "TMIx_JS1_TASKS", - "TMIx_JS1_ACTIVE", - "", - "TMIx_JS1_WAIT_READ", - "TMIx_JS1_WAIT_ISSUE", - "TMIx_JS1_WAIT_DEPEND", - "TMIx_JS1_WAIT_FINISH", - "TMIx_JS2_JOBS", - "TMIx_JS2_TASKS", - "TMIx_JS2_ACTIVE", - "", - "TMIx_JS2_WAIT_READ", - "TMIx_JS2_WAIT_ISSUE", - "TMIx_JS2_WAIT_DEPEND", - "TMIx_JS2_WAIT_FINISH", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - - /* Tiler */ - "", - "", - "", - "", - "TMIx_TILER_ACTIVE", - "TMIx_JOBS_PROCESSED", - "TMIx_TRIANGLES", - "TMIx_LINES", - "TMIx_POINTS", - "TMIx_FRONT_FACING", - "TMIx_BACK_FACING", - "TMIx_PRIM_VISIBLE", - "TMIx_PRIM_CULLED", - "TMIx_PRIM_CLIPPED", - "TMIx_PRIM_SAT_CULLED", - "TMIx_BIN_ALLOC_INIT", - "TMIx_BIN_ALLOC_OVERFLOW", - "TMIx_BUS_READ", - "", - "TMIx_BUS_WRITE", - "TMIx_LOADING_DESC", - "TMIx_IDVS_POS_SHAD_REQ", - "TMIx_IDVS_POS_SHAD_WAIT", - "TMIx_IDVS_POS_SHAD_STALL", - "TMIx_IDVS_POS_FIFO_FULL", - "TMIx_PREFETCH_STALL", - "TMIx_VCACHE_HIT", - "TMIx_VCACHE_MISS", - "TMIx_VCACHE_LINE_WAIT", - "TMIx_VFETCH_POS_READ_WAIT", - "TMIx_VFETCH_VERTEX_WAIT", - "TMIx_VFETCH_STALL", - 
"TMIx_PRIMASSY_STALL", - "TMIx_BBOX_GEN_STALL", - "TMIx_IDVS_VBU_HIT", - "TMIx_IDVS_VBU_MISS", - "TMIx_IDVS_VBU_LINE_DEALLOCATE", - "TMIx_IDVS_VAR_SHAD_REQ", - "TMIx_IDVS_VAR_SHAD_STALL", - "TMIx_BINNER_STALL", - "TMIx_ITER_STALL", - "TMIx_COMPRESS_MISS", - "TMIx_COMPRESS_STALL", - "TMIx_PCACHE_HIT", - "TMIx_PCACHE_MISS", - "TMIx_PCACHE_MISS_STALL", - "TMIx_PCACHE_EVICT_STALL", - "TMIx_PMGR_PTR_WR_STALL", - "TMIx_PMGR_PTR_RD_STALL", - "TMIx_PMGR_CMD_WR_STALL", - "TMIx_WRBUF_ACTIVE", - "TMIx_WRBUF_HIT", - "TMIx_WRBUF_MISS", - "TMIx_WRBUF_NO_FREE_LINE_STALL", - "TMIx_WRBUF_NO_AXI_ID_STALL", - "TMIx_WRBUF_AXI_STALL", - "", - "", - "", - "TMIx_UTLB_TRANS", - "TMIx_UTLB_TRANS_HIT", - "TMIx_UTLB_TRANS_STALL", - "TMIx_UTLB_TRANS_MISS_DELAY", - "TMIx_UTLB_MMU_REQ", - - /* Shader Core */ - "", - "", - "", - "", - "TMIx_FRAG_ACTIVE", - "TMIx_FRAG_PRIMITIVES", - "TMIx_FRAG_PRIM_RAST", - "TMIx_FRAG_FPK_ACTIVE", - "TMIx_FRAG_STARVING", - "TMIx_FRAG_WARPS", - "TMIx_FRAG_PARTIAL_WARPS", - "TMIx_FRAG_QUADS_RAST", - "TMIx_FRAG_QUADS_EZS_TEST", - "TMIx_FRAG_QUADS_EZS_UPDATE", - "TMIx_FRAG_QUADS_EZS_KILL", - "TMIx_FRAG_LZS_TEST", - "TMIx_FRAG_LZS_KILL", - "", - "TMIx_FRAG_PTILES", - "TMIx_FRAG_TRANS_ELIM", - "TMIx_QUAD_FPK_KILLER", - "", - "TMIx_COMPUTE_ACTIVE", - "TMIx_COMPUTE_TASKS", - "TMIx_COMPUTE_WARPS", - "TMIx_COMPUTE_STARVING", - "TMIx_EXEC_CORE_ACTIVE", - "TMIx_EXEC_ACTIVE", - "TMIx_EXEC_INSTR_COUNT", - "TMIx_EXEC_INSTR_DIVERGED", - "TMIx_EXEC_INSTR_STARVING", - "TMIx_ARITH_INSTR_SINGLE_FMA", - "TMIx_ARITH_INSTR_DOUBLE", - "TMIx_ARITH_INSTR_MSG", - "TMIx_ARITH_INSTR_MSG_ONLY", - "TMIx_TEX_INSTR", - "TMIx_TEX_INSTR_MIPMAP", - "TMIx_TEX_INSTR_COMPRESSED", - "TMIx_TEX_INSTR_3D", - "TMIx_TEX_INSTR_TRILINEAR", - "TMIx_TEX_COORD_ISSUE", - "TMIx_TEX_COORD_STALL", - "TMIx_TEX_STARVE_CACHE", - "TMIx_TEX_STARVE_FILTER", - "TMIx_LS_MEM_READ_FULL", - "TMIx_LS_MEM_READ_SHORT", - "TMIx_LS_MEM_WRITE_FULL", - "TMIx_LS_MEM_WRITE_SHORT", - "TMIx_LS_MEM_ATOMIC", - "TMIx_VARY_INSTR", - "TMIx_VARY_SLOT_32", - "TMIx_VARY_SLOT_16", - "TMIx_ATTR_INSTR", - "TMIx_ARITH_INSTR_FP_MUL", - "TMIx_BEATS_RD_FTC", - "TMIx_BEATS_RD_FTC_EXT", - "TMIx_BEATS_RD_LSC", - "TMIx_BEATS_RD_LSC_EXT", - "TMIx_BEATS_RD_TEX", - "TMIx_BEATS_RD_TEX_EXT", - "TMIx_BEATS_RD_OTHER", - "TMIx_BEATS_WR_LSC", - "TMIx_BEATS_WR_TIB", - "", - - /* L2 and MMU */ - "", - "", - "", - "", - "TMIx_MMU_REQUESTS", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "TMIx_L2_RD_MSG_IN", - "TMIx_L2_RD_MSG_IN_STALL", - "TMIx_L2_WR_MSG_IN", - "TMIx_L2_WR_MSG_IN_STALL", - "TMIx_L2_SNP_MSG_IN", - "TMIx_L2_SNP_MSG_IN_STALL", - "TMIx_L2_RD_MSG_OUT", - "TMIx_L2_RD_MSG_OUT_STALL", - "TMIx_L2_WR_MSG_OUT", - "TMIx_L2_ANY_LOOKUP", - "TMIx_L2_READ_LOOKUP", - "TMIx_L2_WRITE_LOOKUP", - "TMIx_L2_EXT_SNOOP_LOOKUP", - "TMIx_L2_EXT_READ", - "TMIx_L2_EXT_READ_NOSNP", - "TMIx_L2_EXT_READ_UNIQUE", - "TMIx_L2_EXT_READ_BEATS", - "TMIx_L2_EXT_AR_STALL", - "TMIx_L2_EXT_AR_CNT_Q1", - "TMIx_L2_EXT_AR_CNT_Q2", - "TMIx_L2_EXT_AR_CNT_Q3", - "TMIx_L2_EXT_RRESP_0_127", - "TMIx_L2_EXT_RRESP_128_191", - "TMIx_L2_EXT_RRESP_192_255", - "TMIx_L2_EXT_RRESP_256_319", - "TMIx_L2_EXT_RRESP_320_383", - "TMIx_L2_EXT_WRITE", - "TMIx_L2_EXT_WRITE_NOSNP_FULL", - "TMIx_L2_EXT_WRITE_NOSNP_PTL", - "TMIx_L2_EXT_WRITE_SNP_FULL", - "TMIx_L2_EXT_WRITE_SNP_PTL", - "TMIx_L2_EXT_WRITE_BEATS", - "TMIx_L2_EXT_W_STALL", - "TMIx_L2_EXT_AW_CNT_Q1", - "TMIx_L2_EXT_AW_CNT_Q2", - "TMIx_L2_EXT_AW_CNT_Q3", - "TMIx_L2_EXT_SNOOP", - "TMIx_L2_EXT_SNOOP_STALL", - "TMIx_L2_EXT_SNOOP_RESP_CLEAN", - 
"TMIx_L2_EXT_SNOOP_RESP_DATA", - "TMIx_L2_EXT_SNOOP_INTERNAL", - "", - "", - "", - "", - "", - "", - "", - }; - - static const char * const hardware_counters_mali_tDVx[] = { - /* Job Manager */ - "", - "", - "", - "", - "TDVx_MESSAGES_SENT", - "TDVx_MESSAGES_RECEIVED", - "TDVx_GPU_ACTIVE", - "TDVx_IRQ_ACTIVE", - "TDVx_JS0_JOBS", - "TDVx_JS0_TASKS", - "TDVx_JS0_ACTIVE", - "TDVx_JS0_WAIT_FLUSH", - "TDVx_JS0_WAIT_READ", - "TDVx_JS0_WAIT_ISSUE", - "TDVx_JS0_WAIT_DEPEND", - "TDVx_JS0_WAIT_FINISH", - "TDVx_JS1_JOBS", - "TDVx_JS1_TASKS", - "TDVx_JS1_ACTIVE", - "TDVx_JS1_WAIT_FLUSH", - "TDVx_JS1_WAIT_READ", - "TDVx_JS1_WAIT_ISSUE", - "TDVx_JS1_WAIT_DEPEND", - "TDVx_JS1_WAIT_FINISH", - "TDVx_JS2_JOBS", - "TDVx_JS2_TASKS", - "TDVx_JS2_ACTIVE", - "TDVx_JS2_WAIT_FLUSH", - "TDVx_JS2_WAIT_READ", - "TDVx_JS2_WAIT_ISSUE", - "TDVx_JS2_WAIT_DEPEND", - "TDVx_JS2_WAIT_FINISH", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "TDVx_CACHE_FLUSH", - - /* Tiler */ - "", - "", - "", - "", - "TDVx_TILER_ACTIVE", - "TDVx_JOBS_PROCESSED", - "TDVx_TRIANGLES", - "TDVx_LINES", - "TDVx_POINTS", - "TDVx_FRONT_FACING", - "TDVx_BACK_FACING", - "TDVx_PRIM_VISIBLE", - "TDVx_PRIM_CULLED", - "TDVx_PRIM_CLIPPED", - "TDVx_PRIM_SAT_CULLED", - "TDVx_BIN_ALLOC_INIT", - "TDVx_BIN_ALLOC_OVERFLOW", - "TDVx_BUS_READ", - "", - "TDVx_BUS_WRITE", - "TDVx_LOADING_DESC", - "TDVx_IDVS_POS_SHAD_REQ", - "TDVx_IDVS_POS_SHAD_WAIT", - "TDVx_IDVS_POS_SHAD_STALL", - "TDVx_IDVS_POS_FIFO_FULL", - "TDVx_PREFETCH_STALL", - "TDVx_VCACHE_HIT", - "TDVx_VCACHE_MISS", - "TDVx_VCACHE_LINE_WAIT", - "TDVx_VFETCH_POS_READ_WAIT", - "TDVx_VFETCH_VERTEX_WAIT", - "TDVx_VFETCH_STALL", - "TDVx_PRIMASSY_STALL", - "TDVx_BBOX_GEN_STALL", - "TDVx_IDVS_VBU_HIT", - "TDVx_IDVS_VBU_MISS", - "TDVx_IDVS_VBU_LINE_DEALLOCATE", - "TDVx_IDVS_VAR_SHAD_REQ", - "TDVx_IDVS_VAR_SHAD_STALL", - "TDVx_BINNER_STALL", - "TDVx_ITER_STALL", - "TDVx_COMPRESS_MISS", - "TDVx_COMPRESS_STALL", - "TDVx_PCACHE_HIT", - "TDVx_PCACHE_MISS", - "TDVx_PCACHE_MISS_STALL", - "TDVx_PCACHE_EVICT_STALL", - "TDVx_PMGR_PTR_WR_STALL", - "TDVx_PMGR_PTR_RD_STALL", - "TDVx_PMGR_CMD_WR_STALL", - "TDVx_WRBUF_ACTIVE", - "TDVx_WRBUF_HIT", - "TDVx_WRBUF_MISS", - "TDVx_WRBUF_NO_FREE_LINE_STALL", - "TDVx_WRBUF_NO_AXI_ID_STALL", - "TDVx_WRBUF_AXI_STALL", - "", - "", - "", - "TDVx_UTLB_TRANS", - "TDVx_UTLB_TRANS_HIT", - "TDVx_UTLB_TRANS_STALL", - "TDVx_UTLB_TRANS_MISS_DELAY", - "TDVx_UTLB_MMU_REQ", - - /* Shader Core */ - "", - "", - "", - "", - "TDVx_FRAG_ACTIVE", - "TDVx_FRAG_PRIMITIVES", - "TDVx_FRAG_PRIM_RAST", - "TDVx_FRAG_FPK_ACTIVE", - "TDVx_FRAG_STARVING", - "TDVx_FRAG_WARPS", - "TDVx_FRAG_PARTIAL_WARPS", - "TDVx_FRAG_QUADS_RAST", - "TDVx_FRAG_QUADS_EZS_TEST", - "TDVx_FRAG_QUADS_EZS_UPDATE", - "TDVx_FRAG_QUADS_EZS_KILL", - "TDVx_FRAG_LZS_TEST", - "TDVx_FRAG_LZS_KILL", - "", - "TDVx_FRAG_PTILES", - "TDVx_FRAG_TRANS_ELIM", - "TDVx_QUAD_FPK_KILLER", - "", - "TDVx_COMPUTE_ACTIVE", - "TDVx_COMPUTE_TASKS", - "TDVx_COMPUTE_WARPS", - "TDVx_COMPUTE_STARVING", - "TDVx_EXEC_CORE_ACTIVE", - "TDVx_EXEC_ACTIVE", - "TDVx_EXEC_INSTR_COUNT", - "TDVx_EXEC_INSTR_DIVERGED", - "TDVx_EXEC_INSTR_STARVING", - "TDVx_ARITH_INSTR_SINGLE_FMA", - "TDVx_ARITH_INSTR_DOUBLE", - "TDVx_ARITH_INSTR_MSG", - "TDVx_ARITH_INSTR_MSG_ONLY", - "TDVx_TEX_MSGI_NUM_QUADS", - "TDVx_TEX_DFCH_NUM_PASSES", - "TDVx_TEX_DFCH_NUM_PASSES_MISS", - "TDVx_TEX_DFCH_NUM_PASSES_MIP_MAP", - "TDVx_TEX_TIDX_NUM_SPLIT_MIP_MAP", - 
"TDVx_TEX_TFCH_NUM_LINES_FETCHED", - "TDVx_TEX_TFCH_NUM_LINES_FETCHED_BLOCK_COMPRESSED", - "TDVx_TEX_TFCH_NUM_OPERATIONS", - "TDVx_TEX_FILT_NUM_OPERATIONS", - "TDVx_LS_MEM_READ_FULL", - "TDVx_LS_MEM_READ_SHORT", - "TDVx_LS_MEM_WRITE_FULL", - "TDVx_LS_MEM_WRITE_SHORT", - "TDVx_LS_MEM_ATOMIC", - "TDVx_VARY_INSTR", - "TDVx_VARY_SLOT_32", - "TDVx_VARY_SLOT_16", - "TDVx_ATTR_INSTR", - "TDVx_ARITH_INSTR_FP_MUL", - "TDVx_BEATS_RD_FTC", - "TDVx_BEATS_RD_FTC_EXT", - "TDVx_BEATS_RD_LSC", - "TDVx_BEATS_RD_LSC_EXT", - "TDVx_BEATS_RD_TEX", - "TDVx_BEATS_RD_TEX_EXT", - "TDVx_BEATS_RD_OTHER", - "TDVx_BEATS_WR_LSC_OTHER", - "TDVx_BEATS_WR_TIB", - "TDVx_BEATS_WR_LSC_WB", - - /* L2 and MMU */ - "", - "", - "", - "", - "TDVx_MMU_REQUESTS", - "TDVx_MMU_TABLE_READS_L3", - "TDVx_MMU_TABLE_READS_L2", - "TDVx_MMU_HIT_L3", - "TDVx_MMU_HIT_L2", - "TDVx_MMU_S2_REQUESTS", - "TDVx_MMU_S2_TABLE_READS_L3", - "TDVx_MMU_S2_TABLE_READS_L2", - "TDVx_MMU_S2_HIT_L3", - "TDVx_MMU_S2_HIT_L2", - "", - "", - "TDVx_L2_RD_MSG_IN", - "TDVx_L2_RD_MSG_IN_STALL", - "TDVx_L2_WR_MSG_IN", - "TDVx_L2_WR_MSG_IN_STALL", - "TDVx_L2_SNP_MSG_IN", - "TDVx_L2_SNP_MSG_IN_STALL", - "TDVx_L2_RD_MSG_OUT", - "TDVx_L2_RD_MSG_OUT_STALL", - "TDVx_L2_WR_MSG_OUT", - "TDVx_L2_ANY_LOOKUP", - "TDVx_L2_READ_LOOKUP", - "TDVx_L2_WRITE_LOOKUP", - "TDVx_L2_EXT_SNOOP_LOOKUP", - "TDVx_L2_EXT_READ", - "TDVx_L2_EXT_READ_NOSNP", - "TDVx_L2_EXT_READ_UNIQUE", - "TDVx_L2_EXT_READ_BEATS", - "TDVx_L2_EXT_AR_STALL", - "TDVx_L2_EXT_AR_CNT_Q1", - "TDVx_L2_EXT_AR_CNT_Q2", - "TDVx_L2_EXT_AR_CNT_Q3", - "TDVx_L2_EXT_RRESP_0_127", - "TDVx_L2_EXT_RRESP_128_191", - "TDVx_L2_EXT_RRESP_192_255", - "TDVx_L2_EXT_RRESP_256_319", - "TDVx_L2_EXT_RRESP_320_383", - "TDVx_L2_EXT_WRITE", - "TDVx_L2_EXT_WRITE_NOSNP_FULL", - "TDVx_L2_EXT_WRITE_NOSNP_PTL", - "TDVx_L2_EXT_WRITE_SNP_FULL", - "TDVx_L2_EXT_WRITE_SNP_PTL", - "TDVx_L2_EXT_WRITE_BEATS", - "TDVx_L2_EXT_W_STALL", - "TDVx_L2_EXT_AW_CNT_Q1", - "TDVx_L2_EXT_AW_CNT_Q2", - "TDVx_L2_EXT_AW_CNT_Q3", - "TDVx_L2_EXT_SNOOP", - "TDVx_L2_EXT_SNOOP_STALL", - "TDVx_L2_EXT_SNOOP_RESP_CLEAN", - "TDVx_L2_EXT_SNOOP_RESP_DATA", - "TDVx_L2_EXT_SNOOP_INTERNAL", - "", - "", - "", - "", - "", - "", - "", - }; - - static const char * const hardware_counters_mali_tSIx[] = { - /* Job Manager */ - "", - "", - "", - "", - "TSIx_MESSAGES_SENT", - "TSIx_MESSAGES_RECEIVED", - "TSIx_GPU_ACTIVE", - "TSIx_IRQ_ACTIVE", - "TSIx_JS0_JOBS", - "TSIx_JS0_TASKS", - "TSIx_JS0_ACTIVE", - "TSIx_JS0_WAIT_FLUSH", - "TSIx_JS0_WAIT_READ", - "TSIx_JS0_WAIT_ISSUE", - "TSIx_JS0_WAIT_DEPEND", - "TSIx_JS0_WAIT_FINISH", - "TSIx_JS1_JOBS", - "TSIx_JS1_TASKS", - "TSIx_JS1_ACTIVE", - "TSIx_JS1_WAIT_FLUSH", - "TSIx_JS1_WAIT_READ", - "TSIx_JS1_WAIT_ISSUE", - "TSIx_JS1_WAIT_DEPEND", - "TSIx_JS1_WAIT_FINISH", - "TSIx_JS2_JOBS", - "TSIx_JS2_TASKS", - "TSIx_JS2_ACTIVE", - "TSIx_JS2_WAIT_FLUSH", - "TSIx_JS2_WAIT_READ", - "TSIx_JS2_WAIT_ISSUE", - "TSIx_JS2_WAIT_DEPEND", - "TSIx_JS2_WAIT_FINISH", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - - /* Tiler */ - "", - "", - "", - "", - "TSIx_TILER_ACTIVE", - "TSIx_JOBS_PROCESSED", - "TSIx_TRIANGLES", - "TSIx_LINES", - "TSIx_POINTS", - "TSIx_FRONT_FACING", - "TSIx_BACK_FACING", - "TSIx_PRIM_VISIBLE", - "TSIx_PRIM_CULLED", - "TSIx_PRIM_CLIPPED", - "TSIx_PRIM_SAT_CULLED", - "TSIx_BIN_ALLOC_INIT", - "TSIx_BIN_ALLOC_OVERFLOW", - "TSIx_BUS_READ", - "", - "TSIx_BUS_WRITE", - "TSIx_LOADING_DESC", - 
"TSIx_IDVS_POS_SHAD_REQ", - "TSIx_IDVS_POS_SHAD_WAIT", - "TSIx_IDVS_POS_SHAD_STALL", - "TSIx_IDVS_POS_FIFO_FULL", - "TSIx_PREFETCH_STALL", - "TSIx_VCACHE_HIT", - "TSIx_VCACHE_MISS", - "TSIx_VCACHE_LINE_WAIT", - "TSIx_VFETCH_POS_READ_WAIT", - "TSIx_VFETCH_VERTEX_WAIT", - "TSIx_VFETCH_STALL", - "TSIx_PRIMASSY_STALL", - "TSIx_BBOX_GEN_STALL", - "TSIx_IDVS_VBU_HIT", - "TSIx_IDVS_VBU_MISS", - "TSIx_IDVS_VBU_LINE_DEALLOCATE", - "TSIx_IDVS_VAR_SHAD_REQ", - "TSIx_IDVS_VAR_SHAD_STALL", - "TSIx_BINNER_STALL", - "TSIx_ITER_STALL", - "TSIx_COMPRESS_MISS", - "TSIx_COMPRESS_STALL", - "TSIx_PCACHE_HIT", - "TSIx_PCACHE_MISS", - "TSIx_PCACHE_MISS_STALL", - "TSIx_PCACHE_EVICT_STALL", - "TSIx_PMGR_PTR_WR_STALL", - "TSIx_PMGR_PTR_RD_STALL", - "TSIx_PMGR_CMD_WR_STALL", - "TSIx_WRBUF_ACTIVE", - "TSIx_WRBUF_HIT", - "TSIx_WRBUF_MISS", - "TSIx_WRBUF_NO_FREE_LINE_STALL", - "TSIx_WRBUF_NO_AXI_ID_STALL", - "TSIx_WRBUF_AXI_STALL", - "", - "", - "", - "TSIx_UTLB_TRANS", - "TSIx_UTLB_TRANS_HIT", - "TSIx_UTLB_TRANS_STALL", - "TSIx_UTLB_TRANS_MISS_DELAY", - "TSIx_UTLB_MMU_REQ", - - /* Shader Core */ - "", - "", - "", - "", - "TSIx_FRAG_ACTIVE", - "TSIx_FRAG_PRIMITIVES", - "TSIx_FRAG_PRIM_RAST", - "TSIx_FRAG_FPK_ACTIVE", - "TSIx_FRAG_STARVING", - "TSIx_FRAG_WARPS", - "TSIx_FRAG_PARTIAL_WARPS", - "TSIx_FRAG_QUADS_RAST", - "TSIx_FRAG_QUADS_EZS_TEST", - "TSIx_FRAG_QUADS_EZS_UPDATE", - "TSIx_FRAG_QUADS_EZS_KILL", - "TSIx_FRAG_LZS_TEST", - "TSIx_FRAG_LZS_KILL", - "", - "TSIx_FRAG_PTILES", - "TSIx_FRAG_TRANS_ELIM", - "TSIx_QUAD_FPK_KILLER", - "", - "TSIx_COMPUTE_ACTIVE", - "TSIx_COMPUTE_TASKS", - "TSIx_COMPUTE_WARPS", - "TSIx_COMPUTE_STARVING", - "TSIx_EXEC_CORE_ACTIVE", - "TSIx_EXEC_ACTIVE", - "TSIx_EXEC_INSTR_COUNT", - "TSIx_EXEC_INSTR_DIVERGED", - "TSIx_EXEC_INSTR_STARVING", - "TSIx_ARITH_INSTR_SINGLE_FMA", - "TSIx_ARITH_INSTR_DOUBLE", - "TSIx_ARITH_INSTR_MSG", - "TSIx_ARITH_INSTR_MSG_ONLY", - "TSIx_TEX_MSGI_NUM_QUADS", - "TSIx_TEX_DFCH_NUM_PASSES", - "TSIx_TEX_DFCH_NUM_PASSES_MISS", - "TSIx_TEX_DFCH_NUM_PASSES_MIP_MAP", - "TSIx_TEX_TIDX_NUM_SPLIT_MIP_MAP", - "TSIx_TEX_TFCH_NUM_LINES_FETCHED", - "TSIx_TEX_TFCH_NUM_LINES_FETCHED_BLOCK_COMPRESSED", - "TSIx_TEX_TFCH_NUM_OPERATIONS", - "TSIx_TEX_FILT_NUM_OPERATIONS", - "TSIx_LS_MEM_READ_FULL", - "TSIx_LS_MEM_READ_SHORT", - "TSIx_LS_MEM_WRITE_FULL", - "TSIx_LS_MEM_WRITE_SHORT", - "TSIx_LS_MEM_ATOMIC", - "TSIx_VARY_INSTR", - "TSIx_VARY_SLOT_32", - "TSIx_VARY_SLOT_16", - "TSIx_ATTR_INSTR", - "TSIx_ARITH_INSTR_FP_MUL", - "TSIx_BEATS_RD_FTC", - "TSIx_BEATS_RD_FTC_EXT", - "TSIx_BEATS_RD_LSC", - "TSIx_BEATS_RD_LSC_EXT", - "TSIx_BEATS_RD_TEX", - "TSIx_BEATS_RD_TEX_EXT", - "TSIx_BEATS_RD_OTHER", - "TSIx_BEATS_WR_LSC_OTHER", - "TSIx_BEATS_WR_TIB", - "TSIx_BEATS_WR_LSC_WB", - - /* L2 and MMU */ - "", - "", - "", - "", - "TSIx_MMU_REQUESTS", - "TSIx_MMU_TABLE_READS_L3", - "TSIx_MMU_TABLE_READS_L2", - "TSIx_MMU_HIT_L3", - "TSIx_MMU_HIT_L2", - "TSIx_MMU_S2_REQUESTS", - "TSIx_MMU_S2_TABLE_READS_L3", - "TSIx_MMU_S2_TABLE_READS_L2", - "TSIx_MMU_S2_HIT_L3", - "TSIx_MMU_S2_HIT_L2", - "", - "", - "TSIx_L2_RD_MSG_IN", - "TSIx_L2_RD_MSG_IN_STALL", - "TSIx_L2_WR_MSG_IN", - "TSIx_L2_WR_MSG_IN_STALL", - "TSIx_L2_SNP_MSG_IN", - "TSIx_L2_SNP_MSG_IN_STALL", - "TSIx_L2_RD_MSG_OUT", - "TSIx_L2_RD_MSG_OUT_STALL", - "TSIx_L2_WR_MSG_OUT", - "TSIx_L2_ANY_LOOKUP", - "TSIx_L2_READ_LOOKUP", - "TSIx_L2_WRITE_LOOKUP", - "TSIx_L2_EXT_SNOOP_LOOKUP", - "TSIx_L2_EXT_READ", - "TSIx_L2_EXT_READ_NOSNP", - "TSIx_L2_EXT_READ_UNIQUE", - "TSIx_L2_EXT_READ_BEATS", - "TSIx_L2_EXT_AR_STALL", - "TSIx_L2_EXT_AR_CNT_Q1", - 
"TSIx_L2_EXT_AR_CNT_Q2", - "TSIx_L2_EXT_AR_CNT_Q3", - "TSIx_L2_EXT_RRESP_0_127", - "TSIx_L2_EXT_RRESP_128_191", - "TSIx_L2_EXT_RRESP_192_255", - "TSIx_L2_EXT_RRESP_256_319", - "TSIx_L2_EXT_RRESP_320_383", - "TSIx_L2_EXT_WRITE", - "TSIx_L2_EXT_WRITE_NOSNP_FULL", - "TSIx_L2_EXT_WRITE_NOSNP_PTL", - "TSIx_L2_EXT_WRITE_SNP_FULL", - "TSIx_L2_EXT_WRITE_SNP_PTL", - "TSIx_L2_EXT_WRITE_BEATS", - "TSIx_L2_EXT_W_STALL", - "TSIx_L2_EXT_AW_CNT_Q1", - "TSIx_L2_EXT_AW_CNT_Q2", - "TSIx_L2_EXT_AW_CNT_Q3", - "TSIx_L2_EXT_SNOOP", - "TSIx_L2_EXT_SNOOP_STALL", - "TSIx_L2_EXT_SNOOP_RESP_CLEAN", - "TSIx_L2_EXT_SNOOP_RESP_DATA", - "TSIx_L2_EXT_SNOOP_INTERNAL", - "", - "", - "", - "", - "", - "", - "", - }; - - static const char * const hardware_counters_mali_tNOx[] = { - /* Job Manager */ - "", - "", - "", - "", - "TNOx_MESSAGES_SENT", - "TNOx_MESSAGES_RECEIVED", - "TNOx_GPU_ACTIVE", - "TNOx_IRQ_ACTIVE", - "TNOx_JS0_JOBS", - "TNOx_JS0_TASKS", - "TNOx_JS0_ACTIVE", - "TNOx_JS0_WAIT_FLUSH", - "TNOx_JS0_WAIT_READ", - "TNOx_JS0_WAIT_ISSUE", - "TNOx_JS0_WAIT_DEPEND", - "TNOx_JS0_WAIT_FINISH", - "TNOx_JS1_JOBS", - "TNOx_JS1_TASKS", - "TNOx_JS1_ACTIVE", - "TNOx_JS1_WAIT_FLUSH", - "TNOx_JS1_WAIT_READ", - "TNOx_JS1_WAIT_ISSUE", - "TNOx_JS1_WAIT_DEPEND", - "TNOx_JS1_WAIT_FINISH", - "TNOx_JS2_JOBS", - "TNOx_JS2_TASKS", - "TNOx_JS2_ACTIVE", - "TNOx_JS2_WAIT_FLUSH", - "TNOx_JS2_WAIT_READ", - "TNOx_JS2_WAIT_ISSUE", - "TNOx_JS2_WAIT_DEPEND", - "TNOx_JS2_WAIT_FINISH", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "TNOx_CACHE_FLUSH", - - /* Tiler */ - "", - "", - "", - "", - "TNOx_TILER_ACTIVE", - "TNOx_JOBS_PROCESSED", - "TNOx_TRIANGLES", - "TNOx_LINES", - "TNOx_POINTS", - "TNOx_FRONT_FACING", - "TNOx_BACK_FACING", - "TNOx_PRIM_VISIBLE", - "TNOx_PRIM_CULLED", - "TNOx_PRIM_CLIPPED", - "TNOx_PRIM_SAT_CULLED", - "TNOx_BIN_ALLOC_INIT", - "TNOx_BIN_ALLOC_OVERFLOW", - "TNOx_BUS_READ", - "", - "TNOx_BUS_WRITE", - "TNOx_LOADING_DESC", - "TNOx_IDVS_POS_SHAD_REQ", - "TNOx_IDVS_POS_SHAD_WAIT", - "TNOx_IDVS_POS_SHAD_STALL", - "TNOx_IDVS_POS_FIFO_FULL", - "TNOx_PREFETCH_STALL", - "TNOx_VCACHE_HIT", - "TNOx_VCACHE_MISS", - "TNOx_VCACHE_LINE_WAIT", - "TNOx_VFETCH_POS_READ_WAIT", - "TNOx_VFETCH_VERTEX_WAIT", - "TNOx_VFETCH_STALL", - "TNOx_PRIMASSY_STALL", - "TNOx_BBOX_GEN_STALL", - "TNOx_IDVS_VBU_HIT", - "TNOx_IDVS_VBU_MISS", - "TNOx_IDVS_VBU_LINE_DEALLOCATE", - "TNOx_IDVS_VAR_SHAD_REQ", - "TNOx_IDVS_VAR_SHAD_STALL", - "TNOx_BINNER_STALL", - "TNOx_ITER_STALL", - "TNOx_COMPRESS_MISS", - "TNOx_COMPRESS_STALL", - "TNOx_PCACHE_HIT", - "TNOx_PCACHE_MISS", - "TNOx_PCACHE_MISS_STALL", - "TNOx_PCACHE_EVICT_STALL", - "TNOx_PMGR_PTR_WR_STALL", - "TNOx_PMGR_PTR_RD_STALL", - "TNOx_PMGR_CMD_WR_STALL", - "TNOx_WRBUF_ACTIVE", - "TNOx_WRBUF_HIT", - "TNOx_WRBUF_MISS", - "TNOx_WRBUF_NO_FREE_LINE_STALL", - "TNOx_WRBUF_NO_AXI_ID_STALL", - "TNOx_WRBUF_AXI_STALL", - "", - "", - "", - "TNOx_UTLB_TRANS", - "TNOx_UTLB_TRANS_HIT", - "TNOx_UTLB_TRANS_STALL", - "TNOx_UTLB_TRANS_MISS_DELAY", - "TNOx_UTLB_MMU_REQ", - - /* Shader Core */ - "", - "", - "", - "", - "TNOx_FRAG_ACTIVE", - "TNOx_FRAG_PRIMITIVES", - "TNOx_FRAG_PRIM_RAST", - "TNOx_FRAG_FPK_ACTIVE", - "TNOx_FRAG_STARVING", - "TNOx_FRAG_WARPS", - "TNOx_FRAG_PARTIAL_WARPS", - "TNOx_FRAG_QUADS_RAST", - "TNOx_FRAG_QUADS_EZS_TEST", - "TNOx_FRAG_QUADS_EZS_UPDATE", - "TNOx_FRAG_QUADS_EZS_KILL", - "TNOx_FRAG_LZS_TEST", - "TNOx_FRAG_LZS_KILL", - "TNOx_WARP_REG_SIZE_64", - 
"TNOx_FRAG_PTILES", - "TNOx_FRAG_TRANS_ELIM", - "TNOx_QUAD_FPK_KILLER", - "TNOx_FULL_QUAD_WARPS", - "TNOx_COMPUTE_ACTIVE", - "TNOx_COMPUTE_TASKS", - "TNOx_COMPUTE_WARPS", - "TNOx_COMPUTE_STARVING", - "TNOx_EXEC_CORE_ACTIVE", - "TNOx_EXEC_ACTIVE", - "TNOx_EXEC_INSTR_COUNT", - "TNOx_EXEC_INSTR_DIVERGED", - "TNOx_EXEC_INSTR_STARVING", - "TNOx_ARITH_INSTR_SINGLE_FMA", - "TNOx_ARITH_INSTR_DOUBLE", - "TNOx_ARITH_INSTR_MSG", - "TNOx_ARITH_INSTR_MSG_ONLY", - "TNOx_TEX_MSGI_NUM_QUADS", - "TNOx_TEX_DFCH_NUM_PASSES", - "TNOx_TEX_DFCH_NUM_PASSES_MISS", - "TNOx_TEX_DFCH_NUM_PASSES_MIP_MAP", - "TNOx_TEX_TIDX_NUM_SPLIT_MIP_MAP", - "TNOx_TEX_TFCH_NUM_LINES_FETCHED", - "TNOx_TEX_TFCH_NUM_LINES_FETCHED_BLOCK_COMPRESSED", - "TNOx_TEX_TFCH_NUM_OPERATIONS", - "TNOx_TEX_FILT_NUM_OPERATIONS", - "TNOx_LS_MEM_READ_FULL", - "TNOx_LS_MEM_READ_SHORT", - "TNOx_LS_MEM_WRITE_FULL", - "TNOx_LS_MEM_WRITE_SHORT", - "TNOx_LS_MEM_ATOMIC", - "TNOx_VARY_INSTR", - "TNOx_VARY_SLOT_32", - "TNOx_VARY_SLOT_16", - "TNOx_ATTR_INSTR", - "TNOx_ARITH_INSTR_FP_MUL", - "TNOx_BEATS_RD_FTC", - "TNOx_BEATS_RD_FTC_EXT", - "TNOx_BEATS_RD_LSC", - "TNOx_BEATS_RD_LSC_EXT", - "TNOx_BEATS_RD_TEX", - "TNOx_BEATS_RD_TEX_EXT", - "TNOx_BEATS_RD_OTHER", - "TNOx_BEATS_WR_LSC_OTHER", - "TNOx_BEATS_WR_TIB", - "TNOx_BEATS_WR_LSC_WB", - - /* L2 and MMU */ - "", - "", - "", - "", - "TNOx_MMU_REQUESTS", - "TNOx_MMU_TABLE_READS_L3", - "TNOx_MMU_TABLE_READS_L2", - "TNOx_MMU_HIT_L3", - "TNOx_MMU_HIT_L2", - "TNOx_MMU_S2_REQUESTS", - "TNOx_MMU_S2_TABLE_READS_L3", - "TNOx_MMU_S2_TABLE_READS_L2", - "TNOx_MMU_S2_HIT_L3", - "TNOx_MMU_S2_HIT_L2", - "", - "", - "TNOx_L2_RD_MSG_IN", - "TNOx_L2_RD_MSG_IN_STALL", - "TNOx_L2_WR_MSG_IN", - "TNOx_L2_WR_MSG_IN_STALL", - "TNOx_L2_SNP_MSG_IN", - "TNOx_L2_SNP_MSG_IN_STALL", - "TNOx_L2_RD_MSG_OUT", - "TNOx_L2_RD_MSG_OUT_STALL", - "TNOx_L2_WR_MSG_OUT", - "TNOx_L2_ANY_LOOKUP", - "TNOx_L2_READ_LOOKUP", - "TNOx_L2_WRITE_LOOKUP", - "TNOx_L2_EXT_SNOOP_LOOKUP", - "TNOx_L2_EXT_READ", - "TNOx_L2_EXT_READ_NOSNP", - "TNOx_L2_EXT_READ_UNIQUE", - "TNOx_L2_EXT_READ_BEATS", - "TNOx_L2_EXT_AR_STALL", - "TNOx_L2_EXT_AR_CNT_Q1", - "TNOx_L2_EXT_AR_CNT_Q2", - "TNOx_L2_EXT_AR_CNT_Q3", - "TNOx_L2_EXT_RRESP_0_127", - "TNOx_L2_EXT_RRESP_128_191", - "TNOx_L2_EXT_RRESP_192_255", - "TNOx_L2_EXT_RRESP_256_319", - "TNOx_L2_EXT_RRESP_320_383", - "TNOx_L2_EXT_WRITE", - "TNOx_L2_EXT_WRITE_NOSNP_FULL", - "TNOx_L2_EXT_WRITE_NOSNP_PTL", - "TNOx_L2_EXT_WRITE_SNP_FULL", - "TNOx_L2_EXT_WRITE_SNP_PTL", - "TNOx_L2_EXT_WRITE_BEATS", - "TNOx_L2_EXT_W_STALL", - "TNOx_L2_EXT_AW_CNT_Q1", - "TNOx_L2_EXT_AW_CNT_Q2", - "TNOx_L2_EXT_AW_CNT_Q3", - "TNOx_L2_EXT_SNOOP", - "TNOx_L2_EXT_SNOOP_STALL", - "TNOx_L2_EXT_SNOOP_RESP_CLEAN", - "TNOx_L2_EXT_SNOOP_RESP_DATA", - "TNOx_L2_EXT_SNOOP_INTERNAL", - "", - "", - "", - "", - "", - "", - "", - }; - - static const char * const hardware_counters_mali_tGOx[] = { - /* Job Manager */ - "", - "", - "", - "", - "TGOx_MESSAGES_SENT", - "TGOx_MESSAGES_RECEIVED", - "TGOx_GPU_ACTIVE", - "TGOx_IRQ_ACTIVE", - "TGOx_JS0_JOBS", - "TGOx_JS0_TASKS", - "TGOx_JS0_ACTIVE", - "TGOx_JS0_WAIT_FLUSH", - "TGOx_JS0_WAIT_READ", - "TGOx_JS0_WAIT_ISSUE", - "TGOx_JS0_WAIT_DEPEND", - "TGOx_JS0_WAIT_FINISH", - "TGOx_JS1_JOBS", - "TGOx_JS1_TASKS", - "TGOx_JS1_ACTIVE", - "TGOx_JS1_WAIT_FLUSH", - "TGOx_JS1_WAIT_READ", - "TGOx_JS1_WAIT_ISSUE", - "TGOx_JS1_WAIT_DEPEND", - "TGOx_JS1_WAIT_FINISH", - "TGOx_JS2_JOBS", - "TGOx_JS2_TASKS", - "TGOx_JS2_ACTIVE", - "TGOx_JS2_WAIT_FLUSH", - "TGOx_JS2_WAIT_READ", - "TGOx_JS2_WAIT_ISSUE", - "TGOx_JS2_WAIT_DEPEND", - 
"TGOx_JS2_WAIT_FINISH", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "TGOx_CACHE_FLUSH", - - /* Tiler */ - "", - "", - "", - "", - "TGOx_TILER_ACTIVE", - "TGOx_JOBS_PROCESSED", - "TGOx_TRIANGLES", - "TGOx_LINES", - "TGOx_POINTS", - "TGOx_FRONT_FACING", - "TGOx_BACK_FACING", - "TGOx_PRIM_VISIBLE", - "TGOx_PRIM_CULLED", - "TGOx_PRIM_CLIPPED", - "TGOx_PRIM_SAT_CULLED", - "TGOx_BIN_ALLOC_INIT", - "TGOx_BIN_ALLOC_OVERFLOW", - "TGOx_BUS_READ", - "", - "TGOx_BUS_WRITE", - "TGOx_LOADING_DESC", - "TGOx_IDVS_POS_SHAD_REQ", - "TGOx_IDVS_POS_SHAD_WAIT", - "TGOx_IDVS_POS_SHAD_STALL", - "TGOx_IDVS_POS_FIFO_FULL", - "TGOx_PREFETCH_STALL", - "TGOx_VCACHE_HIT", - "TGOx_VCACHE_MISS", - "TGOx_VCACHE_LINE_WAIT", - "TGOx_VFETCH_POS_READ_WAIT", - "TGOx_VFETCH_VERTEX_WAIT", - "TGOx_VFETCH_STALL", - "TGOx_PRIMASSY_STALL", - "TGOx_BBOX_GEN_STALL", - "TGOx_IDVS_VBU_HIT", - "TGOx_IDVS_VBU_MISS", - "TGOx_IDVS_VBU_LINE_DEALLOCATE", - "TGOx_IDVS_VAR_SHAD_REQ", - "TGOx_IDVS_VAR_SHAD_STALL", - "TGOx_BINNER_STALL", - "TGOx_ITER_STALL", - "TGOx_COMPRESS_MISS", - "TGOx_COMPRESS_STALL", - "TGOx_PCACHE_HIT", - "TGOx_PCACHE_MISS", - "TGOx_PCACHE_MISS_STALL", - "TGOx_PCACHE_EVICT_STALL", - "TGOx_PMGR_PTR_WR_STALL", - "TGOx_PMGR_PTR_RD_STALL", - "TGOx_PMGR_CMD_WR_STALL", - "TGOx_WRBUF_ACTIVE", - "TGOx_WRBUF_HIT", - "TGOx_WRBUF_MISS", - "TGOx_WRBUF_NO_FREE_LINE_STALL", - "TGOx_WRBUF_NO_AXI_ID_STALL", - "TGOx_WRBUF_AXI_STALL", - "", - "", - "", - "TGOx_UTLB_TRANS", - "TGOx_UTLB_TRANS_HIT", - "TGOx_UTLB_TRANS_STALL", - "TGOx_UTLB_TRANS_MISS_DELAY", - "TGOx_UTLB_MMU_REQ", - - /* Shader Core */ - "", - "", - "", - "", - "TGOx_FRAG_ACTIVE", - "TGOx_FRAG_PRIMITIVES", - "TGOx_FRAG_PRIM_RAST", - "TGOx_FRAG_FPK_ACTIVE", - "TGOx_FRAG_STARVING", - "TGOx_FRAG_WARPS", - "TGOx_FRAG_PARTIAL_WARPS", - "TGOx_FRAG_QUADS_RAST", - "TGOx_FRAG_QUADS_EZS_TEST", - "TGOx_FRAG_QUADS_EZS_UPDATE", - "TGOx_FRAG_QUADS_EZS_KILL", - "TGOx_FRAG_LZS_TEST", - "TGOx_FRAG_LZS_KILL", - "TGOx_WARP_REG_SIZE_64", - "TGOx_FRAG_PTILES", - "TGOx_FRAG_TRANS_ELIM", - "TGOx_QUAD_FPK_KILLER", - "TGOx_FULL_QUAD_WARPS", - "TGOx_COMPUTE_ACTIVE", - "TGOx_COMPUTE_TASKS", - "TGOx_COMPUTE_WARPS", - "TGOx_COMPUTE_STARVING", - "TGOx_EXEC_CORE_ACTIVE", - "TGOx_EXEC_ACTIVE", - "TGOx_EXEC_INSTR_COUNT", - "TGOx_EXEC_INSTR_DIVERGED", - "TGOx_EXEC_INSTR_STARVING", - "TGOx_ARITH_INSTR_SINGLE_FMA", - "TGOx_ARITH_INSTR_DOUBLE", - "TGOx_ARITH_INSTR_MSG", - "TGOx_ARITH_INSTR_MSG_ONLY", - "TGOx_TEX_MSGI_NUM_QUADS", - "TGOx_TEX_DFCH_NUM_PASSES", - "TGOx_TEX_DFCH_NUM_PASSES_MISS", - "TGOx_TEX_DFCH_NUM_PASSES_MIP_MAP", - "TGOx_TEX_TIDX_NUM_SPLIT_MIP_MAP", - "TGOx_TEX_TFCH_NUM_LINES_FETCHED", - "TGOx_TEX_TFCH_NUM_LINES_FETCHED_BLOCK_COMPRESSED", - "TGOx_TEX_TFCH_NUM_OPERATIONS", - "TGOx_TEX_FILT_NUM_OPERATIONS", - "TGOx_LS_MEM_READ_FULL", - "TGOx_LS_MEM_READ_SHORT", - "TGOx_LS_MEM_WRITE_FULL", - "TGOx_LS_MEM_WRITE_SHORT", - "TGOx_LS_MEM_ATOMIC", - "TGOx_VARY_INSTR", - "TGOx_VARY_SLOT_32", - "TGOx_VARY_SLOT_16", - "TGOx_ATTR_INSTR", - "TGOx_ARITH_INSTR_FP_MUL", - "TGOx_BEATS_RD_FTC", - "TGOx_BEATS_RD_FTC_EXT", - "TGOx_BEATS_RD_LSC", - "TGOx_BEATS_RD_LSC_EXT", - "TGOx_BEATS_RD_TEX", - "TGOx_BEATS_RD_TEX_EXT", - "TGOx_BEATS_RD_OTHER", - "TGOx_BEATS_WR_LSC_WB", - "TGOx_BEATS_WR_TIB", - "TGOx_BEATS_WR_LSC_OTHER", - - /* L2 and MMU */ - "", - "", - "", - "", - "TGOx_MMU_REQUESTS", - "TGOx_MMU_TABLE_READS_L3", - "TGOx_MMU_TABLE_READS_L2", - "TGOx_MMU_HIT_L3", - 
"TGOx_MMU_HIT_L2", - "TGOx_MMU_S2_REQUESTS", - "TGOx_MMU_S2_TABLE_READS_L3", - "TGOx_MMU_S2_TABLE_READS_L2", - "TGOx_MMU_S2_HIT_L3", - "TGOx_MMU_S2_HIT_L2", - "", - "", - "TGOx_L2_RD_MSG_IN", - "TGOx_L2_RD_MSG_IN_STALL", - "TGOx_L2_WR_MSG_IN", - "TGOx_L2_WR_MSG_IN_STALL", - "TGOx_L2_SNP_MSG_IN", - "TGOx_L2_SNP_MSG_IN_STALL", - "TGOx_L2_RD_MSG_OUT", - "TGOx_L2_RD_MSG_OUT_STALL", - "TGOx_L2_WR_MSG_OUT", - "TGOx_L2_ANY_LOOKUP", - "TGOx_L2_READ_LOOKUP", - "TGOx_L2_WRITE_LOOKUP", - "TGOx_L2_EXT_SNOOP_LOOKUP", - "TGOx_L2_EXT_READ", - "TGOx_L2_EXT_READ_NOSNP", - "TGOx_L2_EXT_READ_UNIQUE", - "TGOx_L2_EXT_READ_BEATS", - "TGOx_L2_EXT_AR_STALL", - "TGOx_L2_EXT_AR_CNT_Q1", - "TGOx_L2_EXT_AR_CNT_Q2", - "TGOx_L2_EXT_AR_CNT_Q3", - "TGOx_L2_EXT_RRESP_0_127", - "TGOx_L2_EXT_RRESP_128_191", - "TGOx_L2_EXT_RRESP_192_255", - "TGOx_L2_EXT_RRESP_256_319", - "TGOx_L2_EXT_RRESP_320_383", - "TGOx_L2_EXT_WRITE", - "TGOx_L2_EXT_WRITE_NOSNP_FULL", - "TGOx_L2_EXT_WRITE_NOSNP_PTL", - "TGOx_L2_EXT_WRITE_SNP_FULL", - "TGOx_L2_EXT_WRITE_SNP_PTL", - "TGOx_L2_EXT_WRITE_BEATS", - "TGOx_L2_EXT_W_STALL", - "TGOx_L2_EXT_AW_CNT_Q1", - "TGOx_L2_EXT_AW_CNT_Q2", - "TGOx_L2_EXT_AW_CNT_Q3", - "TGOx_L2_EXT_SNOOP", - "TGOx_L2_EXT_SNOOP_STALL", - "TGOx_L2_EXT_SNOOP_RESP_CLEAN", - "TGOx_L2_EXT_SNOOP_RESP_DATA", - "TGOx_L2_EXT_SNOOP_INTERNAL", - "", - "", - "", - "", - "", - "", - "", - }; - - static const char * const hardware_counters_mali_tTRx[] = { - /* Job Manager */ - "", - "", - "", - "", - "TTRx_MESSAGES_SENT", - "TTRx_MESSAGES_RECEIVED", - "TTRx_GPU_ACTIVE", - "TTRx_IRQ_ACTIVE", - "TTRx_JS0_JOBS", - "TTRx_JS0_TASKS", - "TTRx_JS0_ACTIVE", - "TTRx_JS0_WAIT_FLUSH", - "TTRx_JS0_WAIT_READ", - "TTRx_JS0_WAIT_ISSUE", - "TTRx_JS0_WAIT_DEPEND", - "TTRx_JS0_WAIT_FINISH", - "TTRx_JS1_JOBS", - "TTRx_JS1_TASKS", - "TTRx_JS1_ACTIVE", - "TTRx_JS1_WAIT_FLUSH", - "TTRx_JS1_WAIT_READ", - "TTRx_JS1_WAIT_ISSUE", - "TTRx_JS1_WAIT_DEPEND", - "TTRx_JS1_WAIT_FINISH", - "TTRx_JS2_JOBS", - "TTRx_JS2_TASKS", - "TTRx_JS2_ACTIVE", - "TTRx_JS2_WAIT_FLUSH", - "TTRx_JS2_WAIT_READ", - "TTRx_JS2_WAIT_ISSUE", - "TTRx_JS2_WAIT_DEPEND", - "TTRx_JS2_WAIT_FINISH", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "TTRx_CACHE_FLUSH", - - /* Tiler */ - "", - "", - "", - "", - "TTRx_TILER_ACTIVE", - "TTRx_JOBS_PROCESSED", - "TTRx_TRIANGLES", - "TTRx_LINES", - "TTRx_POINTS", - "TTRx_FRONT_FACING", - "TTRx_BACK_FACING", - "TTRx_PRIM_VISIBLE", - "TTRx_PRIM_CULLED", - "TTRx_PRIM_CLIPPED", - "TTRx_PRIM_SAT_CULLED", - "TTRx_BIN_ALLOC_INIT", - "TTRx_BIN_ALLOC_OVERFLOW", - "TTRx_BUS_READ", - "TTRx_BUS_WRITE_UTLB0", - "TTRx_BUS_WRITE_UTLB1", - "TTRx_LOADING_DESC", - "TTRx_IDVS_POS_SHAD_REQ", - "TTRx_IDVS_POS_SHAD_WAIT", - "TTRx_IDVS_POS_SHAD_STALL", - "TTRx_IDVS_POS_FIFO_FULL", - "TTRx_PREFETCH_STALL", - "TTRx_VCACHE_HIT", - "TTRx_VCACHE_MISS", - "TTRx_VCACHE_LINE_WAIT", - "TTRx_VFETCH_POS_READ_WAIT", - "TTRx_VFETCH_VERTEX_WAIT", - "TTRx_VFETCH_STALL", - "TTRx_PRIMASSY_STALL", - "TTRx_BBOX_GEN_STALL", - "TTRx_IDVS_VBU_HIT", - "TTRx_IDVS_VBU_MISS", - "TTRx_IDVS_VBU_LINE_DEALLOCATE", - "TTRx_IDVS_VAR_SHAD_REQ", - "TTRx_IDVS_VAR_SHAD_STALL", - "TTRx_BINNER_STALL", - "TTRx_ITER_STALL", - "TTRx_COMPRESS_MISS", - "TTRx_COMPRESS_STALL", - "TTRx_PCACHE_HIT", - "TTRx_PCACHE_MISS", - "TTRx_PCACHE_MISS_STALL", - "TTRx_PCACHE_EVICT_STALL", - "TTRx_PMGR_PTR_WR_STALL", - "TTRx_PMGR_PTR_RD_STALL", - "TTRx_PMGR_CMD_WR_STALL", - "TTRx_WRBUF_ACTIVE", - 
"TTRx_WRBUF_HIT", - "TTRx_WRBUF_MISS", - "TTRx_WRBUF_NO_FREE_LINE_STALL", - "TTRx_WRBUF_NO_AXI_ID_STALL", - "TTRx_WRBUF_AXI_STALL", - "", - "", - "", - "TTRx_UTLB_TRANS", - "TTRx_UTLB_TRANS_HIT", - "TTRx_UTLB_TRANS_STALL", - "TTRx_UTLB_TRANS_MISS_DELAY", - "TTRx_UTLB_MMU_REQ", - - /* Shader Core */ - "", - "", - "", - "", - "TTRx_FRAG_ACTIVE", - "TTRx_FRAG_PRIMITIVES_OUT", - "TTRx_FRAG_PRIM_RAST", - "TTRx_FRAG_FPK_ACTIVE", - "TTRx_FRAG_STARVING", - "TTRx_FRAG_WARPS", - "TTRx_FRAG_PARTIAL_QUADS_RAST", - "TTRx_FRAG_QUADS_RAST", - "TTRx_FRAG_QUADS_EZS_TEST", - "TTRx_FRAG_QUADS_EZS_UPDATE", - "TTRx_FRAG_QUADS_EZS_KILL", - "TTRx_FRAG_LZS_TEST", - "TTRx_FRAG_LZS_KILL", - "TTRx_WARP_REG_SIZE_64", - "TTRx_FRAG_PTILES", - "TTRx_FRAG_TRANS_ELIM", - "TTRx_QUAD_FPK_KILLER", - "TTRx_FULL_QUAD_WARPS", - "TTRx_COMPUTE_ACTIVE", - "TTRx_COMPUTE_TASKS", - "TTRx_COMPUTE_WARPS", - "TTRx_COMPUTE_STARVING", - "TTRx_EXEC_CORE_ACTIVE", - "TTRx_EXEC_INSTR_FMA", - "TTRx_EXEC_INSTR_CVT", - "TTRx_EXEC_INSTR_SFU", - "TTRx_EXEC_INSTR_MSG", - "TTRx_EXEC_INSTR_DIVERGED", - "TTRx_EXEC_ICACHE_MISS", - "TTRx_EXEC_STARVE_ARITH", - "TTRx_CALL_BLEND_SHADER", - "TTRx_TEX_MSGI_NUM_FLITS", - "TTRx_TEX_DFCH_CLK_STALLED", - "TTRx_TEX_TFCH_CLK_STALLED", - "TTRx_TEX_TFCH_STARVED_PENDING_DATA_FETCH", - "TTRx_TEX_FILT_NUM_OPERATIONS", - "TTRx_TEX_FILT_NUM_FXR_OPERATIONS", - "TTRx_TEX_FILT_NUM_FST_OPERATIONS", - "TTRx_TEX_MSGO_NUM_MSG", - "TTRx_TEX_MSGO_NUM_FLITS", - "TTRx_LS_MEM_READ_FULL", - "TTRx_LS_MEM_READ_SHORT", - "TTRx_LS_MEM_WRITE_FULL", - "TTRx_LS_MEM_WRITE_SHORT", - "TTRx_LS_MEM_ATOMIC", - "TTRx_VARY_INSTR", - "TTRx_VARY_SLOT_32", - "TTRx_VARY_SLOT_16", - "TTRx_ATTR_INSTR", - "TTRx_ARITH_INSTR_FP_MUL", - "TTRx_BEATS_RD_FTC", - "TTRx_BEATS_RD_FTC_EXT", - "TTRx_BEATS_RD_LSC", - "TTRx_BEATS_RD_LSC_EXT", - "TTRx_BEATS_RD_TEX", - "TTRx_BEATS_RD_TEX_EXT", - "TTRx_BEATS_RD_OTHER", - "TTRx_BEATS_WR_LSC_OTHER", - "TTRx_BEATS_WR_TIB", - "TTRx_BEATS_WR_LSC_WB", - - /* L2 and MMU */ - "", - "", - "", - "", - "TTRx_MMU_REQUESTS", - "TTRx_MMU_TABLE_READS_L3", - "TTRx_MMU_TABLE_READS_L2", - "TTRx_MMU_HIT_L3", - "TTRx_MMU_HIT_L2", - "TTRx_MMU_S2_REQUESTS", - "TTRx_MMU_S2_TABLE_READS_L3", - "TTRx_MMU_S2_TABLE_READS_L2", - "TTRx_MMU_S2_HIT_L3", - "TTRx_MMU_S2_HIT_L2", - "", - "", - "TTRx_L2_RD_MSG_IN", - "TTRx_L2_RD_MSG_IN_STALL", - "TTRx_L2_WR_MSG_IN", - "TTRx_L2_WR_MSG_IN_STALL", - "TTRx_L2_SNP_MSG_IN", - "TTRx_L2_SNP_MSG_IN_STALL", - "TTRx_L2_RD_MSG_OUT", - "TTRx_L2_RD_MSG_OUT_STALL", - "TTRx_L2_WR_MSG_OUT", - "TTRx_L2_ANY_LOOKUP", - "TTRx_L2_READ_LOOKUP", - "TTRx_L2_WRITE_LOOKUP", - "TTRx_L2_EXT_SNOOP_LOOKUP", - "TTRx_L2_EXT_READ", - "TTRx_L2_EXT_READ_NOSNP", - "TTRx_L2_EXT_READ_UNIQUE", - "TTRx_L2_EXT_READ_BEATS", - "TTRx_L2_EXT_AR_STALL", - "TTRx_L2_EXT_AR_CNT_Q1", - "TTRx_L2_EXT_AR_CNT_Q2", - "TTRx_L2_EXT_AR_CNT_Q3", - "TTRx_L2_EXT_RRESP_0_127", - "TTRx_L2_EXT_RRESP_128_191", - "TTRx_L2_EXT_RRESP_192_255", - "TTRx_L2_EXT_RRESP_256_319", - "TTRx_L2_EXT_RRESP_320_383", - "TTRx_L2_EXT_WRITE", - "TTRx_L2_EXT_WRITE_NOSNP_FULL", - "TTRx_L2_EXT_WRITE_NOSNP_PTL", - "TTRx_L2_EXT_WRITE_SNP_FULL", - "TTRx_L2_EXT_WRITE_SNP_PTL", - "TTRx_L2_EXT_WRITE_BEATS", - "TTRx_L2_EXT_W_STALL", - "TTRx_L2_EXT_AW_CNT_Q1", - "TTRx_L2_EXT_AW_CNT_Q2", - "TTRx_L2_EXT_AW_CNT_Q3", - "TTRx_L2_EXT_SNOOP", - "TTRx_L2_EXT_SNOOP_STALL", - "TTRx_L2_EXT_SNOOP_RESP_CLEAN", - "TTRx_L2_EXT_SNOOP_RESP_DATA", - "TTRx_L2_EXT_SNOOP_INTERNAL", - "", - "", - "", - "", - "", - "", - "", - }; - - static const char * const hardware_counters_mali_tNAx[] = { - /* Job Manager */ - "", - 
"", - "", - "", - "TNAx_MESSAGES_SENT", - "TNAx_MESSAGES_RECEIVED", - "TNAx_GPU_ACTIVE", - "TNAx_IRQ_ACTIVE", - "TNAx_JS0_JOBS", - "TNAx_JS0_TASKS", - "TNAx_JS0_ACTIVE", - "TNAx_JS0_WAIT_FLUSH", - "TNAx_JS0_WAIT_READ", - "TNAx_JS0_WAIT_ISSUE", - "TNAx_JS0_WAIT_DEPEND", - "TNAx_JS0_WAIT_FINISH", - "TNAx_JS1_JOBS", - "TNAx_JS1_TASKS", - "TNAx_JS1_ACTIVE", - "TNAx_JS1_WAIT_FLUSH", - "TNAx_JS1_WAIT_READ", - "TNAx_JS1_WAIT_ISSUE", - "TNAx_JS1_WAIT_DEPEND", - "TNAx_JS1_WAIT_FINISH", - "TNAx_JS2_JOBS", - "TNAx_JS2_TASKS", - "TNAx_JS2_ACTIVE", - "TNAx_JS2_WAIT_FLUSH", - "TNAx_JS2_WAIT_READ", - "TNAx_JS2_WAIT_ISSUE", - "TNAx_JS2_WAIT_DEPEND", - "TNAx_JS2_WAIT_FINISH", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "TNAx_CACHE_FLUSH", - - /* Tiler */ - "", - "", - "", - "", - "TNAx_TILER_ACTIVE", - "TNAx_JOBS_PROCESSED", - "TNAx_TRIANGLES", - "TNAx_LINES", - "TNAx_POINTS", - "TNAx_FRONT_FACING", - "TNAx_BACK_FACING", - "TNAx_PRIM_VISIBLE", - "TNAx_PRIM_CULLED", - "TNAx_PRIM_CLIPPED", - "TNAx_PRIM_SAT_CULLED", - "TNAx_BIN_ALLOC_INIT", - "TNAx_BIN_ALLOC_OVERFLOW", - "TNAx_BUS_READ", - "TNAx_BUS_WRITE_UTLB0", - "TNAx_BUS_WRITE_UTLB1", - "TNAx_LOADING_DESC", - "TNAx_IDVS_POS_SHAD_REQ", - "TNAx_IDVS_POS_SHAD_WAIT", - "TNAx_IDVS_POS_SHAD_STALL", - "TNAx_IDVS_POS_FIFO_FULL", - "TNAx_PREFETCH_STALL", - "TNAx_VCACHE_HIT", - "TNAx_VCACHE_MISS", - "TNAx_VCACHE_LINE_WAIT", - "TNAx_VFETCH_POS_READ_WAIT", - "TNAx_VFETCH_VERTEX_WAIT", - "TNAx_VFETCH_STALL", - "TNAx_PRIMASSY_STALL", - "TNAx_BBOX_GEN_STALL", - "TNAx_IDVS_VBU_HIT", - "TNAx_IDVS_VBU_MISS", - "TNAx_IDVS_VBU_LINE_DEALLOCATE", - "TNAx_IDVS_VAR_SHAD_REQ", - "TNAx_IDVS_VAR_SHAD_STALL", - "TNAx_BINNER_STALL", - "TNAx_ITER_STALL", - "TNAx_COMPRESS_MISS", - "TNAx_COMPRESS_STALL", - "TNAx_PCACHE_HIT", - "TNAx_PCACHE_MISS", - "TNAx_PCACHE_MISS_STALL", - "TNAx_PCACHE_EVICT_STALL", - "TNAx_PMGR_PTR_WR_STALL", - "TNAx_PMGR_PTR_RD_STALL", - "TNAx_PMGR_CMD_WR_STALL", - "TNAx_WRBUF_ACTIVE", - "TNAx_WRBUF_HIT", - "TNAx_WRBUF_MISS", - "TNAx_WRBUF_NO_FREE_LINE_STALL", - "TNAx_WRBUF_NO_AXI_ID_STALL", - "TNAx_WRBUF_AXI_STALL", - "", - "", - "", - "TNAx_UTLB_TRANS", - "TNAx_UTLB_TRANS_HIT", - "TNAx_UTLB_TRANS_STALL", - "TNAx_UTLB_TRANS_MISS_DELAY", - "TNAx_UTLB_MMU_REQ", - - /* Shader Core */ - "", - "", - "", - "", - "TNAx_FRAG_ACTIVE", - "TNAx_FRAG_PRIMITIVES_OUT", - "TNAx_FRAG_PRIM_RAST", - "TNAx_FRAG_FPK_ACTIVE", - "TNAx_FRAG_STARVING", - "TNAx_FRAG_WARPS", - "TNAx_FRAG_PARTIAL_QUADS_RAST", - "TNAx_FRAG_QUADS_RAST", - "TNAx_FRAG_QUADS_EZS_TEST", - "TNAx_FRAG_QUADS_EZS_UPDATE", - "TNAx_FRAG_QUADS_EZS_KILL", - "TNAx_FRAG_LZS_TEST", - "TNAx_FRAG_LZS_KILL", - "TNAx_WARP_REG_SIZE_64", - "TNAx_FRAG_PTILES", - "TNAx_FRAG_TRANS_ELIM", - "TNAx_QUAD_FPK_KILLER", - "TNAx_FULL_QUAD_WARPS", - "TNAx_COMPUTE_ACTIVE", - "TNAx_COMPUTE_TASKS", - "TNAx_COMPUTE_WARPS", - "TNAx_COMPUTE_STARVING", - "TNAx_EXEC_CORE_ACTIVE", - "TNAx_EXEC_INSTR_FMA", - "TNAx_EXEC_INSTR_CVT", - "TNAx_EXEC_INSTR_SFU", - "TNAx_EXEC_INSTR_MSG", - "TNAx_EXEC_INSTR_DIVERGED", - "TNAx_EXEC_ICACHE_MISS", - "TNAx_EXEC_STARVE_ARITH", - "TNAx_CALL_BLEND_SHADER", - "TNAx_TEX_MSGI_NUM_FLITS", - "TNAx_TEX_DFCH_CLK_STALLED", - "TNAx_TEX_TFCH_CLK_STALLED", - "TNAx_TEX_TFCH_STARVED_PENDING_DATA_FETCH", - "TNAx_TEX_FILT_NUM_OPERATIONS", - "TNAx_TEX_FILT_NUM_FXR_OPERATIONS", - "TNAx_TEX_FILT_NUM_FST_OPERATIONS", - "TNAx_TEX_MSGO_NUM_MSG", - "TNAx_TEX_MSGO_NUM_FLITS", - 
"TNAx_LS_MEM_READ_FULL", - "TNAx_LS_MEM_READ_SHORT", - "TNAx_LS_MEM_WRITE_FULL", - "TNAx_LS_MEM_WRITE_SHORT", - "TNAx_LS_MEM_ATOMIC", - "TNAx_VARY_INSTR", - "TNAx_VARY_SLOT_32", - "TNAx_VARY_SLOT_16", - "TNAx_ATTR_INSTR", - "TNAx_ARITH_INSTR_FP_MUL", - "TNAx_BEATS_RD_FTC", - "TNAx_BEATS_RD_FTC_EXT", - "TNAx_BEATS_RD_LSC", - "TNAx_BEATS_RD_LSC_EXT", - "TNAx_BEATS_RD_TEX", - "TNAx_BEATS_RD_TEX_EXT", - "TNAx_BEATS_RD_OTHER", - "TNAx_BEATS_WR_LSC_OTHER", - "TNAx_BEATS_WR_TIB", - "TNAx_BEATS_WR_LSC_WB", - - /* L2 and MMU */ - "", - "", - "", - "", - "TNAx_MMU_REQUESTS", - "TNAx_MMU_TABLE_READS_L3", - "TNAx_MMU_TABLE_READS_L2", - "TNAx_MMU_HIT_L3", - "TNAx_MMU_HIT_L2", - "TNAx_MMU_S2_REQUESTS", - "TNAx_MMU_S2_TABLE_READS_L3", - "TNAx_MMU_S2_TABLE_READS_L2", - "TNAx_MMU_S2_HIT_L3", - "TNAx_MMU_S2_HIT_L2", - "", - "", - "TNAx_L2_RD_MSG_IN", - "TNAx_L2_RD_MSG_IN_STALL", - "TNAx_L2_WR_MSG_IN", - "TNAx_L2_WR_MSG_IN_STALL", - "TNAx_L2_SNP_MSG_IN", - "TNAx_L2_SNP_MSG_IN_STALL", - "TNAx_L2_RD_MSG_OUT", - "TNAx_L2_RD_MSG_OUT_STALL", - "TNAx_L2_WR_MSG_OUT", - "TNAx_L2_ANY_LOOKUP", - "TNAx_L2_READ_LOOKUP", - "TNAx_L2_WRITE_LOOKUP", - "TNAx_L2_EXT_SNOOP_LOOKUP", - "TNAx_L2_EXT_READ", - "TNAx_L2_EXT_READ_NOSNP", - "TNAx_L2_EXT_READ_UNIQUE", - "TNAx_L2_EXT_READ_BEATS", - "TNAx_L2_EXT_AR_STALL", - "TNAx_L2_EXT_AR_CNT_Q1", - "TNAx_L2_EXT_AR_CNT_Q2", - "TNAx_L2_EXT_AR_CNT_Q3", - "TNAx_L2_EXT_RRESP_0_127", - "TNAx_L2_EXT_RRESP_128_191", - "TNAx_L2_EXT_RRESP_192_255", - "TNAx_L2_EXT_RRESP_256_319", - "TNAx_L2_EXT_RRESP_320_383", - "TNAx_L2_EXT_WRITE", - "TNAx_L2_EXT_WRITE_NOSNP_FULL", - "TNAx_L2_EXT_WRITE_NOSNP_PTL", - "TNAx_L2_EXT_WRITE_SNP_FULL", - "TNAx_L2_EXT_WRITE_SNP_PTL", - "TNAx_L2_EXT_WRITE_BEATS", - "TNAx_L2_EXT_W_STALL", - "TNAx_L2_EXT_AW_CNT_Q1", - "TNAx_L2_EXT_AW_CNT_Q2", - "TNAx_L2_EXT_AW_CNT_Q3", - "TNAx_L2_EXT_SNOOP", - "TNAx_L2_EXT_SNOOP_STALL", - "TNAx_L2_EXT_SNOOP_RESP_CLEAN", - "TNAx_L2_EXT_SNOOP_RESP_DATA", - "TNAx_L2_EXT_SNOOP_INTERNAL", - "", - "", - "", - "", - "", - "", - "", - }; - - static const char * const hardware_counters_mali_tOTx[] = { - /* Job Manager */ - "", - "", - "", - "", - "TOTx_MESSAGES_SENT", - "TOTx_MESSAGES_RECEIVED", - "TOTx_GPU_ACTIVE", - "TOTx_IRQ_ACTIVE", - "TOTx_JS0_JOBS", - "TOTx_JS0_TASKS", - "TOTx_JS0_ACTIVE", - "TOTx_JS0_WAIT_FLUSH", - "TOTx_JS0_WAIT_READ", - "TOTx_JS0_WAIT_ISSUE", - "TOTx_JS0_WAIT_DEPEND", - "TOTx_JS0_WAIT_FINISH", - "TOTx_JS1_JOBS", - "TOTx_JS1_TASKS", - "TOTx_JS1_ACTIVE", - "TOTx_JS1_WAIT_FLUSH", - "TOTx_JS1_WAIT_READ", - "TOTx_JS1_WAIT_ISSUE", - "TOTx_JS1_WAIT_DEPEND", - "TOTx_JS1_WAIT_FINISH", - "TOTx_JS2_JOBS", - "TOTx_JS2_TASKS", - "TOTx_JS2_ACTIVE", - "TOTx_JS2_WAIT_FLUSH", - "TOTx_JS2_WAIT_READ", - "TOTx_JS2_WAIT_ISSUE", - "TOTx_JS2_WAIT_DEPEND", - "TOTx_JS2_WAIT_FINISH", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "TOTx_CACHE_FLUSH", - - /* Tiler */ - "", - "", - "", - "", - "TOTx_TILER_ACTIVE", - "TOTx_JOBS_PROCESSED", - "TOTx_TRIANGLES", - "TOTx_LINES", - "TOTx_POINTS", - "TOTx_FRONT_FACING", - "TOTx_BACK_FACING", - "TOTx_PRIM_VISIBLE", - "TOTx_PRIM_CULLED", - "TOTx_PRIM_CLIPPED", - "TOTx_PRIM_SAT_CULLED", - "TOTx_BIN_ALLOC_INIT", - "TOTx_BIN_ALLOC_OVERFLOW", - "TOTx_BUS_READ", - "TOTx_BUS_WRITE_UTLB0", - "TOTx_BUS_WRITE_UTLB1", - "TOTx_LOADING_DESC", - "TOTx_IDVS_POS_SHAD_REQ", - "TOTx_IDVS_POS_SHAD_WAIT", - "TOTx_IDVS_POS_SHAD_STALL", - "TOTx_IDVS_POS_FIFO_FULL", - "TOTx_PREFETCH_STALL", - 
"TOTx_VCACHE_HIT", - "TOTx_VCACHE_MISS", - "TOTx_VCACHE_LINE_WAIT", - "TOTx_VFETCH_POS_READ_WAIT", - "TOTx_VFETCH_VERTEX_WAIT", - "TOTx_VFETCH_STALL", - "TOTx_PRIMASSY_STALL", - "TOTx_BBOX_GEN_STALL", - "TOTx_IDVS_VBU_HIT", - "TOTx_IDVS_VBU_MISS", - "TOTx_IDVS_VBU_LINE_DEALLOCATE", - "TOTx_IDVS_VAR_SHAD_REQ", - "TOTx_IDVS_VAR_SHAD_STALL", - "TOTx_BINNER_STALL", - "TOTx_ITER_STALL", - "TOTx_COMPRESS_MISS", - "TOTx_COMPRESS_STALL", - "TOTx_PCACHE_HIT", - "TOTx_PCACHE_MISS", - "TOTx_PCACHE_MISS_STALL", - "TOTx_PCACHE_EVICT_STALL", - "TOTx_PMGR_PTR_WR_STALL", - "TOTx_PMGR_PTR_RD_STALL", - "TOTx_PMGR_CMD_WR_STALL", - "TOTx_WRBUF_ACTIVE", - "TOTx_WRBUF_HIT", - "TOTx_WRBUF_MISS", - "TOTx_WRBUF_NO_FREE_LINE_STALL", - "TOTx_WRBUF_NO_AXI_ID_STALL", - "TOTx_WRBUF_AXI_STALL", - "TOTx_UTLB0_TRANS", - "TOTx_UTLB0_TRANS_HIT", - "TOTx_UTLB0_TRANS_STALL", - "TOTx_UTLB0_MMU_REQ", - "TOTx_UTLB1_TRANS", - "TOTx_UTLB1_TRANS_HIT", - "TOTx_UTLB1_TRANS_STALL", - "TOTx_UTLB1_MMU_REQ", - - /* Shader Core */ - "", - "", - "", - "", - "TOTx_FRAG_ACTIVE", - "TOTx_FRAG_PRIMITIVES_OUT", - "TOTx_FRAG_PRIM_RAST", - "TOTx_FRAG_FPK_ACTIVE", - "TOTx_FRAG_STARVING", - "TOTx_FRAG_WARPS", - "TOTx_FRAG_PARTIAL_QUADS_RAST", - "TOTx_FRAG_QUADS_RAST", - "TOTx_FRAG_QUADS_EZS_TEST", - "TOTx_FRAG_QUADS_EZS_UPDATE", - "TOTx_FRAG_QUADS_EZS_KILL", - "TOTx_FRAG_LZS_TEST", - "TOTx_FRAG_LZS_KILL", - "TOTx_WARP_REG_SIZE_64", - "TOTx_FRAG_PTILES", - "TOTx_FRAG_TRANS_ELIM", - "TOTx_QUAD_FPK_KILLER", - "TOTx_FULL_QUAD_WARPS", - "TOTx_COMPUTE_ACTIVE", - "TOTx_COMPUTE_TASKS", - "TOTx_COMPUTE_WARPS", - "TOTx_COMPUTE_STARVING", - "TOTx_EXEC_CORE_ACTIVE", - "TOTx_EXEC_INSTR_FMA", - "TOTx_EXEC_INSTR_CVT", - "TOTx_EXEC_INSTR_SFU", - "TOTx_EXEC_INSTR_MSG", - "TOTx_EXEC_INSTR_DIVERGED", - "TOTx_EXEC_ICACHE_MISS", - "TOTx_EXEC_STARVE_ARITH", - "TOTx_CALL_BLEND_SHADER", - "TOTx_TEX_MSGI_NUM_FLITS", - "TOTx_TEX_DFCH_CLK_STALLED", - "TOTx_TEX_TFCH_CLK_STALLED", - "TOTx_TEX_TFCH_STARVED_PENDING_DATA_FETCH", - "TOTx_TEX_FILT_NUM_OPERATIONS", - "TOTx_TEX_FILT_NUM_FXR_OPERATIONS", - "TOTx_TEX_FILT_NUM_FST_OPERATIONS", - "TOTx_TEX_MSGO_NUM_MSG", - "TOTx_TEX_MSGO_NUM_FLITS", - "TOTx_LS_MEM_READ_FULL", - "TOTx_LS_MEM_READ_SHORT", - "TOTx_LS_MEM_WRITE_FULL", - "TOTx_LS_MEM_WRITE_SHORT", - "TOTx_LS_MEM_ATOMIC", - "TOTx_VARY_INSTR", - "TOTx_VARY_SLOT_32", - "TOTx_VARY_SLOT_16", - "TOTx_ATTR_INSTR", - "TOTx_SHADER_CORE_ACTIVE", - "TOTx_BEATS_RD_FTC", - "TOTx_BEATS_RD_FTC_EXT", - "TOTx_BEATS_RD_LSC", - "TOTx_BEATS_RD_LSC_EXT", - "TOTx_BEATS_RD_TEX", - "TOTx_BEATS_RD_TEX_EXT", - "TOTx_BEATS_RD_OTHER", - "TOTx_BEATS_WR_LSC_OTHER", - "TOTx_BEATS_WR_TIB", - "TOTx_BEATS_WR_LSC_WB", - - /* L2 and MMU */ - "", - "", - "", - "", - "TOTx_MMU_REQUESTS", - "TOTx_MMU_TABLE_READS_L3", - "TOTx_MMU_TABLE_READS_L2", - "TOTx_MMU_HIT_L3", - "TOTx_MMU_HIT_L2", - "TOTx_MMU_S2_REQUESTS", - "TOTx_MMU_S2_TABLE_READS_L3", - "TOTx_MMU_S2_TABLE_READS_L2", - "TOTx_MMU_S2_HIT_L3", - "TOTx_MMU_S2_HIT_L2", - "", - "", - "TOTx_L2_RD_MSG_IN", - "TOTx_L2_RD_MSG_IN_STALL", - "TOTx_L2_WR_MSG_IN", - "TOTx_L2_WR_MSG_IN_STALL", - "TOTx_L2_SNP_MSG_IN", - "TOTx_L2_SNP_MSG_IN_STALL", - "TOTx_L2_RD_MSG_OUT", - "TOTx_L2_RD_MSG_OUT_STALL", - "TOTx_L2_WR_MSG_OUT", - "TOTx_L2_ANY_LOOKUP", - "TOTx_L2_READ_LOOKUP", - "TOTx_L2_WRITE_LOOKUP", - "TOTx_L2_EXT_SNOOP_LOOKUP", - "TOTx_L2_EXT_READ", - "TOTx_L2_EXT_READ_NOSNP", - "TOTx_L2_EXT_READ_UNIQUE", - "TOTx_L2_EXT_READ_BEATS", - "TOTx_L2_EXT_AR_STALL", - "TOTx_L2_EXT_AR_CNT_Q1", - "TOTx_L2_EXT_AR_CNT_Q2", - "TOTx_L2_EXT_AR_CNT_Q3", - "TOTx_L2_EXT_RRESP_0_127", - 
"TOTx_L2_EXT_RRESP_128_191", - "TOTx_L2_EXT_RRESP_192_255", - "TOTx_L2_EXT_RRESP_256_319", - "TOTx_L2_EXT_RRESP_320_383", - "TOTx_L2_EXT_WRITE", - "TOTx_L2_EXT_WRITE_NOSNP_FULL", - "TOTx_L2_EXT_WRITE_NOSNP_PTL", - "TOTx_L2_EXT_WRITE_SNP_FULL", - "TOTx_L2_EXT_WRITE_SNP_PTL", - "TOTx_L2_EXT_WRITE_BEATS", - "TOTx_L2_EXT_W_STALL", - "TOTx_L2_EXT_AW_CNT_Q1", - "TOTx_L2_EXT_AW_CNT_Q2", - "TOTx_L2_EXT_AW_CNT_Q3", - "TOTx_L2_EXT_SNOOP", - "TOTx_L2_EXT_SNOOP_STALL", - "TOTx_L2_EXT_SNOOP_RESP_CLEAN", - "TOTx_L2_EXT_SNOOP_RESP_DATA", - "TOTx_L2_EXT_SNOOP_INTERNAL", - "", - "", - "", - "", - "", - "", - "", - }; - - static const char * const hardware_counters_mali_tBOx[] = { - /* Job Manager */ - "", - "", - "", - "", - "TBOx_MESSAGES_SENT", - "TBOx_MESSAGES_RECEIVED", - "TBOx_GPU_ACTIVE", - "TBOx_IRQ_ACTIVE", - "TBOx_JS0_JOBS", - "TBOx_JS0_TASKS", - "TBOx_JS0_ACTIVE", - "TBOx_JS0_WAIT_FLUSH", - "TBOx_JS0_WAIT_READ", - "TBOx_JS0_WAIT_ISSUE", - "TBOx_JS0_WAIT_DEPEND", - "TBOx_JS0_WAIT_FINISH", - "TBOx_JS1_JOBS", - "TBOx_JS1_TASKS", - "TBOx_JS1_ACTIVE", - "TBOx_JS1_WAIT_FLUSH", - "TBOx_JS1_WAIT_READ", - "TBOx_JS1_WAIT_ISSUE", - "TBOx_JS1_WAIT_DEPEND", - "TBOx_JS1_WAIT_FINISH", - "TBOx_JS2_JOBS", - "TBOx_JS2_TASKS", - "TBOx_JS2_ACTIVE", - "TBOx_JS2_WAIT_FLUSH", - "TBOx_JS2_WAIT_READ", - "TBOx_JS2_WAIT_ISSUE", - "TBOx_JS2_WAIT_DEPEND", - "TBOx_JS2_WAIT_FINISH", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "TBOx_CACHE_FLUSH", - - /* Tiler */ - "", - "", - "", - "", - "TBOx_TILER_ACTIVE", - "TBOx_JOBS_PROCESSED", - "TBOx_TRIANGLES", - "TBOx_LINES", - "TBOx_POINTS", - "TBOx_FRONT_FACING", - "TBOx_BACK_FACING", - "TBOx_PRIM_VISIBLE", - "TBOx_PRIM_CULLED", - "TBOx_PRIM_CLIPPED", - "TBOx_PRIM_SAT_CULLED", - "TBOx_BIN_ALLOC_INIT", - "TBOx_BIN_ALLOC_OVERFLOW", - "TBOx_BUS_READ", - "TBOx_BUS_WRITE_UTLB0", - "TBOx_BUS_WRITE_UTLB1", - "TBOx_LOADING_DESC", - "TBOx_IDVS_POS_SHAD_REQ", - "TBOx_IDVS_POS_SHAD_WAIT", - "TBOx_IDVS_POS_SHAD_STALL", - "TBOx_IDVS_POS_FIFO_FULL", - "TBOx_PREFETCH_STALL", - "TBOx_VCACHE_HIT", - "TBOx_VCACHE_MISS", - "TBOx_VCACHE_LINE_WAIT", - "TBOx_VFETCH_POS_READ_WAIT", - "TBOx_VFETCH_VERTEX_WAIT", - "TBOx_VFETCH_STALL", - "TBOx_PRIMASSY_STALL", - "TBOx_BBOX_GEN_STALL", - "TBOx_IDVS_VBU_HIT", - "TBOx_IDVS_VBU_MISS", - "TBOx_IDVS_VBU_LINE_DEALLOCATE", - "TBOx_IDVS_VAR_SHAD_REQ", - "TBOx_IDVS_VAR_SHAD_STALL", - "TBOx_BINNER_STALL", - "TBOx_ITER_STALL", - "TBOx_COMPRESS_MISS", - "TBOx_COMPRESS_STALL", - "TBOx_PCACHE_HIT", - "TBOx_PCACHE_MISS", - "TBOx_PCACHE_MISS_STALL", - "TBOx_PCACHE_EVICT_STALL", - "TBOx_PMGR_PTR_WR_STALL", - "TBOx_PMGR_PTR_RD_STALL", - "TBOx_PMGR_CMD_WR_STALL", - "TBOx_WRBUF_ACTIVE", - "TBOx_WRBUF_HIT", - "TBOx_WRBUF_MISS", - "TBOx_WRBUF_NO_FREE_LINE_STALL", - "TBOx_WRBUF_NO_AXI_ID_STALL", - "TBOx_WRBUF_AXI_STALL", - "TBOx_UTLB0_TRANS", - "TBOx_UTLB0_TRANS_HIT", - "TBOx_UTLB0_TRANS_STALL", - "TBOx_UTLB0_MMU_REQ", - "TBOx_UTLB1_TRANS", - "TBOx_UTLB1_TRANS_HIT", - "TBOx_UTLB1_TRANS_STALL", - "TBOx_UTLB1_MMU_REQ", - - /* Shader Core */ - "", - "", - "", - "", - "TBOx_FRAG_ACTIVE", - "TBOx_FRAG_PRIMITIVES_OUT", - "TBOx_FRAG_PRIM_RAST", - "TBOx_FRAG_FPK_ACTIVE", - "TBOx_FRAG_STARVING", - "TBOx_FRAG_WARPS", - "TBOx_FRAG_PARTIAL_QUADS_RAST", - "TBOx_FRAG_QUADS_RAST", - "TBOx_FRAG_QUADS_EZS_TEST", - "TBOx_FRAG_QUADS_EZS_UPDATE", - "TBOx_FRAG_QUADS_EZS_KILL", - "TBOx_FRAG_LZS_TEST", - "TBOx_FRAG_LZS_KILL", - "TBOx_WARP_REG_SIZE_64", - 
"TBOx_FRAG_PTILES", - "TBOx_FRAG_TRANS_ELIM", - "TBOx_QUAD_FPK_KILLER", - "TBOx_FULL_QUAD_WARPS", - "TBOx_COMPUTE_ACTIVE", - "TBOx_COMPUTE_TASKS", - "TBOx_COMPUTE_WARPS", - "TBOx_COMPUTE_STARVING", - "TBOx_EXEC_CORE_ACTIVE", - "TBOx_EXEC_INSTR_FMA", - "TBOx_EXEC_INSTR_CVT", - "TBOx_EXEC_INSTR_SFU", - "TBOx_EXEC_INSTR_MSG", - "TBOx_EXEC_INSTR_DIVERGED", - "TBOx_EXEC_ICACHE_MISS", - "TBOx_EXEC_STARVE_ARITH", - "TBOx_CALL_BLEND_SHADER", - "TBOx_TEX_MSGI_NUM_FLITS", - "TBOx_TEX_DFCH_CLK_STALLED", - "TBOx_TEX_TFCH_CLK_STALLED", - "TBOx_TEX_TFCH_STARVED_PENDING_DATA_FETCH", - "TBOx_TEX_FILT_NUM_OPERATIONS", - "TBOx_TEX_FILT_NUM_FXR_OPERATIONS", - "TBOx_TEX_FILT_NUM_FST_OPERATIONS", - "TBOx_TEX_MSGO_NUM_MSG", - "TBOx_TEX_MSGO_NUM_FLITS", - "TBOx_LS_MEM_READ_FULL", - "TBOx_LS_MEM_READ_SHORT", - "TBOx_LS_MEM_WRITE_FULL", - "TBOx_LS_MEM_WRITE_SHORT", - "TBOx_LS_MEM_ATOMIC", - "TBOx_VARY_INSTR", - "TBOx_VARY_SLOT_32", - "TBOx_VARY_SLOT_16", - "TBOx_ATTR_INSTR", - "TBOx_SHADER_CORE_ACTIVE", - "TBOx_BEATS_RD_FTC", - "TBOx_BEATS_RD_FTC_EXT", - "TBOx_BEATS_RD_LSC", - "TBOx_BEATS_RD_LSC_EXT", - "TBOx_BEATS_RD_TEX", - "TBOx_BEATS_RD_TEX_EXT", - "TBOx_BEATS_RD_OTHER", - "TBOx_BEATS_WR_LSC_OTHER", - "TBOx_BEATS_WR_TIB", - "TBOx_BEATS_WR_LSC_WB", - - /* L2 and MMU */ - "", - "", - "", - "", - "TBOx_MMU_REQUESTS", - "TBOx_MMU_TABLE_READS_L3", - "TBOx_MMU_TABLE_READS_L2", - "TBOx_MMU_HIT_L3", - "TBOx_MMU_HIT_L2", - "TBOx_MMU_S2_REQUESTS", - "TBOx_MMU_S2_TABLE_READS_L3", - "TBOx_MMU_S2_TABLE_READS_L2", - "TBOx_MMU_S2_HIT_L3", - "TBOx_MMU_S2_HIT_L2", - "", - "", - "TBOx_L2_RD_MSG_IN", - "TBOx_L2_RD_MSG_IN_STALL", - "TBOx_L2_WR_MSG_IN", - "TBOx_L2_WR_MSG_IN_STALL", - "TBOx_L2_SNP_MSG_IN", - "TBOx_L2_SNP_MSG_IN_STALL", - "TBOx_L2_RD_MSG_OUT", - "TBOx_L2_RD_MSG_OUT_STALL", - "TBOx_L2_WR_MSG_OUT", - "TBOx_L2_ANY_LOOKUP", - "TBOx_L2_READ_LOOKUP", - "TBOx_L2_WRITE_LOOKUP", - "TBOx_L2_EXT_SNOOP_LOOKUP", - "TBOx_L2_EXT_READ", - "TBOx_L2_EXT_READ_NOSNP", - "TBOx_L2_EXT_READ_UNIQUE", - "TBOx_L2_EXT_READ_BEATS", - "TBOx_L2_EXT_AR_STALL", - "TBOx_L2_EXT_AR_CNT_Q1", - "TBOx_L2_EXT_AR_CNT_Q2", - "TBOx_L2_EXT_AR_CNT_Q3", - "TBOx_L2_EXT_RRESP_0_127", - "TBOx_L2_EXT_RRESP_128_191", - "TBOx_L2_EXT_RRESP_192_255", - "TBOx_L2_EXT_RRESP_256_319", - "TBOx_L2_EXT_RRESP_320_383", - "TBOx_L2_EXT_WRITE", - "TBOx_L2_EXT_WRITE_NOSNP_FULL", - "TBOx_L2_EXT_WRITE_NOSNP_PTL", - "TBOx_L2_EXT_WRITE_SNP_FULL", - "TBOx_L2_EXT_WRITE_SNP_PTL", - "TBOx_L2_EXT_WRITE_BEATS", - "TBOx_L2_EXT_W_STALL", - "TBOx_L2_EXT_AW_CNT_Q1", - "TBOx_L2_EXT_AW_CNT_Q2", - "TBOx_L2_EXT_AW_CNT_Q3", - "TBOx_L2_EXT_SNOOP", - "TBOx_L2_EXT_SNOOP_STALL", - "TBOx_L2_EXT_SNOOP_RESP_CLEAN", - "TBOx_L2_EXT_SNOOP_RESP_DATA", - "TBOx_L2_EXT_SNOOP_INTERNAL", - "", - "", - "", - "", - "", - "", - "", - }; - static const char * const hardware_counters_mali_tODx[] = { - /* Frontend */ - "", - "", - "", - "", - "TODx_GPU_ACTIVE", - "TODx_MCU_ACTIVE", - "TODx_GPU_ITER_ACTIVE", - "TODx_MMU_FLUSH_COUNT", - "", - "", - "TODx_IRQ_ACTIVE", - "TODx_GPU_IRQ_COUNT", - "", - "", - "TODx_DOORBELL_IRQ_ACTIVE", - "TODx_DOORBELL_IRQ_COUNT", - "TODx_ITER_TILER_ACTIVE", - "TODx_ITER_TILER_JOB_COMPLETED", - "TODx_ITER_TILER_IDVS_TASK_COMPLETED", - "", - "", - "", - "", - "", - "TODx_ITER_COMP_ACTIVE", - "TODx_ITER_COMP_JOB_COMPLETED", - "TODx_ITER_COMP_TASK_COMPLETED", - "", - "", - "", - "", - "", - "TODx_ITER_FRAG_ACTIVE", - "TODx_ITER_FRAG_JOB_COMPLETED", - "TODx_ITER_FRAG_TASK_COMPLETED", - "", - "", - "", - "", - "", - "TODx_CEU_ACTIVE", - "", - "", - "", - "", - "TODx_LSU_ACTIVE", - "", - "", - 
"TODx_CSHWIF0_ENABLED", - "", - "", - "", - "TODx_CSHWIF1_ENABLED", - "", - "", - "", - "TODx_CSHWIF2_ENABLED", - "", - "", - "", - "TODx_CSHWIF3_ENABLED", - "", - "", - "", - - /* Tiler */ - "", - "", - "", - "", - "TODx_TILER_ACTIVE", - "", - "TODx_TRIANGLES", - "TODx_LINES", - "TODx_POINTS", - "TODx_FRONT_FACING", - "TODx_BACK_FACING", - "TODx_PRIM_VISIBLE", - "TODx_PRIM_CULLED", - "TODx_PRIM_CLIPPED", - "TODx_PRIM_SAT_CULLED", - "", - "", - "TODx_BUS_READ", - "TODx_BUS_WRITE_UTLB0", - "TODx_BUS_WRITE_UTLB1", - "TODx_SUSPENDED", - "TODx_IDVS_POS_SHAD_REQ", - "", - "TODx_IDVS_POS_SHAD_STALL", - "TODx_IDVS_POS_FIFO_FULL", - "", - "TODx_VCACHE_HIT", - "TODx_VCACHE_MISS", - "", - "", - "", - "TODx_VFETCH_STALL", - "", - "TODx_RESUMED", - "TODx_IDVS_VBU_HIT", - "TODx_IDVS_VBU_MISS", - "", - "TODx_IDVS_VAR_SHAD_REQ", - "TODx_IDVS_VAR_SHAD_STALL", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "TODx_WRBUF_NO_AXI_ID_STALL", - "TODx_WRBUF_AXI_STALL", - "", - "", - "", - "", - "", - "", - "", - "", - - /* Shader Core */ - "", - "", - "", - "", - "TODx_FRAG_ACTIVE", - "TODx_FRAG_PRIMITIVES_OUT", - "TODx_FRAG_PRIM_RAST", - "TODx_FRAG_FPK_ACTIVE", - "", - "TODx_FRAG_WARPS", - "TODx_FRAG_PARTIAL_QUADS_RAST", - "TODx_FRAG_QUADS_RAST", - "TODx_FRAG_QUADS_EZS_TEST", - "TODx_FRAG_QUADS_EZS_UPDATE", - "TODx_FRAG_QUADS_EZS_KILL", - "TODx_FRAG_LZS_TEST", - "TODx_FRAG_LZS_KILL", - "TODx_WARP_REG_SIZE_64", - "TODx_FRAG_PTILES", - "TODx_FRAG_TRANS_ELIM", - "TODx_QUAD_FPK_KILLER", - "TODx_FULL_QUAD_WARPS", - "TODx_COMPUTE_ACTIVE", - "TODx_COMPUTE_TASKS", - "TODx_COMPUTE_WARPS", - "TODx_COMPUTE_STARVING", - "TODx_EXEC_CORE_ACTIVE", - "TODx_EXEC_INSTR_FMA", - "TODx_EXEC_INSTR_CVT", - "TODx_EXEC_INSTR_SFU", - "TODx_EXEC_INSTR_MSG", - "TODx_EXEC_INSTR_DIVERGED", - "TODx_EXEC_ICACHE_MISS", - "TODx_EXEC_STARVE_ARITH", - "TODx_CALL_BLEND_SHADER", - "TODx_TEX_MSGI_NUM_FLITS", - "TODx_TEX_DFCH_CLK_STALLED", - "TODx_TEX_TFCH_CLK_STALLED", - "TODx_TEX_TFCH_STARVED_PENDING_DATA_FETCH", - "TODx_TEX_FILT_NUM_OPERATIONS", - "TODx_TEX_FILT_NUM_FXR_OPERATIONS", - "TODx_TEX_FILT_NUM_FST_OPERATIONS", - "TODx_TEX_MSGO_NUM_MSG", - "TODx_TEX_MSGO_NUM_FLITS", - "TODx_LS_MEM_READ_FULL", - "TODx_LS_MEM_READ_SHORT", - "TODx_LS_MEM_WRITE_FULL", - "TODx_LS_MEM_WRITE_SHORT", - "TODx_LS_MEM_ATOMIC", - "TODx_VARY_INSTR", - "TODx_VARY_SLOT_32", - "TODx_VARY_SLOT_16", - "TODx_ATTR_INSTR", - "TODx_SHADER_CORE_ACTIVE", - "TODx_BEATS_RD_FTC", - "TODx_BEATS_RD_FTC_EXT", - "TODx_BEATS_RD_LSC", - "TODx_BEATS_RD_LSC_EXT", - "TODx_BEATS_RD_TEX", - "TODx_BEATS_RD_TEX_EXT", - "TODx_BEATS_RD_OTHER", - "TODx_BEATS_WR_LSC_OTHER", - "TODx_BEATS_WR_TIB", - "TODx_BEATS_WR_LSC_WB", - - /* L2 and MMU */ - "", - "", - "", - "", - "TODx_MMU_REQUESTS", - "", - "", - "", - "", - "", - "", - "", - "TODx_L2_RD_MSG_IN_EVICT", - "TODx_L2_RD_MSG_IN_CU", - "", - "", - "TODx_L2_RD_MSG_IN", - "TODx_L2_RD_MSG_IN_STALL", - "TODx_L2_WR_MSG_IN", - "TODx_L2_WR_MSG_IN_STALL", - "TODx_L2_SNP_MSG_IN", - "TODx_L2_SNP_MSG_IN_STALL", - "TODx_L2_RD_MSG_OUT", - "TODx_L2_RD_MSG_OUT_STALL", - "TODx_L2_WR_MSG_OUT", - "TODx_L2_ANY_LOOKUP", - "TODx_L2_READ_LOOKUP", - "TODx_L2_WRITE_LOOKUP", - "TODx_L2_EXT_SNOOP_LOOKUP", - "TODx_L2_EXT_READ", - "TODx_L2_EXT_READ_NOSNP", - "TODx_L2_EXT_READ_UNIQUE", - "TODx_L2_EXT_READ_BEATS", - "TODx_L2_EXT_AR_STALL", - "TODx_L2_EXT_AR_CNT_Q1", - "TODx_L2_EXT_AR_CNT_Q2", - "TODx_L2_EXT_AR_CNT_Q3", - "TODx_L2_EXT_RRESP_0_127", - "TODx_L2_EXT_RRESP_128_191", - "TODx_L2_EXT_RRESP_192_255", - 
"TODx_L2_EXT_RRESP_256_319", - "TODx_L2_EXT_RRESP_320_383", - "TODx_L2_EXT_WRITE", - "TODx_L2_EXT_WRITE_NOSNP_FULL", - "TODx_L2_EXT_WRITE_NOSNP_PTL", - "TODx_L2_EXT_WRITE_SNP_FULL", - "TODx_L2_EXT_WRITE_SNP_PTL", - "TODx_L2_EXT_WRITE_BEATS", - "TODx_L2_EXT_W_STALL", - "TODx_L2_EXT_AW_CNT_Q1", - "TODx_L2_EXT_AW_CNT_Q2", - "TODx_L2_EXT_AW_CNT_Q3", - "TODx_L2_EXT_SNOOP", - "TODx_L2_EXT_SNOOP_STALL", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - }; - static const char * const hardware_counters_mali_tVIx[] = { - - /* Frontend */ - "", - "", - "", - "", - "TVIx_GPU_ACTIVE", - "TVIx_MCU_ACTIVE", - "TVIx_GPU_ITER_ACTIVE", - "TVIx_MMU_FLUSH_COUNT", - "", - "", - "TVIx_IRQ_ACTIVE", - "TVIx_GPU_IRQ_COUNT", - "", - "", - "TVIx_DOORBELL_IRQ_ACTIVE", - "TVIx_DOORBELL_IRQ_COUNT", - "TVIx_ITER_TILER_ACTIVE", - "TVIx_ITER_TILER_JOB_COMPLETED", - "TVIx_ITER_TILER_IDVS_TASK_COMPLETED", - "", - "", - "", - "", - "", - "TVIx_ITER_COMP_ACTIVE", - "TVIx_ITER_COMP_JOB_COMPLETED", - "TVIx_ITER_COMP_TASK_COMPLETED", - "", - "", - "", - "", - "", - "TVIx_ITER_FRAG_ACTIVE", - "TVIx_ITER_FRAG_JOB_COMPLETED", - "TVIx_ITER_FRAG_TASK_COMPLETED", - "", - "", - "", - "", - "", - "TVIx_CEU_ACTIVE", - "", - "", - "", - "", - "TVIx_LSU_ACTIVE", - "", - "", - "TVIx_CSHWIF0_ENABLED", - "", - "", - "", - "TVIx_CSHWIF1_ENABLED", - "", - "", - "", - "TVIx_CSHWIF2_ENABLED", - "", - "", - "", - "TVIx_CSHWIF3_ENABLED", - "", - "", - "", - - /* Tiler */ - "", - "", - "", - "", - "TVIx_TILER_ACTIVE", - "", - "TVIx_TRIANGLES", - "TVIx_LINES", - "TVIx_POINTS", - "TVIx_FRONT_FACING", - "TVIx_BACK_FACING", - "TVIx_PRIM_VISIBLE", - "TVIx_PRIM_CULLED", - "TVIx_PRIM_CLIPPED", - "TVIx_PRIM_SAT_CULLED", - "", - "", - "TVIx_BUS_READ", - "TVIx_BUS_WRITE_UTLB0", - "TVIx_BUS_WRITE_UTLB1", - "TVIx_SUSPENDED", - "TVIx_IDVS_POS_SHAD_REQ", - "", - "TVIx_IDVS_POS_SHAD_STALL", - "TVIx_IDVS_POS_FIFO_FULL", - "", - "TVIx_VCACHE_HIT", - "TVIx_VCACHE_MISS", - "", - "", - "", - "TVIx_VFETCH_STALL", - "", - "TVIx_RESUMED", - "TVIx_IDVS_VBU_HIT", - "TVIx_IDVS_VBU_MISS", - "", - "TVIx_IDVS_VAR_SHAD_REQ", - "TVIx_IDVS_VAR_SHAD_STALL", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "TVIx_WRBUF_NO_AXI_ID_STALL", - "TVIx_WRBUF_AXI_STALL", - "", - "", - "", - "", - "", - "", - "", - "", - - /* Shader Core */ - "", - "", - "", - "", - "TVIx_FRAG_ACTIVE", - "TVIx_FRAG_PRIMITIVES_OUT", - "TVIx_FRAG_PRIM_RAST", - "TVIx_FRAG_FPK_ACTIVE", - "", - "TVIx_FRAG_WARPS", - "TVIx_FRAG_PARTIAL_QUADS_RAST", - "TVIx_FRAG_QUADS_RAST", - "TVIx_FRAG_QUADS_EZS_TEST", - "TVIx_FRAG_QUADS_EZS_UPDATE", - "TVIx_FRAG_QUADS_EZS_KILL", - "TVIx_FRAG_LZS_TEST", - "TVIx_FRAG_LZS_KILL", - "TVIx_WARP_REG_SIZE_64", - "TVIx_FRAG_PTILES", - "TVIx_FRAG_TRANS_ELIM", - "TVIx_QUAD_FPK_KILLER", - "TVIx_FULL_QUAD_WARPS", - "TVIx_COMPUTE_ACTIVE", - "TVIx_COMPUTE_TASKS", - "TVIx_COMPUTE_WARPS", - "TVIx_COMPUTE_STARVING", - "TVIx_EXEC_CORE_ACTIVE", - "TVIx_EXEC_INSTR_FMA", - "TVIx_EXEC_INSTR_CVT", - "TVIx_EXEC_INSTR_SFU", - "TVIx_EXEC_INSTR_MSG", - "TVIx_EXEC_INSTR_DIVERGED", - "TVIx_EXEC_ICACHE_MISS", - "TVIx_EXEC_STARVE_ARITH", - "TVIx_CALL_BLEND_SHADER", - "TVIx_TEX_MSGI_NUM_FLITS", - "TVIx_TEX_DFCH_CLK_STALLED", - "TVIx_TEX_TFCH_CLK_STALLED", - "TVIx_TEX_TFCH_STARVED_PENDING_DATA_FETCH", - "TVIx_TEX_FILT_NUM_OPERATIONS", - "TVIx_TEX_FILT_NUM_FXR_OPERATIONS", - "TVIx_TEX_FILT_NUM_FST_OPERATIONS", - "TVIx_TEX_MSGO_NUM_MSG", - "TVIx_TEX_MSGO_NUM_FLITS", - "TVIx_LS_MEM_READ_FULL", - "TVIx_LS_MEM_READ_SHORT", - "TVIx_LS_MEM_WRITE_FULL", - 
"TVIx_LS_MEM_WRITE_SHORT", - "TVIx_LS_MEM_ATOMIC", - "TVIx_VARY_INSTR", - "TVIx_VARY_SLOT_32", - "TVIx_VARY_SLOT_16", - "TVIx_ATTR_INSTR", - "TVIx_SHADER_CORE_ACTIVE", - "TVIx_BEATS_RD_FTC", - "TVIx_BEATS_RD_FTC_EXT", - "TVIx_BEATS_RD_LSC", - "TVIx_BEATS_RD_LSC_EXT", - "TVIx_BEATS_RD_TEX", - "TVIx_BEATS_RD_TEX_EXT", - "TVIx_BEATS_RD_OTHER", - "TVIx_BEATS_WR_LSC_OTHER", - "TVIx_BEATS_WR_TIB", - "TVIx_BEATS_WR_LSC_WB", - - /* L2 and MMU */ - "", - "", - "", - "", - "TVIx_MMU_REQUESTS", - "", - "", - "", - "", - "", - "", - "", - "TVIx_L2_RD_MSG_IN_EVICT", - "TVIx_L2_RD_MSG_IN_CU", - "", - "", - "TVIx_L2_RD_MSG_IN", - "TVIx_L2_RD_MSG_IN_STALL", - "TVIx_L2_WR_MSG_IN", - "TVIx_L2_WR_MSG_IN_STALL", - "TVIx_L2_SNP_MSG_IN", - "TVIx_L2_SNP_MSG_IN_STALL", - "TVIx_L2_RD_MSG_OUT", - "TVIx_L2_RD_MSG_OUT_STALL", - "TVIx_L2_WR_MSG_OUT", - "TVIx_L2_ANY_LOOKUP", - "TVIx_L2_READ_LOOKUP", - "TVIx_L2_WRITE_LOOKUP", - "TVIx_L2_EXT_SNOOP_LOOKUP", - "TVIx_L2_EXT_READ", - "TVIx_L2_EXT_READ_NOSNP", - "TVIx_L2_EXT_READ_UNIQUE", - "TVIx_L2_EXT_READ_BEATS", - "TVIx_L2_EXT_AR_STALL", - "TVIx_L2_EXT_AR_CNT_Q1", - "TVIx_L2_EXT_AR_CNT_Q2", - "TVIx_L2_EXT_AR_CNT_Q3", - "TVIx_L2_EXT_RRESP_0_127", - "TVIx_L2_EXT_RRESP_128_191", - "TVIx_L2_EXT_RRESP_192_255", - "TVIx_L2_EXT_RRESP_256_319", - "TVIx_L2_EXT_RRESP_320_383", - "TVIx_L2_EXT_WRITE", - "TVIx_L2_EXT_WRITE_NOSNP_FULL", - "TVIx_L2_EXT_WRITE_NOSNP_PTL", - "TVIx_L2_EXT_WRITE_SNP_FULL", - "TVIx_L2_EXT_WRITE_SNP_PTL", - "TVIx_L2_EXT_WRITE_BEATS", - "TVIx_L2_EXT_W_STALL", - "TVIx_L2_EXT_AW_CNT_Q1", - "TVIx_L2_EXT_AW_CNT_Q2", - "TVIx_L2_EXT_AW_CNT_Q3", - "TVIx_L2_EXT_SNOOP", - "TVIx_L2_EXT_SNOOP_STALL", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - - }; - static const char * const hardware_counters_mali_tGRx[] = { - - /* Frontend */ - "", - "", - "", - "", - "TGRx_GPU_ACTIVE", - "TGRx_MCU_ACTIVE", - "TGRx_GPU_ITER_ACTIVE", - "TGRx_MMU_FLUSH_COUNT", - "", - "", - "TGRx_IRQ_ACTIVE", - "TGRx_GPU_IRQ_COUNT", - "", - "", - "TGRx_DOORBELL_IRQ_ACTIVE", - "TGRx_DOORBELL_IRQ_COUNT", - "TGRx_ITER_TILER_ACTIVE", - "TGRx_ITER_TILER_JOB_COMPLETED", - "TGRx_ITER_TILER_IDVS_TASK_COMPLETED", - "", - "", - "", - "", - "", - "TGRx_ITER_COMP_ACTIVE", - "TGRx_ITER_COMP_JOB_COMPLETED", - "TGRx_ITER_COMP_TASK_COMPLETED", - "", - "", - "", - "", - "", - "TGRx_ITER_FRAG_ACTIVE", - "TGRx_ITER_FRAG_JOB_COMPLETED", - "TGRx_ITER_FRAG_TASK_COMPLETED", - "", - "", - "", - "", - "", - "TGRx_CEU_ACTIVE", - "", - "", - "", - "", - "TGRx_LSU_ACTIVE", - "", - "", - "TGRx_CSHWIF0_ENABLED", - "", - "", - "", - "TGRx_CSHWIF1_ENABLED", - "", - "", - "", - "TGRx_CSHWIF2_ENABLED", - "", - "", - "", - "TGRx_CSHWIF3_ENABLED", - "", - "", - "", - - /* Tiler */ - "", - "", - "", - "", - "TGRx_TILER_ACTIVE", - "", - "TGRx_TRIANGLES", - "TGRx_LINES", - "TGRx_POINTS", - "TGRx_FRONT_FACING", - "TGRx_BACK_FACING", - "TGRx_PRIM_VISIBLE", - "TGRx_PRIM_CULLED", - "TGRx_PRIM_CLIPPED", - "TGRx_PRIM_SAT_CULLED", - "", - "", - "TGRx_BUS_READ", - "TGRx_BUS_WRITE_UTLB0", - "TGRx_BUS_WRITE_UTLB1", - "TGRx_SUSPENDED", - "TGRx_IDVS_POS_SHAD_REQ", - "", - "TGRx_IDVS_POS_SHAD_STALL", - "TGRx_IDVS_POS_FIFO_FULL", - "", - "TGRx_VCACHE_HIT", - "TGRx_VCACHE_MISS", - "", - "", - "", - "TGRx_VFETCH_STALL", - "", - "TGRx_RESUMED", - "TGRx_IDVS_VBU_HIT", - "TGRx_IDVS_VBU_MISS", - "", - "TGRx_IDVS_VAR_SHAD_REQ", - "TGRx_IDVS_VAR_SHAD_STALL", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "TGRx_WRBUF_NO_AXI_ID_STALL", - "TGRx_WRBUF_AXI_STALL", - "", - "", - "", - "", - "", - "", - "", - 
"", - - /* Shader Core */ - "", - "", - "", - "", - "TGRx_FRAG_ACTIVE", - "TGRx_FRAG_PRIMITIVES_OUT", - "TGRx_FRAG_PRIM_RAST", - "TGRx_FRAG_FPK_ACTIVE", - "", - "TGRx_FRAG_WARPS", - "TGRx_FRAG_PARTIAL_QUADS_RAST", - "TGRx_FRAG_QUADS_RAST", - "TGRx_FRAG_QUADS_EZS_TEST", - "TGRx_FRAG_QUADS_EZS_UPDATE", - "TGRx_FRAG_QUADS_EZS_KILL", - "TGRx_FRAG_LZS_TEST", - "TGRx_FRAG_LZS_KILL", - "TGRx_WARP_REG_SIZE_64", - "TGRx_FRAG_PTILES", - "TGRx_FRAG_TRANS_ELIM", - "TGRx_QUAD_FPK_KILLER", - "TGRx_FULL_QUAD_WARPS", - "TGRx_COMPUTE_ACTIVE", - "TGRx_COMPUTE_TASKS", - "TGRx_COMPUTE_WARPS", - "TGRx_COMPUTE_STARVING", - "TGRx_EXEC_CORE_ACTIVE", - "TGRx_EXEC_INSTR_FMA", - "TGRx_EXEC_INSTR_CVT", - "TGRx_EXEC_INSTR_SFU", - "TGRx_EXEC_INSTR_MSG", - "TGRx_EXEC_INSTR_DIVERGED", - "TGRx_EXEC_ICACHE_MISS", - "TGRx_EXEC_STARVE_ARITH", - "TGRx_CALL_BLEND_SHADER", - "TGRx_TEX_MSGI_NUM_FLITS", - "TGRx_TEX_DFCH_CLK_STALLED", - "TGRx_TEX_TFCH_CLK_STALLED", - "TGRx_TEX_TFCH_STARVED_PENDING_DATA_FETCH", - "TGRx_TEX_FILT_NUM_OPERATIONS", - "TGRx_TEX_FILT_NUM_FXR_OPERATIONS", - "TGRx_TEX_FILT_NUM_FST_OPERATIONS", - "TGRx_TEX_MSGO_NUM_MSG", - "TGRx_TEX_MSGO_NUM_FLITS", - "TGRx_LS_MEM_READ_FULL", - "TGRx_LS_MEM_READ_SHORT", - "TGRx_LS_MEM_WRITE_FULL", - "TGRx_LS_MEM_WRITE_SHORT", - "TGRx_LS_MEM_ATOMIC", - "TGRx_VARY_INSTR", - "TGRx_VARY_SLOT_32", - "TGRx_VARY_SLOT_16", - "TGRx_ATTR_INSTR", - "TGRx_SHADER_CORE_ACTIVE", - "TGRx_BEATS_RD_FTC", - "TGRx_BEATS_RD_FTC_EXT", - "TGRx_BEATS_RD_LSC", - "TGRx_BEATS_RD_LSC_EXT", - "TGRx_BEATS_RD_TEX", - "TGRx_BEATS_RD_TEX_EXT", - "TGRx_BEATS_RD_OTHER", - "TGRx_BEATS_WR_LSC_OTHER", - "TGRx_BEATS_WR_TIB", - "TGRx_BEATS_WR_LSC_WB", - - /* L2 and MMU */ - "", - "", - "", - "", - "TGRx_MMU_REQUESTS", - "", - "", - "", - "", - "", - "", - "", - "TGRx_L2_RD_MSG_IN_EVICT", - "TGRx_L2_RD_MSG_IN_CU", - "", - "", - "TGRx_L2_RD_MSG_IN", - "TGRx_L2_RD_MSG_IN_STALL", - "TGRx_L2_WR_MSG_IN", - "TGRx_L2_WR_MSG_IN_STALL", - "TGRx_L2_SNP_MSG_IN", - "TGRx_L2_SNP_MSG_IN_STALL", - "TGRx_L2_RD_MSG_OUT", - "TGRx_L2_RD_MSG_OUT_STALL", - "TGRx_L2_WR_MSG_OUT", - "TGRx_L2_ANY_LOOKUP", - "TGRx_L2_READ_LOOKUP", - "TGRx_L2_WRITE_LOOKUP", - "TGRx_L2_EXT_SNOOP_LOOKUP", - "TGRx_L2_EXT_READ", - "TGRx_L2_EXT_READ_NOSNP", - "TGRx_L2_EXT_READ_UNIQUE", - "TGRx_L2_EXT_READ_BEATS", - "TGRx_L2_EXT_AR_STALL", - "TGRx_L2_EXT_AR_CNT_Q1", - "TGRx_L2_EXT_AR_CNT_Q2", - "TGRx_L2_EXT_AR_CNT_Q3", - "TGRx_L2_EXT_RRESP_0_127", - "TGRx_L2_EXT_RRESP_128_191", - "TGRx_L2_EXT_RRESP_192_255", - "TGRx_L2_EXT_RRESP_256_319", - "TGRx_L2_EXT_RRESP_320_383", - "TGRx_L2_EXT_WRITE", - "TGRx_L2_EXT_WRITE_NOSNP_FULL", - "TGRx_L2_EXT_WRITE_NOSNP_PTL", - "TGRx_L2_EXT_WRITE_SNP_FULL", - "TGRx_L2_EXT_WRITE_SNP_PTL", - "TGRx_L2_EXT_WRITE_BEATS", - "TGRx_L2_EXT_W_STALL", - "TGRx_L2_EXT_AW_CNT_Q1", - "TGRx_L2_EXT_AW_CNT_Q2", - "TGRx_L2_EXT_AW_CNT_Q3", - "TGRx_L2_EXT_SNOOP", - "TGRx_L2_EXT_SNOOP_STALL", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - }; - static const char * const hardware_counters_mali_tVAx[] = { - - /* Frontend */ - "", - "", - "", - "", - "TVAx_GPU_ACTIVE", - "TVAx_MCU_ACTIVE", - "TVAx_GPU_ITER_ACTIVE", - "TVAx_MMU_FLUSH_COUNT", - "", - "", - "TVAx_IRQ_ACTIVE", - "TVAx_GPU_IRQ_COUNT", - "", - "", - "TVAx_DOORBELL_IRQ_ACTIVE", - "TVAx_DOORBELL_IRQ_COUNT", - "TVAx_ITER_TILER_ACTIVE", - "TVAx_ITER_TILER_JOB_COMPLETED", - "TVAx_ITER_TILER_IDVS_TASK_COMPLETED", - "", - "", - "", - "", - "", - "TVAx_ITER_COMP_ACTIVE", - "TVAx_ITER_COMP_JOB_COMPLETED", - "TVAx_ITER_COMP_TASK_COMPLETED", - "", - "", - "", - "", - "", - 
"TVAx_ITER_FRAG_ACTIVE", - "TVAx_ITER_FRAG_JOB_COMPLETED", - "TVAx_ITER_FRAG_TASK_COMPLETED", - "", - "", - "", - "", - "", - "TVAx_CEU_ACTIVE", - "", - "", - "", - "", - "TVAx_LSU_ACTIVE", - "", - "", - "TVAx_CSHWIF0_ENABLED", - "", - "", - "", - "TVAx_CSHWIF1_ENABLED", - "", - "", - "", - "TVAx_CSHWIF2_ENABLED", - "", - "", - "", - "TVAx_CSHWIF3_ENABLED", - "", - "", - "", - - /* Tiler */ - "", - "", - "", - "", - "TVAx_TILER_ACTIVE", - "", - "TVAx_TRIANGLES", - "TVAx_LINES", - "TVAx_POINTS", - "TVAx_FRONT_FACING", - "TVAx_BACK_FACING", - "TVAx_PRIM_VISIBLE", - "TVAx_PRIM_CULLED", - "TVAx_PRIM_CLIPPED", - "TVAx_PRIM_SAT_CULLED", - "", - "", - "TVAx_BUS_READ", - "TVAx_BUS_WRITE_UTLB0", - "TVAx_BUS_WRITE_UTLB1", - "TVAx_SUSPENDED", - "TVAx_IDVS_POS_SHAD_REQ", - "", - "TVAx_IDVS_POS_SHAD_STALL", - "TVAx_IDVS_POS_FIFO_FULL", - "", - "TVAx_VCACHE_HIT", - "TVAx_VCACHE_MISS", - "", - "", - "", - "TVAx_VFETCH_STALL", - "", - "TVAx_RESUMED", - "TVAx_IDVS_VBU_HIT", - "TVAx_IDVS_VBU_MISS", - "", - "TVAx_IDVS_VAR_SHAD_REQ", - "TVAx_IDVS_VAR_SHAD_STALL", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "TVAx_WRBUF_NO_AXI_ID_STALL", - "TVAx_WRBUF_AXI_STALL", - "", - "", - "", - "", - "", - "", - "", - "", - - /* Shader Core */ - "", - "", - "", - "", - "TVAx_FRAG_ACTIVE", - "TVAx_FRAG_PRIMITIVES_OUT", - "TVAx_FRAG_PRIM_RAST", - "TVAx_FRAG_FPK_ACTIVE", - "", - "TVAx_FRAG_WARPS", - "TVAx_FRAG_PARTIAL_QUADS_RAST", - "TVAx_FRAG_QUADS_RAST", - "TVAx_FRAG_QUADS_EZS_TEST", - "TVAx_FRAG_QUADS_EZS_UPDATE", - "TVAx_FRAG_QUADS_EZS_KILL", - "TVAx_FRAG_LZS_TEST", - "TVAx_FRAG_LZS_KILL", - "TVAx_WARP_REG_SIZE_64", - "TVAx_FRAG_PTILES", - "TVAx_FRAG_TRANS_ELIM", - "TVAx_QUAD_FPK_KILLER", - "TVAx_FULL_QUAD_WARPS", - "TVAx_COMPUTE_ACTIVE", - "TVAx_COMPUTE_TASKS", - "TVAx_COMPUTE_WARPS", - "TVAx_COMPUTE_STARVING", - "TVAx_EXEC_CORE_ACTIVE", - "TVAx_EXEC_INSTR_FMA", - "TVAx_EXEC_INSTR_CVT", - "TVAx_EXEC_INSTR_SFU", - "TVAx_EXEC_INSTR_MSG", - "TVAx_EXEC_INSTR_DIVERGED", - "TVAx_EXEC_ICACHE_MISS", - "TVAx_EXEC_STARVE_ARITH", - "TVAx_CALL_BLEND_SHADER", - "TVAx_TEX_MSGI_NUM_FLITS", - "TVAx_TEX_DFCH_CLK_STALLED", - "TVAx_TEX_TFCH_CLK_STALLED", - "TVAx_TEX_TFCH_STARVED_PENDING_DATA_FETCH", - "TVAx_TEX_FILT_NUM_OPERATIONS", - "TVAx_TEX_FILT_NUM_FXR_OPERATIONS", - "TVAx_TEX_FILT_NUM_FST_OPERATIONS", - "TVAx_TEX_MSGO_NUM_MSG", - "TVAx_TEX_MSGO_NUM_FLITS", - "TVAx_LS_MEM_READ_FULL", - "TVAx_LS_MEM_READ_SHORT", - "TVAx_LS_MEM_WRITE_FULL", - "TVAx_LS_MEM_WRITE_SHORT", - "TVAx_LS_MEM_ATOMIC", - "TVAx_VARY_INSTR", - "TVAx_VARY_SLOT_32", - "TVAx_VARY_SLOT_16", - "TVAx_ATTR_INSTR", - "TVAx_SHADER_CORE_ACTIVE", - "TVAx_BEATS_RD_FTC", - "TVAx_BEATS_RD_FTC_EXT", - "TVAx_BEATS_RD_LSC", - "TVAx_BEATS_RD_LSC_EXT", - "TVAx_BEATS_RD_TEX", - "TVAx_BEATS_RD_TEX_EXT", - "TVAx_BEATS_RD_OTHER", - "TVAx_BEATS_WR_LSC_OTHER", - "TVAx_BEATS_WR_TIB", - "TVAx_BEATS_WR_LSC_WB", - - /* L2 and MMU */ - "", - "", - "", - "", - "TVAx_MMU_REQUESTS", - "", - "", - "", - "", - "", - "", - "", - "TVAx_L2_RD_MSG_IN_EVICT", - "TVAx_L2_RD_MSG_IN_CU", - "", - "", - "TVAx_L2_RD_MSG_IN", - "TVAx_L2_RD_MSG_IN_STALL", - "TVAx_L2_WR_MSG_IN", - "TVAx_L2_WR_MSG_IN_STALL", - "TVAx_L2_SNP_MSG_IN", - "TVAx_L2_SNP_MSG_IN_STALL", - "TVAx_L2_RD_MSG_OUT", - "TVAx_L2_RD_MSG_OUT_STALL", - "TVAx_L2_WR_MSG_OUT", - "TVAx_L2_ANY_LOOKUP", - "TVAx_L2_READ_LOOKUP", - "TVAx_L2_WRITE_LOOKUP", - "TVAx_L2_EXT_SNOOP_LOOKUP", - "TVAx_L2_EXT_READ", - "TVAx_L2_EXT_READ_NOSNP", - "TVAx_L2_EXT_READ_UNIQUE", - "TVAx_L2_EXT_READ_BEATS", - 
"TVAx_L2_EXT_AR_STALL", - "TVAx_L2_EXT_AR_CNT_Q1", - "TVAx_L2_EXT_AR_CNT_Q2", - "TVAx_L2_EXT_AR_CNT_Q3", - "TVAx_L2_EXT_RRESP_0_127", - "TVAx_L2_EXT_RRESP_128_191", - "TVAx_L2_EXT_RRESP_192_255", - "TVAx_L2_EXT_RRESP_256_319", - "TVAx_L2_EXT_RRESP_320_383", - "TVAx_L2_EXT_WRITE", - "TVAx_L2_EXT_WRITE_NOSNP_FULL", - "TVAx_L2_EXT_WRITE_NOSNP_PTL", - "TVAx_L2_EXT_WRITE_SNP_FULL", - "TVAx_L2_EXT_WRITE_SNP_PTL", - "TVAx_L2_EXT_WRITE_BEATS", - "TVAx_L2_EXT_W_STALL", - "TVAx_L2_EXT_AW_CNT_Q1", - "TVAx_L2_EXT_AW_CNT_Q2", - "TVAx_L2_EXT_AW_CNT_Q3", - "TVAx_L2_EXT_SNOOP", - "TVAx_L2_EXT_SNOOP_STALL", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - - }; } #endif /* NATIVE_GATOR_DAEMON_MALI_USERSPACE_MALIHWCNTRNAMES_H_ */ diff --git a/daemon/mali_userspace/MaliHwCntrNamesBifrost.h b/daemon/mali_userspace/MaliHwCntrNamesBifrost.h new file mode 100644 index 00000000..9d1e53ed --- /dev/null +++ b/daemon/mali_userspace/MaliHwCntrNamesBifrost.h @@ -0,0 +1,3750 @@ +/* Copyright (C) 2016-2022 by Arm Limited. All rights reserved. */ + +/* Note: content autogenerated using CounterGenerator utility. */ + +#ifndef NATIVE_GATOR_DAEMON_MALI_USERSPACE_MALIHWCNTRNAMES_BIFROST_H_ +#define NATIVE_GATOR_DAEMON_MALI_USERSPACE_MALIHWCNTRNAMES_BIFROST_H_ + +namespace mali_userspace { + + /* Mali-G31 */ + static const char * const hardware_counters_mali_tDVx[] = { + /* Job Manager */ + "", + "", + "", + "", + "TDVx_MESSAGES_SENT", + "TDVx_MESSAGES_RECEIVED", + "TDVx_GPU_ACTIVE", + "TDVx_IRQ_ACTIVE", + "TDVx_JS0_JOBS", + "TDVx_JS0_TASKS", + "TDVx_JS0_ACTIVE", + "TDVx_JS0_WAIT_FLUSH", + "TDVx_JS0_WAIT_READ", + "TDVx_JS0_WAIT_ISSUE", + "TDVx_JS0_WAIT_DEPEND", + "TDVx_JS0_WAIT_FINISH", + "TDVx_JS1_JOBS", + "TDVx_JS1_TASKS", + "TDVx_JS1_ACTIVE", + "TDVx_JS1_WAIT_FLUSH", + "TDVx_JS1_WAIT_READ", + "TDVx_JS1_WAIT_ISSUE", + "TDVx_JS1_WAIT_DEPEND", + "TDVx_JS1_WAIT_FINISH", + "TDVx_JS2_JOBS", + "TDVx_JS2_TASKS", + "TDVx_JS2_ACTIVE", + "TDVx_JS2_WAIT_FLUSH", + "TDVx_JS2_WAIT_READ", + "TDVx_JS2_WAIT_ISSUE", + "TDVx_JS2_WAIT_DEPEND", + "TDVx_JS2_WAIT_FINISH", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "TDVx_CACHE_FLUSH", + + /* Tiler */ + "", + "", + "", + "", + "TDVx_TILER_ACTIVE", + "TDVx_JOBS_PROCESSED", + "TDVx_TRIANGLES", + "TDVx_LINES", + "TDVx_POINTS", + "TDVx_FRONT_FACING", + "TDVx_BACK_FACING", + "TDVx_PRIM_VISIBLE", + "TDVx_PRIM_CULLED", + "TDVx_PRIM_CLIPPED", + "TDVx_PRIM_SAT_CULLED", + "TDVx_BIN_ALLOC_INIT", + "TDVx_BIN_ALLOC_OVERFLOW", + "TDVx_BUS_READ", + "", + "TDVx_BUS_WRITE", + "TDVx_LOADING_DESC", + "TDVx_IDVS_POS_SHAD_REQ", + "TDVx_IDVS_POS_SHAD_WAIT", + "TDVx_IDVS_POS_SHAD_STALL", + "TDVx_IDVS_POS_FIFO_FULL", + "TDVx_PREFETCH_STALL", + "TDVx_VCACHE_HIT", + "TDVx_VCACHE_MISS", + "TDVx_VCACHE_LINE_WAIT", + "TDVx_VFETCH_POS_READ_WAIT", + "TDVx_VFETCH_VERTEX_WAIT", + "TDVx_VFETCH_STALL", + "TDVx_PRIMASSY_STALL", + "TDVx_BBOX_GEN_STALL", + "TDVx_IDVS_VBU_HIT", + "TDVx_IDVS_VBU_MISS", + "TDVx_IDVS_VBU_LINE_DEALLOCATE", + "TDVx_IDVS_VAR_SHAD_REQ", + "TDVx_IDVS_VAR_SHAD_STALL", + "TDVx_BINNER_STALL", + "TDVx_ITER_STALL", + "TDVx_COMPRESS_MISS", + "TDVx_COMPRESS_STALL", + "TDVx_PCACHE_HIT", + "TDVx_PCACHE_MISS", + "TDVx_PCACHE_MISS_STALL", + "TDVx_PCACHE_EVICT_STALL", + "TDVx_PMGR_PTR_WR_STALL", + "TDVx_PMGR_PTR_RD_STALL", + "TDVx_PMGR_CMD_WR_STALL", + "TDVx_WRBUF_ACTIVE", + "TDVx_WRBUF_HIT", + "TDVx_WRBUF_MISS", + "TDVx_WRBUF_NO_FREE_LINE_STALL", + 
"TDVx_WRBUF_NO_AXI_ID_STALL", + "TDVx_WRBUF_AXI_STALL", + "", + "", + "", + "TDVx_UTLB_TRANS", + "TDVx_UTLB_TRANS_HIT", + "TDVx_UTLB_TRANS_STALL", + "TDVx_UTLB_TRANS_MISS_DELAY", + "TDVx_UTLB_MMU_REQ", + + /* Shader Core */ + "", + "", + "", + "", + "TDVx_FRAG_ACTIVE", + "TDVx_FRAG_PRIMITIVES", + "TDVx_FRAG_PRIM_RAST", + "TDVx_FRAG_FPK_ACTIVE", + "TDVx_FRAG_STARVING", + "TDVx_FRAG_WARPS", + "TDVx_FRAG_PARTIAL_WARPS", + "TDVx_FRAG_QUADS_RAST", + "TDVx_FRAG_QUADS_EZS_TEST", + "TDVx_FRAG_QUADS_EZS_UPDATE", + "TDVx_FRAG_QUADS_EZS_KILL", + "TDVx_FRAG_LZS_TEST", + "TDVx_FRAG_LZS_KILL", + "", + "TDVx_FRAG_PTILES", + "TDVx_FRAG_TRANS_ELIM", + "TDVx_QUAD_FPK_KILLER", + "", + "TDVx_COMPUTE_ACTIVE", + "TDVx_COMPUTE_TASKS", + "TDVx_COMPUTE_WARPS", + "TDVx_COMPUTE_STARVING", + "TDVx_EXEC_CORE_ACTIVE", + "TDVx_EXEC_ACTIVE", + "TDVx_EXEC_INSTR_COUNT", + "TDVx_EXEC_INSTR_DIVERGED", + "TDVx_EXEC_INSTR_STARVING", + "TDVx_ARITH_INSTR_SINGLE_FMA", + "TDVx_ARITH_INSTR_DOUBLE", + "TDVx_ARITH_INSTR_MSG", + "TDVx_ARITH_INSTR_MSG_ONLY", + "TDVx_TEX_MSGI_NUM_QUADS", + "TDVx_TEX_DFCH_NUM_PASSES", + "TDVx_TEX_DFCH_NUM_PASSES_MISS", + "TDVx_TEX_DFCH_NUM_PASSES_MIP_MAP", + "TDVx_TEX_TIDX_NUM_SPLIT_MIP_MAP", + "TDVx_TEX_TFCH_NUM_LINES_FETCHED", + "TDVx_TEX_TFCH_NUM_LINES_FETCHED_BLOCK_COMPRESSED", + "TDVx_TEX_TFCH_NUM_OPERATIONS", + "TDVx_TEX_FILT_NUM_OPERATIONS", + "TDVx_LS_MEM_READ_FULL", + "TDVx_LS_MEM_READ_SHORT", + "TDVx_LS_MEM_WRITE_FULL", + "TDVx_LS_MEM_WRITE_SHORT", + "TDVx_LS_MEM_ATOMIC", + "TDVx_VARY_INSTR", + "TDVx_VARY_SLOT_32", + "TDVx_VARY_SLOT_16", + "TDVx_ATTR_INSTR", + "TDVx_ARITH_INSTR_FP_MUL", + "TDVx_BEATS_RD_FTC", + "TDVx_BEATS_RD_FTC_EXT", + "TDVx_BEATS_RD_LSC", + "TDVx_BEATS_RD_LSC_EXT", + "TDVx_BEATS_RD_TEX", + "TDVx_BEATS_RD_TEX_EXT", + "TDVx_BEATS_RD_OTHER", + "TDVx_BEATS_WR_LSC_WB", + "TDVx_BEATS_WR_TIB", + "TDVx_BEATS_WR_LSC_OTHER", + + /* Memory System */ + "", + "", + "", + "", + "TDVx_MMU_REQUESTS", + "TDVx_MMU_TABLE_READS_L3", + "TDVx_MMU_TABLE_READS_L2", + "TDVx_MMU_HIT_L3", + "TDVx_MMU_HIT_L2", + "TDVx_MMU_S2_REQUESTS", + "TDVx_MMU_S2_TABLE_READS_L3", + "TDVx_MMU_S2_TABLE_READS_L2", + "TDVx_MMU_S2_HIT_L3", + "TDVx_MMU_S2_HIT_L2", + "", + "", + "TDVx_L2_RD_MSG_IN", + "TDVx_L2_RD_MSG_IN_STALL", + "TDVx_L2_WR_MSG_IN", + "TDVx_L2_WR_MSG_IN_STALL", + "TDVx_L2_SNP_MSG_IN", + "TDVx_L2_SNP_MSG_IN_STALL", + "TDVx_L2_RD_MSG_OUT", + "TDVx_L2_RD_MSG_OUT_STALL", + "TDVx_L2_WR_MSG_OUT", + "TDVx_L2_ANY_LOOKUP", + "TDVx_L2_READ_LOOKUP", + "TDVx_L2_WRITE_LOOKUP", + "TDVx_L2_EXT_SNOOP_LOOKUP", + "TDVx_L2_EXT_READ", + "TDVx_L2_EXT_READ_NOSNP", + "TDVx_L2_EXT_READ_UNIQUE", + "TDVx_L2_EXT_READ_BEATS", + "TDVx_L2_EXT_AR_STALL", + "TDVx_L2_EXT_AR_CNT_Q1", + "TDVx_L2_EXT_AR_CNT_Q2", + "TDVx_L2_EXT_AR_CNT_Q3", + "TDVx_L2_EXT_RRESP_0_127", + "TDVx_L2_EXT_RRESP_128_191", + "TDVx_L2_EXT_RRESP_192_255", + "TDVx_L2_EXT_RRESP_256_319", + "TDVx_L2_EXT_RRESP_320_383", + "TDVx_L2_EXT_WRITE", + "TDVx_L2_EXT_WRITE_NOSNP_FULL", + "TDVx_L2_EXT_WRITE_NOSNP_PTL", + "TDVx_L2_EXT_WRITE_SNP_FULL", + "TDVx_L2_EXT_WRITE_SNP_PTL", + "TDVx_L2_EXT_WRITE_BEATS", + "TDVx_L2_EXT_W_STALL", + "TDVx_L2_EXT_AW_CNT_Q1", + "TDVx_L2_EXT_AW_CNT_Q2", + "TDVx_L2_EXT_AW_CNT_Q3", + "TDVx_L2_EXT_SNOOP", + "TDVx_L2_EXT_SNOOP_STALL", + "TDVx_L2_EXT_SNOOP_RESP_CLEAN", + "TDVx_L2_EXT_SNOOP_RESP_DATA", + "TDVx_L2_EXT_SNOOP_INTERNAL", + "", + "", + "", + "", + "", + "", + "", + }; + + /* Mali-G51 */ + static const char * const hardware_counters_mali_tSIx[] = { + /* Job Manager */ + "", + "", + "", + "", + "TSIx_MESSAGES_SENT", + "TSIx_MESSAGES_RECEIVED", 
+ "TSIx_GPU_ACTIVE", + "TSIx_IRQ_ACTIVE", + "TSIx_JS0_JOBS", + "TSIx_JS0_TASKS", + "TSIx_JS0_ACTIVE", + "TSIx_JS0_WAIT_FLUSH", + "TSIx_JS0_WAIT_READ", + "TSIx_JS0_WAIT_ISSUE", + "TSIx_JS0_WAIT_DEPEND", + "TSIx_JS0_WAIT_FINISH", + "TSIx_JS1_JOBS", + "TSIx_JS1_TASKS", + "TSIx_JS1_ACTIVE", + "TSIx_JS1_WAIT_FLUSH", + "TSIx_JS1_WAIT_READ", + "TSIx_JS1_WAIT_ISSUE", + "TSIx_JS1_WAIT_DEPEND", + "TSIx_JS1_WAIT_FINISH", + "TSIx_JS2_JOBS", + "TSIx_JS2_TASKS", + "TSIx_JS2_ACTIVE", + "TSIx_JS2_WAIT_FLUSH", + "TSIx_JS2_WAIT_READ", + "TSIx_JS2_WAIT_ISSUE", + "TSIx_JS2_WAIT_DEPEND", + "TSIx_JS2_WAIT_FINISH", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "TSIx_CACHE_FLUSH", + + /* Tiler */ + "", + "", + "", + "", + "TSIx_TILER_ACTIVE", + "TSIx_JOBS_PROCESSED", + "TSIx_TRIANGLES", + "TSIx_LINES", + "TSIx_POINTS", + "TSIx_FRONT_FACING", + "TSIx_BACK_FACING", + "TSIx_PRIM_VISIBLE", + "TSIx_PRIM_CULLED", + "TSIx_PRIM_CLIPPED", + "TSIx_PRIM_SAT_CULLED", + "TSIx_BIN_ALLOC_INIT", + "TSIx_BIN_ALLOC_OVERFLOW", + "TSIx_BUS_READ", + "", + "TSIx_BUS_WRITE", + "TSIx_LOADING_DESC", + "TSIx_IDVS_POS_SHAD_REQ", + "TSIx_IDVS_POS_SHAD_WAIT", + "TSIx_IDVS_POS_SHAD_STALL", + "TSIx_IDVS_POS_FIFO_FULL", + "TSIx_PREFETCH_STALL", + "TSIx_VCACHE_HIT", + "TSIx_VCACHE_MISS", + "TSIx_VCACHE_LINE_WAIT", + "TSIx_VFETCH_POS_READ_WAIT", + "TSIx_VFETCH_VERTEX_WAIT", + "TSIx_VFETCH_STALL", + "TSIx_PRIMASSY_STALL", + "TSIx_BBOX_GEN_STALL", + "TSIx_IDVS_VBU_HIT", + "TSIx_IDVS_VBU_MISS", + "TSIx_IDVS_VBU_LINE_DEALLOCATE", + "TSIx_IDVS_VAR_SHAD_REQ", + "TSIx_IDVS_VAR_SHAD_STALL", + "TSIx_BINNER_STALL", + "TSIx_ITER_STALL", + "TSIx_COMPRESS_MISS", + "TSIx_COMPRESS_STALL", + "TSIx_PCACHE_HIT", + "TSIx_PCACHE_MISS", + "TSIx_PCACHE_MISS_STALL", + "TSIx_PCACHE_EVICT_STALL", + "TSIx_PMGR_PTR_WR_STALL", + "TSIx_PMGR_PTR_RD_STALL", + "TSIx_PMGR_CMD_WR_STALL", + "TSIx_WRBUF_ACTIVE", + "TSIx_WRBUF_HIT", + "TSIx_WRBUF_MISS", + "TSIx_WRBUF_NO_FREE_LINE_STALL", + "TSIx_WRBUF_NO_AXI_ID_STALL", + "TSIx_WRBUF_AXI_STALL", + "", + "", + "", + "TSIx_UTLB_TRANS", + "TSIx_UTLB_TRANS_HIT", + "TSIx_UTLB_TRANS_STALL", + "TSIx_UTLB_TRANS_MISS_DELAY", + "TSIx_UTLB_MMU_REQ", + + /* Shader Core */ + "", + "", + "", + "", + "TSIx_FRAG_ACTIVE", + "TSIx_FRAG_PRIMITIVES", + "TSIx_FRAG_PRIM_RAST", + "TSIx_FRAG_FPK_ACTIVE", + "TSIx_FRAG_STARVING", + "TSIx_FRAG_WARPS", + "TSIx_FRAG_PARTIAL_WARPS", + "TSIx_FRAG_QUADS_RAST", + "TSIx_FRAG_QUADS_EZS_TEST", + "TSIx_FRAG_QUADS_EZS_UPDATE", + "TSIx_FRAG_QUADS_EZS_KILL", + "TSIx_FRAG_LZS_TEST", + "TSIx_FRAG_LZS_KILL", + "", + "TSIx_FRAG_PTILES", + "TSIx_FRAG_TRANS_ELIM", + "TSIx_QUAD_FPK_KILLER", + "", + "TSIx_COMPUTE_ACTIVE", + "TSIx_COMPUTE_TASKS", + "TSIx_COMPUTE_WARPS", + "TSIx_COMPUTE_STARVING", + "TSIx_EXEC_CORE_ACTIVE", + "TSIx_EXEC_ACTIVE", + "TSIx_EXEC_INSTR_COUNT", + "TSIx_EXEC_INSTR_DIVERGED", + "TSIx_EXEC_INSTR_STARVING", + "TSIx_ARITH_INSTR_SINGLE_FMA", + "TSIx_ARITH_INSTR_DOUBLE", + "TSIx_ARITH_INSTR_MSG", + "TSIx_ARITH_INSTR_MSG_ONLY", + "TSIx_TEX_MSGI_NUM_QUADS", + "TSIx_TEX_DFCH_NUM_PASSES", + "TSIx_TEX_DFCH_NUM_PASSES_MISS", + "TSIx_TEX_DFCH_NUM_PASSES_MIP_MAP", + "TSIx_TEX_TIDX_NUM_SPLIT_MIP_MAP", + "TSIx_TEX_TFCH_NUM_LINES_FETCHED", + "TSIx_TEX_TFCH_NUM_LINES_FETCHED_BLOCK_COMPRESSED", + "TSIx_TEX_TFCH_NUM_OPERATIONS", + "TSIx_TEX_FILT_NUM_OPERATIONS", + "TSIx_LS_MEM_READ_FULL", + "TSIx_LS_MEM_READ_SHORT", + "TSIx_LS_MEM_WRITE_FULL", + "TSIx_LS_MEM_WRITE_SHORT", + 
"TSIx_LS_MEM_ATOMIC", + "TSIx_VARY_INSTR", + "TSIx_VARY_SLOT_32", + "TSIx_VARY_SLOT_16", + "TSIx_ATTR_INSTR", + "TSIx_ARITH_INSTR_FP_MUL", + "TSIx_BEATS_RD_FTC", + "TSIx_BEATS_RD_FTC_EXT", + "TSIx_BEATS_RD_LSC", + "TSIx_BEATS_RD_LSC_EXT", + "TSIx_BEATS_RD_TEX", + "TSIx_BEATS_RD_TEX_EXT", + "TSIx_BEATS_RD_OTHER", + "TSIx_BEATS_WR_LSC_OTHER", + "TSIx_BEATS_WR_TIB", + "TSIx_BEATS_WR_LSC_WB", + + /* Memory System */ + "", + "", + "", + "", + "TSIx_MMU_REQUESTS", + "TSIx_MMU_TABLE_READS_L3", + "TSIx_MMU_TABLE_READS_L2", + "TSIx_MMU_HIT_L3", + "TSIx_MMU_HIT_L2", + "TSIx_MMU_S2_REQUESTS", + "TSIx_MMU_S2_TABLE_READS_L3", + "TSIx_MMU_S2_TABLE_READS_L2", + "TSIx_MMU_S2_HIT_L3", + "TSIx_MMU_S2_HIT_L2", + "", + "", + "TSIx_L2_RD_MSG_IN", + "TSIx_L2_RD_MSG_IN_STALL", + "TSIx_L2_WR_MSG_IN", + "TSIx_L2_WR_MSG_IN_STALL", + "TSIx_L2_SNP_MSG_IN", + "TSIx_L2_SNP_MSG_IN_STALL", + "TSIx_L2_RD_MSG_OUT", + "TSIx_L2_RD_MSG_OUT_STALL", + "TSIx_L2_WR_MSG_OUT", + "TSIx_L2_ANY_LOOKUP", + "TSIx_L2_READ_LOOKUP", + "TSIx_L2_WRITE_LOOKUP", + "TSIx_L2_EXT_SNOOP_LOOKUP", + "TSIx_L2_EXT_READ", + "TSIx_L2_EXT_READ_NOSNP", + "TSIx_L2_EXT_READ_UNIQUE", + "TSIx_L2_EXT_READ_BEATS", + "TSIx_L2_EXT_AR_STALL", + "TSIx_L2_EXT_AR_CNT_Q1", + "TSIx_L2_EXT_AR_CNT_Q2", + "TSIx_L2_EXT_AR_CNT_Q3", + "TSIx_L2_EXT_RRESP_0_127", + "TSIx_L2_EXT_RRESP_128_191", + "TSIx_L2_EXT_RRESP_192_255", + "TSIx_L2_EXT_RRESP_256_319", + "TSIx_L2_EXT_RRESP_320_383", + "TSIx_L2_EXT_WRITE", + "TSIx_L2_EXT_WRITE_NOSNP_FULL", + "TSIx_L2_EXT_WRITE_NOSNP_PTL", + "TSIx_L2_EXT_WRITE_SNP_FULL", + "TSIx_L2_EXT_WRITE_SNP_PTL", + "TSIx_L2_EXT_WRITE_BEATS", + "TSIx_L2_EXT_W_STALL", + "TSIx_L2_EXT_AW_CNT_Q1", + "TSIx_L2_EXT_AW_CNT_Q2", + "TSIx_L2_EXT_AW_CNT_Q3", + "TSIx_L2_EXT_SNOOP", + "TSIx_L2_EXT_SNOOP_STALL", + "TSIx_L2_EXT_SNOOP_RESP_CLEAN", + "TSIx_L2_EXT_SNOOP_RESP_DATA", + "TSIx_L2_EXT_SNOOP_INTERNAL", + "", + "", + "", + "", + "", + "", + "", + }; + + /* Mali-G52 */ + static const char * const hardware_counters_mali_tGOx[] = { + /* Job Manager */ + "", + "", + "", + "", + "TGOx_MESSAGES_SENT", + "TGOx_MESSAGES_RECEIVED", + "TGOx_GPU_ACTIVE", + "TGOx_IRQ_ACTIVE", + "TGOx_JS0_JOBS", + "TGOx_JS0_TASKS", + "TGOx_JS0_ACTIVE", + "TGOx_JS0_WAIT_FLUSH", + "TGOx_JS0_WAIT_READ", + "TGOx_JS0_WAIT_ISSUE", + "TGOx_JS0_WAIT_DEPEND", + "TGOx_JS0_WAIT_FINISH", + "TGOx_JS1_JOBS", + "TGOx_JS1_TASKS", + "TGOx_JS1_ACTIVE", + "TGOx_JS1_WAIT_FLUSH", + "TGOx_JS1_WAIT_READ", + "TGOx_JS1_WAIT_ISSUE", + "TGOx_JS1_WAIT_DEPEND", + "TGOx_JS1_WAIT_FINISH", + "TGOx_JS2_JOBS", + "TGOx_JS2_TASKS", + "TGOx_JS2_ACTIVE", + "TGOx_JS2_WAIT_FLUSH", + "TGOx_JS2_WAIT_READ", + "TGOx_JS2_WAIT_ISSUE", + "TGOx_JS2_WAIT_DEPEND", + "TGOx_JS2_WAIT_FINISH", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "TGOx_CACHE_FLUSH", + + /* Tiler */ + "", + "", + "", + "", + "TGOx_TILER_ACTIVE", + "TGOx_JOBS_PROCESSED", + "TGOx_TRIANGLES", + "TGOx_LINES", + "TGOx_POINTS", + "TGOx_FRONT_FACING", + "TGOx_BACK_FACING", + "TGOx_PRIM_VISIBLE", + "TGOx_PRIM_CULLED", + "TGOx_PRIM_CLIPPED", + "TGOx_PRIM_SAT_CULLED", + "TGOx_BIN_ALLOC_INIT", + "TGOx_BIN_ALLOC_OVERFLOW", + "TGOx_BUS_READ", + "", + "TGOx_BUS_WRITE", + "TGOx_LOADING_DESC", + "TGOx_IDVS_POS_SHAD_REQ", + "TGOx_IDVS_POS_SHAD_WAIT", + "TGOx_IDVS_POS_SHAD_STALL", + "TGOx_IDVS_POS_FIFO_FULL", + "TGOx_PREFETCH_STALL", + "TGOx_VCACHE_HIT", + "TGOx_VCACHE_MISS", + "TGOx_VCACHE_LINE_WAIT", + "TGOx_VFETCH_POS_READ_WAIT", + 
"TGOx_VFETCH_VERTEX_WAIT", + "TGOx_VFETCH_STALL", + "TGOx_PRIMASSY_STALL", + "TGOx_BBOX_GEN_STALL", + "TGOx_IDVS_VBU_HIT", + "TGOx_IDVS_VBU_MISS", + "TGOx_IDVS_VBU_LINE_DEALLOCATE", + "TGOx_IDVS_VAR_SHAD_REQ", + "TGOx_IDVS_VAR_SHAD_STALL", + "TGOx_BINNER_STALL", + "TGOx_ITER_STALL", + "TGOx_COMPRESS_MISS", + "TGOx_COMPRESS_STALL", + "TGOx_PCACHE_HIT", + "TGOx_PCACHE_MISS", + "TGOx_PCACHE_MISS_STALL", + "TGOx_PCACHE_EVICT_STALL", + "TGOx_PMGR_PTR_WR_STALL", + "TGOx_PMGR_PTR_RD_STALL", + "TGOx_PMGR_CMD_WR_STALL", + "TGOx_WRBUF_ACTIVE", + "TGOx_WRBUF_HIT", + "TGOx_WRBUF_MISS", + "TGOx_WRBUF_NO_FREE_LINE_STALL", + "TGOx_WRBUF_NO_AXI_ID_STALL", + "TGOx_WRBUF_AXI_STALL", + "", + "", + "", + "TGOx_UTLB_TRANS", + "TGOx_UTLB_TRANS_HIT", + "TGOx_UTLB_TRANS_STALL", + "TGOx_UTLB_TRANS_MISS_DELAY", + "TGOx_UTLB_MMU_REQ", + + /* Shader Core */ + "", + "", + "", + "", + "TGOx_FRAG_ACTIVE", + "TGOx_FRAG_PRIMITIVES", + "TGOx_FRAG_PRIM_RAST", + "TGOx_FRAG_FPK_ACTIVE", + "TGOx_FRAG_STARVING", + "TGOx_FRAG_WARPS", + "TGOx_FRAG_PARTIAL_WARPS", + "TGOx_FRAG_QUADS_RAST", + "TGOx_FRAG_QUADS_EZS_TEST", + "TGOx_FRAG_QUADS_EZS_UPDATE", + "TGOx_FRAG_QUADS_EZS_KILL", + "TGOx_FRAG_LZS_TEST", + "TGOx_FRAG_LZS_KILL", + "TGOx_WARP_REG_SIZE_64", + "TGOx_FRAG_PTILES", + "TGOx_FRAG_TRANS_ELIM", + "TGOx_QUAD_FPK_KILLER", + "TGOx_FULL_QUAD_WARPS", + "TGOx_COMPUTE_ACTIVE", + "TGOx_COMPUTE_TASKS", + "TGOx_COMPUTE_WARPS", + "TGOx_COMPUTE_STARVING", + "TGOx_EXEC_CORE_ACTIVE", + "TGOx_EXEC_ACTIVE", + "TGOx_EXEC_INSTR_COUNT", + "TGOx_EXEC_INSTR_DIVERGED", + "TGOx_EXEC_INSTR_STARVING", + "TGOx_ARITH_INSTR_SINGLE_FMA", + "TGOx_ARITH_INSTR_DOUBLE", + "TGOx_ARITH_INSTR_MSG", + "TGOx_ARITH_INSTR_MSG_ONLY", + "TGOx_TEX_MSGI_NUM_QUADS", + "TGOx_TEX_DFCH_NUM_PASSES", + "TGOx_TEX_DFCH_NUM_PASSES_MISS", + "TGOx_TEX_DFCH_NUM_PASSES_MIP_MAP", + "TGOx_TEX_TIDX_NUM_SPLIT_MIP_MAP", + "TGOx_TEX_TFCH_NUM_LINES_FETCHED", + "TGOx_TEX_TFCH_NUM_LINES_FETCHED_BLOCK_COMPRESSED", + "TGOx_TEX_TFCH_NUM_OPERATIONS", + "TGOx_TEX_FILT_NUM_OPERATIONS", + "TGOx_LS_MEM_READ_FULL", + "TGOx_LS_MEM_READ_SHORT", + "TGOx_LS_MEM_WRITE_FULL", + "TGOx_LS_MEM_WRITE_SHORT", + "TGOx_LS_MEM_ATOMIC", + "TGOx_VARY_INSTR", + "TGOx_VARY_SLOT_32", + "TGOx_VARY_SLOT_16", + "TGOx_ATTR_INSTR", + "TGOx_ARITH_INSTR_FP_MUL", + "TGOx_BEATS_RD_FTC", + "TGOx_BEATS_RD_FTC_EXT", + "TGOx_BEATS_RD_LSC", + "TGOx_BEATS_RD_LSC_EXT", + "TGOx_BEATS_RD_TEX", + "TGOx_BEATS_RD_TEX_EXT", + "TGOx_BEATS_RD_OTHER", + "TGOx_BEATS_WR_LSC_WB", + "TGOx_BEATS_WR_TIB", + "TGOx_BEATS_WR_LSC_OTHER", + + /* Memory System */ + "", + "", + "", + "", + "TGOx_MMU_REQUESTS", + "TGOx_MMU_TABLE_READS_L3", + "TGOx_MMU_TABLE_READS_L2", + "TGOx_MMU_HIT_L3", + "TGOx_MMU_HIT_L2", + "TGOx_MMU_S2_REQUESTS", + "TGOx_MMU_S2_TABLE_READS_L3", + "TGOx_MMU_S2_TABLE_READS_L2", + "TGOx_MMU_S2_HIT_L3", + "TGOx_MMU_S2_HIT_L2", + "", + "", + "TGOx_L2_RD_MSG_IN", + "TGOx_L2_RD_MSG_IN_STALL", + "TGOx_L2_WR_MSG_IN", + "TGOx_L2_WR_MSG_IN_STALL", + "TGOx_L2_SNP_MSG_IN", + "TGOx_L2_SNP_MSG_IN_STALL", + "TGOx_L2_RD_MSG_OUT", + "TGOx_L2_RD_MSG_OUT_STALL", + "TGOx_L2_WR_MSG_OUT", + "TGOx_L2_ANY_LOOKUP", + "TGOx_L2_READ_LOOKUP", + "TGOx_L2_WRITE_LOOKUP", + "TGOx_L2_EXT_SNOOP_LOOKUP", + "TGOx_L2_EXT_READ", + "TGOx_L2_EXT_READ_NOSNP", + "TGOx_L2_EXT_READ_UNIQUE", + "TGOx_L2_EXT_READ_BEATS", + "TGOx_L2_EXT_AR_STALL", + "TGOx_L2_EXT_AR_CNT_Q1", + "TGOx_L2_EXT_AR_CNT_Q2", + "TGOx_L2_EXT_AR_CNT_Q3", + "TGOx_L2_EXT_RRESP_0_127", + "TGOx_L2_EXT_RRESP_128_191", + "TGOx_L2_EXT_RRESP_192_255", + "TGOx_L2_EXT_RRESP_256_319", + "TGOx_L2_EXT_RRESP_320_383", + 
"TGOx_L2_EXT_WRITE", + "TGOx_L2_EXT_WRITE_NOSNP_FULL", + "TGOx_L2_EXT_WRITE_NOSNP_PTL", + "TGOx_L2_EXT_WRITE_SNP_FULL", + "TGOx_L2_EXT_WRITE_SNP_PTL", + "TGOx_L2_EXT_WRITE_BEATS", + "TGOx_L2_EXT_W_STALL", + "TGOx_L2_EXT_AW_CNT_Q1", + "TGOx_L2_EXT_AW_CNT_Q2", + "TGOx_L2_EXT_AW_CNT_Q3", + "TGOx_L2_EXT_SNOOP", + "TGOx_L2_EXT_SNOOP_STALL", + "TGOx_L2_EXT_SNOOP_RESP_CLEAN", + "TGOx_L2_EXT_SNOOP_RESP_DATA", + "TGOx_L2_EXT_SNOOP_INTERNAL", + "", + "", + "", + "", + "", + "", + "", + }; + + /* Mali-G71 */ + static const char * const hardware_counters_mali_tMIx[] = { + /* Job Manager */ + "", + "", + "", + "", + "TMIx_MESSAGES_SENT", + "TMIx_MESSAGES_RECEIVED", + "TMIx_GPU_ACTIVE", + "TMIx_IRQ_ACTIVE", + "TMIx_JS0_JOBS", + "TMIx_JS0_TASKS", + "TMIx_JS0_ACTIVE", + "TMIx_JS0_WAIT_FLUSH", + "TMIx_JS0_WAIT_READ", + "TMIx_JS0_WAIT_ISSUE", + "TMIx_JS0_WAIT_DEPEND", + "TMIx_JS0_WAIT_FINISH", + "TMIx_JS1_JOBS", + "TMIx_JS1_TASKS", + "TMIx_JS1_ACTIVE", + "TMIx_JS1_WAIT_FLUSH", + "TMIx_JS1_WAIT_READ", + "TMIx_JS1_WAIT_ISSUE", + "TMIx_JS1_WAIT_DEPEND", + "TMIx_JS1_WAIT_FINISH", + "TMIx_JS2_JOBS", + "TMIx_JS2_TASKS", + "TMIx_JS2_ACTIVE", + "TMIx_JS2_WAIT_FLUSH", + "TMIx_JS2_WAIT_READ", + "TMIx_JS2_WAIT_ISSUE", + "TMIx_JS2_WAIT_DEPEND", + "TMIx_JS2_WAIT_FINISH", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "TMIx_CACHE_FLUSH", + + /* Tiler */ + "", + "", + "", + "", + "TMIx_TILER_ACTIVE", + "TMIx_JOBS_PROCESSED", + "TMIx_TRIANGLES", + "TMIx_LINES", + "TMIx_POINTS", + "TMIx_FRONT_FACING", + "TMIx_BACK_FACING", + "TMIx_PRIM_VISIBLE", + "TMIx_PRIM_CULLED", + "TMIx_PRIM_CLIPPED", + "TMIx_PRIM_SAT_CULLED", + "TMIx_BIN_ALLOC_INIT", + "TMIx_BIN_ALLOC_OVERFLOW", + "TMIx_BUS_READ", + "", + "TMIx_BUS_WRITE", + "TMIx_LOADING_DESC", + "TMIx_IDVS_POS_SHAD_REQ", + "TMIx_IDVS_POS_SHAD_WAIT", + "TMIx_IDVS_POS_SHAD_STALL", + "TMIx_IDVS_POS_FIFO_FULL", + "TMIx_PREFETCH_STALL", + "TMIx_VCACHE_HIT", + "TMIx_VCACHE_MISS", + "TMIx_VCACHE_LINE_WAIT", + "TMIx_VFETCH_POS_READ_WAIT", + "TMIx_VFETCH_VERTEX_WAIT", + "TMIx_VFETCH_STALL", + "TMIx_PRIMASSY_STALL", + "TMIx_BBOX_GEN_STALL", + "TMIx_IDVS_VBU_HIT", + "TMIx_IDVS_VBU_MISS", + "TMIx_IDVS_VBU_LINE_DEALLOCATE", + "TMIx_IDVS_VAR_SHAD_REQ", + "TMIx_IDVS_VAR_SHAD_STALL", + "TMIx_BINNER_STALL", + "TMIx_ITER_STALL", + "TMIx_COMPRESS_MISS", + "TMIx_COMPRESS_STALL", + "TMIx_PCACHE_HIT", + "TMIx_PCACHE_MISS", + "TMIx_PCACHE_MISS_STALL", + "TMIx_PCACHE_EVICT_STALL", + "TMIx_PMGR_PTR_WR_STALL", + "TMIx_PMGR_PTR_RD_STALL", + "TMIx_PMGR_CMD_WR_STALL", + "TMIx_WRBUF_ACTIVE", + "TMIx_WRBUF_HIT", + "TMIx_WRBUF_MISS", + "TMIx_WRBUF_NO_FREE_LINE_STALL", + "TMIx_WRBUF_NO_AXI_ID_STALL", + "TMIx_WRBUF_AXI_STALL", + "", + "", + "", + "TMIx_UTLB_TRANS", + "TMIx_UTLB_TRANS_HIT", + "TMIx_UTLB_TRANS_STALL", + "TMIx_UTLB_TRANS_MISS_DELAY", + "TMIx_UTLB_MMU_REQ", + + /* Shader Core */ + "", + "", + "", + "", + "TMIx_FRAG_ACTIVE", + "TMIx_FRAG_PRIMITIVES", + "TMIx_FRAG_PRIM_RAST", + "TMIx_FRAG_FPK_ACTIVE", + "TMIx_FRAG_STARVING", + "TMIx_FRAG_WARPS", + "TMIx_FRAG_PARTIAL_WARPS", + "TMIx_FRAG_QUADS_RAST", + "TMIx_FRAG_QUADS_EZS_TEST", + "TMIx_FRAG_QUADS_EZS_UPDATE", + "TMIx_FRAG_QUADS_EZS_KILL", + "TMIx_FRAG_LZS_TEST", + "TMIx_FRAG_LZS_KILL", + "", + "TMIx_FRAG_PTILES", + "TMIx_FRAG_TRANS_ELIM", + "TMIx_QUAD_FPK_KILLER", + "", + "TMIx_COMPUTE_ACTIVE", + "TMIx_COMPUTE_TASKS", + "TMIx_COMPUTE_WARPS", + "TMIx_COMPUTE_STARVING", + "TMIx_EXEC_CORE_ACTIVE", + "TMIx_EXEC_ACTIVE", 
+ "TMIx_EXEC_INSTR_COUNT", + "TMIx_EXEC_INSTR_DIVERGED", + "TMIx_EXEC_INSTR_STARVING", + "TMIx_ARITH_INSTR_SINGLE_FMA", + "TMIx_ARITH_INSTR_DOUBLE", + "TMIx_ARITH_INSTR_MSG", + "TMIx_ARITH_INSTR_MSG_ONLY", + "TMIx_TEX_INSTR", + "TMIx_TEX_INSTR_MIPMAP", + "TMIx_TEX_INSTR_COMPRESSED", + "TMIx_TEX_INSTR_3D", + "TMIx_TEX_INSTR_TRILINEAR", + "TMIx_TEX_COORD_ISSUE", + "TMIx_TEX_COORD_STALL", + "TMIx_TEX_STARVE_CACHE", + "TMIx_TEX_STARVE_FILTER", + "TMIx_LS_MEM_READ_FULL", + "TMIx_LS_MEM_READ_SHORT", + "TMIx_LS_MEM_WRITE_FULL", + "TMIx_LS_MEM_WRITE_SHORT", + "TMIx_LS_MEM_ATOMIC", + "TMIx_VARY_INSTR", + "TMIx_VARY_SLOT_32", + "TMIx_VARY_SLOT_16", + "TMIx_ATTR_INSTR", + "TMIx_ARITH_INSTR_FP_MUL", + "TMIx_BEATS_RD_FTC", + "TMIx_BEATS_RD_FTC_EXT", + "TMIx_BEATS_RD_LSC", + "TMIx_BEATS_RD_LSC_EXT", + "TMIx_BEATS_RD_TEX", + "TMIx_BEATS_RD_TEX_EXT", + "TMIx_BEATS_RD_OTHER", + "TMIx_BEATS_WR_LSC", + "TMIx_BEATS_WR_TIB", + "TMIx_BEATS_WR_OTHER", + + /* Memory System */ + "", + "", + "", + "", + "TMIx_MMU_REQUESTS", + "TMIx_MMU_TABLE_READS_L3", + "TMIx_MMU_TABLE_READS_L2", + "TMIx_MMU_HIT_L3", + "TMIx_MMU_HIT_L2", + "TMIx_MMU_S2_REQUESTS", + "TMIx_MMU_S2_TABLE_READS_L3", + "TMIx_MMU_S2_TABLE_READS_L2", + "TMIx_MMU_S2_HIT_L3", + "TMIx_MMU_S2_HIT_L2", + "", + "", + "TMIx_L2_RD_MSG_IN", + "TMIx_L2_RD_MSG_IN_STALL", + "TMIx_L2_WR_MSG_IN", + "TMIx_L2_WR_MSG_IN_STALL", + "TMIx_L2_SNP_MSG_IN", + "TMIx_L2_SNP_MSG_IN_STALL", + "TMIx_L2_RD_MSG_OUT", + "TMIx_L2_RD_MSG_OUT_STALL", + "TMIx_L2_WR_MSG_OUT", + "TMIx_L2_ANY_LOOKUP", + "TMIx_L2_READ_LOOKUP", + "TMIx_L2_WRITE_LOOKUP", + "TMIx_L2_EXT_SNOOP_LOOKUP", + "TMIx_L2_EXT_READ", + "TMIx_L2_EXT_READ_NOSNP", + "TMIx_L2_EXT_READ_UNIQUE", + "TMIx_L2_EXT_READ_BEATS", + "TMIx_L2_EXT_AR_STALL", + "TMIx_L2_EXT_AR_CNT_Q1", + "TMIx_L2_EXT_AR_CNT_Q2", + "TMIx_L2_EXT_AR_CNT_Q3", + "TMIx_L2_EXT_RRESP_0_127", + "TMIx_L2_EXT_RRESP_128_191", + "TMIx_L2_EXT_RRESP_192_255", + "TMIx_L2_EXT_RRESP_256_319", + "TMIx_L2_EXT_RRESP_320_383", + "TMIx_L2_EXT_WRITE", + "TMIx_L2_EXT_WRITE_NOSNP_FULL", + "TMIx_L2_EXT_WRITE_NOSNP_PTL", + "TMIx_L2_EXT_WRITE_SNP_FULL", + "TMIx_L2_EXT_WRITE_SNP_PTL", + "TMIx_L2_EXT_WRITE_BEATS", + "TMIx_L2_EXT_W_STALL", + "TMIx_L2_EXT_AW_CNT_Q1", + "TMIx_L2_EXT_AW_CNT_Q2", + "TMIx_L2_EXT_AW_CNT_Q3", + "TMIx_L2_EXT_SNOOP", + "TMIx_L2_EXT_SNOOP_STALL", + "TMIx_L2_EXT_SNOOP_RESP_CLEAN", + "TMIx_L2_EXT_SNOOP_RESP_DATA", + "TMIx_L2_EXT_SNOOP_INTERNAL", + "", + "", + "", + "", + "", + "", + "", + }; + + /* Mali-G72 */ + static const char * const hardware_counters_mali_tHEx[] = { + /* Job Manager */ + "", + "", + "", + "", + "THEx_MESSAGES_SENT", + "THEx_MESSAGES_RECEIVED", + "THEx_GPU_ACTIVE", + "THEx_IRQ_ACTIVE", + "THEx_JS0_JOBS", + "THEx_JS0_TASKS", + "THEx_JS0_ACTIVE", + "THEx_JS0_WAIT_FLUSH", + "THEx_JS0_WAIT_READ", + "THEx_JS0_WAIT_ISSUE", + "THEx_JS0_WAIT_DEPEND", + "THEx_JS0_WAIT_FINISH", + "THEx_JS1_JOBS", + "THEx_JS1_TASKS", + "THEx_JS1_ACTIVE", + "THEx_JS1_WAIT_FLUSH", + "THEx_JS1_WAIT_READ", + "THEx_JS1_WAIT_ISSUE", + "THEx_JS1_WAIT_DEPEND", + "THEx_JS1_WAIT_FINISH", + "THEx_JS2_JOBS", + "THEx_JS2_TASKS", + "THEx_JS2_ACTIVE", + "THEx_JS2_WAIT_FLUSH", + "THEx_JS2_WAIT_READ", + "THEx_JS2_WAIT_ISSUE", + "THEx_JS2_WAIT_DEPEND", + "THEx_JS2_WAIT_FINISH", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "THEx_CACHE_FLUSH", + + /* Tiler */ + "", + "", + "", + "", + "THEx_TILER_ACTIVE", + "THEx_JOBS_PROCESSED", + "THEx_TRIANGLES", + 
"THEx_LINES", + "THEx_POINTS", + "THEx_FRONT_FACING", + "THEx_BACK_FACING", + "THEx_PRIM_VISIBLE", + "THEx_PRIM_CULLED", + "THEx_PRIM_CLIPPED", + "THEx_PRIM_SAT_CULLED", + "THEx_BIN_ALLOC_INIT", + "THEx_BIN_ALLOC_OVERFLOW", + "THEx_BUS_READ", + "", + "THEx_BUS_WRITE", + "THEx_LOADING_DESC", + "THEx_IDVS_POS_SHAD_REQ", + "THEx_IDVS_POS_SHAD_WAIT", + "THEx_IDVS_POS_SHAD_STALL", + "THEx_IDVS_POS_FIFO_FULL", + "THEx_PREFETCH_STALL", + "THEx_VCACHE_HIT", + "THEx_VCACHE_MISS", + "THEx_VCACHE_LINE_WAIT", + "THEx_VFETCH_POS_READ_WAIT", + "THEx_VFETCH_VERTEX_WAIT", + "THEx_VFETCH_STALL", + "THEx_PRIMASSY_STALL", + "THEx_BBOX_GEN_STALL", + "THEx_IDVS_VBU_HIT", + "THEx_IDVS_VBU_MISS", + "THEx_IDVS_VBU_LINE_DEALLOCATE", + "THEx_IDVS_VAR_SHAD_REQ", + "THEx_IDVS_VAR_SHAD_STALL", + "THEx_BINNER_STALL", + "THEx_ITER_STALL", + "THEx_COMPRESS_MISS", + "THEx_COMPRESS_STALL", + "THEx_PCACHE_HIT", + "THEx_PCACHE_MISS", + "THEx_PCACHE_MISS_STALL", + "THEx_PCACHE_EVICT_STALL", + "THEx_PMGR_PTR_WR_STALL", + "THEx_PMGR_PTR_RD_STALL", + "THEx_PMGR_CMD_WR_STALL", + "THEx_WRBUF_ACTIVE", + "THEx_WRBUF_HIT", + "THEx_WRBUF_MISS", + "THEx_WRBUF_NO_FREE_LINE_STALL", + "THEx_WRBUF_NO_AXI_ID_STALL", + "THEx_WRBUF_AXI_STALL", + "", + "", + "", + "THEx_UTLB_TRANS", + "THEx_UTLB_TRANS_HIT", + "THEx_UTLB_TRANS_STALL", + "THEx_UTLB_TRANS_MISS_DELAY", + "THEx_UTLB_MMU_REQ", + + /* Shader Core */ + "", + "", + "", + "", + "THEx_FRAG_ACTIVE", + "THEx_FRAG_PRIMITIVES", + "THEx_FRAG_PRIM_RAST", + "THEx_FRAG_FPK_ACTIVE", + "THEx_FRAG_STARVING", + "THEx_FRAG_WARPS", + "THEx_FRAG_PARTIAL_WARPS", + "THEx_FRAG_QUADS_RAST", + "THEx_FRAG_QUADS_EZS_TEST", + "THEx_FRAG_QUADS_EZS_UPDATE", + "THEx_FRAG_QUADS_EZS_KILL", + "THEx_FRAG_LZS_TEST", + "THEx_FRAG_LZS_KILL", + "", + "THEx_FRAG_PTILES", + "THEx_FRAG_TRANS_ELIM", + "THEx_QUAD_FPK_KILLER", + "", + "THEx_COMPUTE_ACTIVE", + "THEx_COMPUTE_TASKS", + "THEx_COMPUTE_WARPS", + "THEx_COMPUTE_STARVING", + "THEx_EXEC_CORE_ACTIVE", + "THEx_EXEC_ACTIVE", + "THEx_EXEC_INSTR_COUNT", + "THEx_EXEC_INSTR_DIVERGED", + "THEx_EXEC_INSTR_STARVING", + "THEx_ARITH_INSTR_SINGLE_FMA", + "THEx_ARITH_INSTR_DOUBLE", + "THEx_ARITH_INSTR_MSG", + "THEx_ARITH_INSTR_MSG_ONLY", + "THEx_TEX_INSTR", + "THEx_TEX_INSTR_MIPMAP", + "THEx_TEX_INSTR_COMPRESSED", + "THEx_TEX_INSTR_3D", + "THEx_TEX_INSTR_TRILINEAR", + "THEx_TEX_COORD_ISSUE", + "THEx_TEX_COORD_STALL", + "THEx_TEX_STARVE_CACHE", + "THEx_TEX_STARVE_FILTER", + "THEx_LS_MEM_READ_FULL", + "THEx_LS_MEM_READ_SHORT", + "THEx_LS_MEM_WRITE_FULL", + "THEx_LS_MEM_WRITE_SHORT", + "THEx_LS_MEM_ATOMIC", + "THEx_VARY_INSTR", + "THEx_VARY_SLOT_32", + "THEx_VARY_SLOT_16", + "THEx_ATTR_INSTR", + "THEx_ARITH_INSTR_FP_MUL", + "THEx_BEATS_RD_FTC", + "THEx_BEATS_RD_FTC_EXT", + "THEx_BEATS_RD_LSC", + "THEx_BEATS_RD_LSC_EXT", + "THEx_BEATS_RD_TEX", + "THEx_BEATS_RD_TEX_EXT", + "THEx_BEATS_RD_OTHER", + "THEx_BEATS_WR_LSC", + "THEx_BEATS_WR_TIB", + "THEx_BEATS_WR_OTHER", + + /* Memory System */ + "", + "", + "", + "", + "THEx_MMU_REQUESTS", + "THEx_MMU_TABLE_READS_L3", + "THEx_MMU_TABLE_READS_L2", + "THEx_MMU_HIT_L3", + "THEx_MMU_HIT_L2", + "THEx_MMU_S2_REQUESTS", + "THEx_MMU_S2_TABLE_READS_L3", + "THEx_MMU_S2_TABLE_READS_L2", + "THEx_MMU_S2_HIT_L3", + "THEx_MMU_S2_HIT_L2", + "", + "", + "THEx_L2_RD_MSG_IN", + "THEx_L2_RD_MSG_IN_STALL", + "THEx_L2_WR_MSG_IN", + "THEx_L2_WR_MSG_IN_STALL", + "THEx_L2_SNP_MSG_IN", + "THEx_L2_SNP_MSG_IN_STALL", + "THEx_L2_RD_MSG_OUT", + "THEx_L2_RD_MSG_OUT_STALL", + "THEx_L2_WR_MSG_OUT", + "THEx_L2_ANY_LOOKUP", + "THEx_L2_READ_LOOKUP", + "THEx_L2_WRITE_LOOKUP", + 
"THEx_L2_EXT_SNOOP_LOOKUP", + "THEx_L2_EXT_READ", + "THEx_L2_EXT_READ_NOSNP", + "THEx_L2_EXT_READ_UNIQUE", + "THEx_L2_EXT_READ_BEATS", + "THEx_L2_EXT_AR_STALL", + "THEx_L2_EXT_AR_CNT_Q1", + "THEx_L2_EXT_AR_CNT_Q2", + "THEx_L2_EXT_AR_CNT_Q3", + "THEx_L2_EXT_RRESP_0_127", + "THEx_L2_EXT_RRESP_128_191", + "THEx_L2_EXT_RRESP_192_255", + "THEx_L2_EXT_RRESP_256_319", + "THEx_L2_EXT_RRESP_320_383", + "THEx_L2_EXT_WRITE", + "THEx_L2_EXT_WRITE_NOSNP_FULL", + "THEx_L2_EXT_WRITE_NOSNP_PTL", + "THEx_L2_EXT_WRITE_SNP_FULL", + "THEx_L2_EXT_WRITE_SNP_PTL", + "THEx_L2_EXT_WRITE_BEATS", + "THEx_L2_EXT_W_STALL", + "THEx_L2_EXT_AW_CNT_Q1", + "THEx_L2_EXT_AW_CNT_Q2", + "THEx_L2_EXT_AW_CNT_Q3", + "THEx_L2_EXT_SNOOP", + "THEx_L2_EXT_SNOOP_STALL", + "THEx_L2_EXT_SNOOP_RESP_CLEAN", + "THEx_L2_EXT_SNOOP_RESP_DATA", + "THEx_L2_EXT_SNOOP_INTERNAL", + "", + "", + "", + "", + "", + "", + "", + }; + + /* Mali-G76 */ + static const char * const hardware_counters_mali_tNOx[] = { + /* Job Manager */ + "", + "", + "", + "", + "TNOx_MESSAGES_SENT", + "TNOx_MESSAGES_RECEIVED", + "TNOx_GPU_ACTIVE", + "TNOx_IRQ_ACTIVE", + "TNOx_JS0_JOBS", + "TNOx_JS0_TASKS", + "TNOx_JS0_ACTIVE", + "TNOx_JS0_WAIT_FLUSH", + "TNOx_JS0_WAIT_READ", + "TNOx_JS0_WAIT_ISSUE", + "TNOx_JS0_WAIT_DEPEND", + "TNOx_JS0_WAIT_FINISH", + "TNOx_JS1_JOBS", + "TNOx_JS1_TASKS", + "TNOx_JS1_ACTIVE", + "TNOx_JS1_WAIT_FLUSH", + "TNOx_JS1_WAIT_READ", + "TNOx_JS1_WAIT_ISSUE", + "TNOx_JS1_WAIT_DEPEND", + "TNOx_JS1_WAIT_FINISH", + "TNOx_JS2_JOBS", + "TNOx_JS2_TASKS", + "TNOx_JS2_ACTIVE", + "TNOx_JS2_WAIT_FLUSH", + "TNOx_JS2_WAIT_READ", + "TNOx_JS2_WAIT_ISSUE", + "TNOx_JS2_WAIT_DEPEND", + "TNOx_JS2_WAIT_FINISH", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "TNOx_CACHE_FLUSH", + + /* Tiler */ + "", + "", + "", + "", + "TNOx_TILER_ACTIVE", + "TNOx_JOBS_PROCESSED", + "TNOx_TRIANGLES", + "TNOx_LINES", + "TNOx_POINTS", + "TNOx_FRONT_FACING", + "TNOx_BACK_FACING", + "TNOx_PRIM_VISIBLE", + "TNOx_PRIM_CULLED", + "TNOx_PRIM_CLIPPED", + "TNOx_PRIM_SAT_CULLED", + "TNOx_BIN_ALLOC_INIT", + "TNOx_BIN_ALLOC_OVERFLOW", + "TNOx_BUS_READ", + "", + "TNOx_BUS_WRITE", + "TNOx_LOADING_DESC", + "TNOx_IDVS_POS_SHAD_REQ", + "TNOx_IDVS_POS_SHAD_WAIT", + "TNOx_IDVS_POS_SHAD_STALL", + "TNOx_IDVS_POS_FIFO_FULL", + "TNOx_PREFETCH_STALL", + "TNOx_VCACHE_HIT", + "TNOx_VCACHE_MISS", + "TNOx_VCACHE_LINE_WAIT", + "TNOx_VFETCH_POS_READ_WAIT", + "TNOx_VFETCH_VERTEX_WAIT", + "TNOx_VFETCH_STALL", + "TNOx_PRIMASSY_STALL", + "TNOx_BBOX_GEN_STALL", + "TNOx_IDVS_VBU_HIT", + "TNOx_IDVS_VBU_MISS", + "TNOx_IDVS_VBU_LINE_DEALLOCATE", + "TNOx_IDVS_VAR_SHAD_REQ", + "TNOx_IDVS_VAR_SHAD_STALL", + "TNOx_BINNER_STALL", + "TNOx_ITER_STALL", + "TNOx_COMPRESS_MISS", + "TNOx_COMPRESS_STALL", + "TNOx_PCACHE_HIT", + "TNOx_PCACHE_MISS", + "TNOx_PCACHE_MISS_STALL", + "TNOx_PCACHE_EVICT_STALL", + "TNOx_PMGR_PTR_WR_STALL", + "TNOx_PMGR_PTR_RD_STALL", + "TNOx_PMGR_CMD_WR_STALL", + "TNOx_WRBUF_ACTIVE", + "TNOx_WRBUF_HIT", + "TNOx_WRBUF_MISS", + "TNOx_WRBUF_NO_FREE_LINE_STALL", + "TNOx_WRBUF_NO_AXI_ID_STALL", + "TNOx_WRBUF_AXI_STALL", + "", + "", + "", + "TNOx_UTLB_TRANS", + "TNOx_UTLB_TRANS_HIT", + "TNOx_UTLB_TRANS_STALL", + "TNOx_UTLB_TRANS_MISS_DELAY", + "TNOx_UTLB_MMU_REQ", + + /* Shader Core */ + "", + "", + "", + "", + "TNOx_FRAG_ACTIVE", + "TNOx_FRAG_PRIMITIVES", + "TNOx_FRAG_PRIM_RAST", + "TNOx_FRAG_FPK_ACTIVE", + "TNOx_FRAG_STARVING", + "TNOx_FRAG_WARPS", + "TNOx_FRAG_PARTIAL_WARPS", + 
"TNOx_FRAG_QUADS_RAST", + "TNOx_FRAG_QUADS_EZS_TEST", + "TNOx_FRAG_QUADS_EZS_UPDATE", + "TNOx_FRAG_QUADS_EZS_KILL", + "TNOx_FRAG_LZS_TEST", + "TNOx_FRAG_LZS_KILL", + "TNOx_WARP_REG_SIZE_64", + "TNOx_FRAG_PTILES", + "TNOx_FRAG_TRANS_ELIM", + "TNOx_QUAD_FPK_KILLER", + "TNOx_FULL_QUAD_WARPS", + "TNOx_COMPUTE_ACTIVE", + "TNOx_COMPUTE_TASKS", + "TNOx_COMPUTE_WARPS", + "TNOx_COMPUTE_STARVING", + "TNOx_EXEC_CORE_ACTIVE", + "TNOx_EXEC_ACTIVE", + "TNOx_EXEC_INSTR_COUNT", + "TNOx_EXEC_INSTR_DIVERGED", + "TNOx_EXEC_INSTR_STARVING", + "TNOx_ARITH_INSTR_SINGLE_FMA", + "TNOx_ARITH_INSTR_DOUBLE", + "TNOx_ARITH_INSTR_MSG", + "TNOx_ARITH_INSTR_MSG_ONLY", + "TNOx_TEX_MSGI_NUM_QUADS", + "TNOx_TEX_DFCH_NUM_PASSES", + "TNOx_TEX_DFCH_NUM_PASSES_MISS", + "TNOx_TEX_DFCH_NUM_PASSES_MIP_MAP", + "TNOx_TEX_TIDX_NUM_SPLIT_MIP_MAP", + "TNOx_TEX_TFCH_NUM_LINES_FETCHED", + "TNOx_TEX_TFCH_NUM_LINES_FETCHED_BLOCK_COMPRESSED", + "TNOx_TEX_TFCH_NUM_OPERATIONS", + "TNOx_TEX_FILT_NUM_OPERATIONS", + "TNOx_LS_MEM_READ_FULL", + "TNOx_LS_MEM_READ_SHORT", + "TNOx_LS_MEM_WRITE_FULL", + "TNOx_LS_MEM_WRITE_SHORT", + "TNOx_LS_MEM_ATOMIC", + "TNOx_VARY_INSTR", + "TNOx_VARY_SLOT_32", + "TNOx_VARY_SLOT_16", + "TNOx_ATTR_INSTR", + "TNOx_ARITH_INSTR_FP_MUL", + "TNOx_BEATS_RD_FTC", + "TNOx_BEATS_RD_FTC_EXT", + "TNOx_BEATS_RD_LSC", + "TNOx_BEATS_RD_LSC_EXT", + "TNOx_BEATS_RD_TEX", + "TNOx_BEATS_RD_TEX_EXT", + "TNOx_BEATS_RD_OTHER", + "TNOx_BEATS_WR_LSC_OTHER", + "TNOx_BEATS_WR_TIB", + "TNOx_BEATS_WR_LSC_WB", + + /* Memory System */ + "", + "", + "", + "", + "TNOx_MMU_REQUESTS", + "TNOx_MMU_TABLE_READS_L3", + "TNOx_MMU_TABLE_READS_L2", + "TNOx_MMU_HIT_L3", + "TNOx_MMU_HIT_L2", + "TNOx_MMU_S2_REQUESTS", + "TNOx_MMU_S2_TABLE_READS_L3", + "TNOx_MMU_S2_TABLE_READS_L2", + "TNOx_MMU_S2_HIT_L3", + "TNOx_MMU_S2_HIT_L2", + "", + "", + "TNOx_L2_RD_MSG_IN", + "TNOx_L2_RD_MSG_IN_STALL", + "TNOx_L2_WR_MSG_IN", + "TNOx_L2_WR_MSG_IN_STALL", + "TNOx_L2_SNP_MSG_IN", + "TNOx_L2_SNP_MSG_IN_STALL", + "TNOx_L2_RD_MSG_OUT", + "TNOx_L2_RD_MSG_OUT_STALL", + "TNOx_L2_WR_MSG_OUT", + "TNOx_L2_ANY_LOOKUP", + "TNOx_L2_READ_LOOKUP", + "TNOx_L2_WRITE_LOOKUP", + "TNOx_L2_EXT_SNOOP_LOOKUP", + "TNOx_L2_EXT_READ", + "TNOx_L2_EXT_READ_NOSNP", + "TNOx_L2_EXT_READ_UNIQUE", + "TNOx_L2_EXT_READ_BEATS", + "TNOx_L2_EXT_AR_STALL", + "TNOx_L2_EXT_AR_CNT_Q1", + "TNOx_L2_EXT_AR_CNT_Q2", + "TNOx_L2_EXT_AR_CNT_Q3", + "TNOx_L2_EXT_RRESP_0_127", + "TNOx_L2_EXT_RRESP_128_191", + "TNOx_L2_EXT_RRESP_192_255", + "TNOx_L2_EXT_RRESP_256_319", + "TNOx_L2_EXT_RRESP_320_383", + "TNOx_L2_EXT_WRITE", + "TNOx_L2_EXT_WRITE_NOSNP_FULL", + "TNOx_L2_EXT_WRITE_NOSNP_PTL", + "TNOx_L2_EXT_WRITE_SNP_FULL", + "TNOx_L2_EXT_WRITE_SNP_PTL", + "TNOx_L2_EXT_WRITE_BEATS", + "TNOx_L2_EXT_W_STALL", + "TNOx_L2_EXT_AW_CNT_Q1", + "TNOx_L2_EXT_AW_CNT_Q2", + "TNOx_L2_EXT_AW_CNT_Q3", + "TNOx_L2_EXT_SNOOP", + "TNOx_L2_EXT_SNOOP_STALL", + "TNOx_L2_EXT_SNOOP_RESP_CLEAN", + "TNOx_L2_EXT_SNOOP_RESP_DATA", + "TNOx_L2_EXT_SNOOP_INTERNAL", + "", + "", + "", + "", + "", + "", + "", + }; + + /* Mali-G57 */ + static const char * const hardware_counters_mali_tNAx[] = { + /* Job Manager */ + "", + "", + "", + "", + "TNAx_MESSAGES_SENT", + "TNAx_MESSAGES_RECEIVED", + "TNAx_GPU_ACTIVE", + "TNAx_IRQ_ACTIVE", + "TNAx_JS0_JOBS", + "TNAx_JS0_TASKS", + "TNAx_JS0_ACTIVE", + "TNAx_JS0_WAIT_FLUSH", + "TNAx_JS0_WAIT_READ", + "TNAx_JS0_WAIT_ISSUE", + "TNAx_JS0_WAIT_DEPEND", + "TNAx_JS0_WAIT_FINISH", + "TNAx_JS1_JOBS", + "TNAx_JS1_TASKS", + "TNAx_JS1_ACTIVE", + "TNAx_JS1_WAIT_FLUSH", + "TNAx_JS1_WAIT_READ", + "TNAx_JS1_WAIT_ISSUE", + 
"TNAx_JS1_WAIT_DEPEND", + "TNAx_JS1_WAIT_FINISH", + "TNAx_JS2_JOBS", + "TNAx_JS2_TASKS", + "TNAx_JS2_ACTIVE", + "TNAx_JS2_WAIT_FLUSH", + "TNAx_JS2_WAIT_READ", + "TNAx_JS2_WAIT_ISSUE", + "TNAx_JS2_WAIT_DEPEND", + "TNAx_JS2_WAIT_FINISH", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "TNAx_CACHE_FLUSH", + + /* Tiler */ + "", + "", + "", + "", + "TNAx_TILER_ACTIVE", + "TNAx_JOBS_PROCESSED", + "TNAx_TRIANGLES", + "TNAx_LINES", + "TNAx_POINTS", + "TNAx_FRONT_FACING", + "TNAx_BACK_FACING", + "TNAx_PRIM_VISIBLE", + "TNAx_PRIM_CULLED", + "TNAx_PRIM_CLIPPED", + "TNAx_PRIM_SAT_CULLED", + "TNAx_BIN_ALLOC_INIT", + "TNAx_BIN_ALLOC_OVERFLOW", + "TNAx_BUS_READ", + "TNAx_BUS_WRITE_UTLB0", + "TNAx_BUS_WRITE_UTLB1", + "TNAx_LOADING_DESC", + "TNAx_IDVS_POS_SHAD_REQ", + "TNAx_IDVS_POS_SHAD_WAIT", + "TNAx_IDVS_POS_SHAD_STALL", + "TNAx_IDVS_POS_FIFO_FULL", + "TNAx_PREFETCH_STALL", + "TNAx_VCACHE_HIT", + "TNAx_VCACHE_MISS", + "TNAx_VCACHE_LINE_WAIT", + "TNAx_VFETCH_POS_READ_WAIT", + "TNAx_VFETCH_VERTEX_WAIT", + "TNAx_VFETCH_STALL", + "TNAx_PRIMASSY_STALL", + "TNAx_BBOX_GEN_STALL", + "TNAx_IDVS_VBU_HIT", + "TNAx_IDVS_VBU_MISS", + "TNAx_IDVS_VBU_LINE_DEALLOCATE", + "TNAx_IDVS_VAR_SHAD_REQ", + "TNAx_IDVS_VAR_SHAD_STALL", + "TNAx_BINNER_STALL", + "TNAx_ITER_STALL", + "TNAx_COMPRESS_MISS", + "TNAx_COMPRESS_STALL", + "TNAx_PCACHE_HIT", + "TNAx_PCACHE_MISS", + "TNAx_PCACHE_MISS_STALL", + "TNAx_PCACHE_EVICT_STALL", + "TNAx_PMGR_PTR_WR_STALL", + "TNAx_PMGR_PTR_RD_STALL", + "TNAx_PMGR_CMD_WR_STALL", + "TNAx_WRBUF_ACTIVE", + "TNAx_WRBUF_HIT", + "TNAx_WRBUF_MISS", + "TNAx_WRBUF_NO_FREE_LINE_STALL", + "TNAx_WRBUF_NO_ASN_ID_STALL", + "TNAx_WRBUF_ASN_STALL", + "TNAx_UTLB0_TRANS", + "TNAx_UTLB0_TRANS_HIT", + "TNAx_UTLB0_TRANS_STALL", + "TNAx_UTLB0_MMU_REQ", + "TNAx_UTLB1_TRANS", + "TNAx_UTLB1_TRANS_HIT", + "TNAx_UTLB1_TRANS_STALL", + "TNAx_UTLB1_MMU_REQ", + + /* Shader Core */ + "", + "", + "", + "", + "TNAx_FRAG_ACTIVE", + "TNAx_FRAG_PRIMITIVES_OUT", + "TNAx_FRAG_PRIM_RAST", + "TNAx_FRAG_FPK_ACTIVE", + "TNAx_FRAG_STARVING", + "TNAx_FRAG_WARPS", + "TNAx_FRAG_PARTIAL_QUADS_RAST", + "TNAx_FRAG_QUADS_RAST", + "TNAx_FRAG_QUADS_EZS_TEST", + "TNAx_FRAG_QUADS_EZS_UPDATE", + "TNAx_FRAG_QUADS_EZS_KILL", + "TNAx_FRAG_LZS_TEST", + "TNAx_FRAG_LZS_KILL", + "TNAx_WARP_REG_SIZE_64", + "TNAx_FRAG_PTILES", + "TNAx_FRAG_TRANS_ELIM", + "TNAx_QUAD_FPK_KILLER", + "TNAx_FULL_QUAD_WARPS", + "TNAx_COMPUTE_ACTIVE", + "TNAx_COMPUTE_TASKS", + "TNAx_COMPUTE_WARPS", + "TNAx_COMPUTE_STARVING", + "TNAx_EXEC_CORE_ACTIVE", + "TNAx_EXEC_INSTR_FMA", + "TNAx_EXEC_INSTR_CVT", + "TNAx_EXEC_INSTR_SFU", + "TNAx_EXEC_INSTR_MSG", + "TNAx_EXEC_INSTR_DIVERGED", + "TNAx_EXEC_ICACHE_MISS", + "TNAx_EXEC_STARVE_ARITH", + "TNAx_CALL_BLEND_SHADER", + "TNAx_TEX_MSGI_NUM_FLITS", + "TNAx_TEX_DFCH_CLK_STALLED", + "TNAx_TEX_TFCH_CLK_STALLED", + "TNAx_TEX_TFCH_STARVED_PENDING_DATA_FETCH", + "TNAx_TEX_FILT_NUM_OPERATIONS", + "TNAx_TEX_FILT_NUM_FXR_OPERATIONS", + "TNAx_TEX_FILT_NUM_FST_OPERATIONS", + "TNAx_TEX_MSGO_NUM_MSG", + "TNAx_TEX_MSGO_NUM_FLITS", + "TNAx_LS_MEM_READ_FULL", + "TNAx_LS_MEM_READ_SHORT", + "TNAx_LS_MEM_WRITE_FULL", + "TNAx_LS_MEM_WRITE_SHORT", + "TNAx_LS_MEM_ATOMIC", + "TNAx_VARY_INSTR", + "TNAx_VARY_SLOT_32", + "TNAx_VARY_SLOT_16", + "TNAx_ATTR_INSTR", + "TNAx_ARITH_INSTR_FP_MUL", + "TNAx_BEATS_RD_FTC", + "TNAx_BEATS_RD_FTC_EXT", + "TNAx_BEATS_RD_LSC", + "TNAx_BEATS_RD_LSC_EXT", + "TNAx_BEATS_RD_TEX", + 
"TNAx_BEATS_RD_TEX_EXT", + "TNAx_BEATS_RD_OTHER", + "TNAx_BEATS_WR_LSC_OTHER", + "TNAx_BEATS_WR_TIB", + "TNAx_BEATS_WR_LSC_WB", + + /* Memory System */ + "", + "", + "", + "", + "TNAx_MMU_REQUESTS", + "TNAx_MMU_TABLE_READS_L3", + "TNAx_MMU_TABLE_READS_L2", + "TNAx_MMU_HIT_L3", + "TNAx_MMU_HIT_L2", + "TNAx_MMU_S2_REQUESTS", + "TNAx_MMU_S2_TABLE_READS_L3", + "TNAx_MMU_S2_TABLE_READS_L2", + "TNAx_MMU_S2_HIT_L3", + "TNAx_MMU_S2_HIT_L2", + "", + "", + "TNAx_L2_RD_MSG_IN", + "TNAx_L2_RD_MSG_IN_STALL", + "TNAx_L2_WR_MSG_IN", + "TNAx_L2_WR_MSG_IN_STALL", + "TNAx_L2_SNP_MSG_IN", + "TNAx_L2_SNP_MSG_IN_STALL", + "TNAx_L2_RD_MSG_OUT", + "TNAx_L2_RD_MSG_OUT_STALL", + "TNAx_L2_WR_MSG_OUT", + "TNAx_L2_ANY_LOOKUP", + "TNAx_L2_READ_LOOKUP", + "TNAx_L2_WRITE_LOOKUP", + "TNAx_L2_EXT_SNOOP_LOOKUP", + "TNAx_L2_EXT_READ", + "TNAx_L2_EXT_READ_NOSNP", + "TNAx_L2_EXT_READ_UNIQUE", + "TNAx_L2_EXT_READ_BEATS", + "TNAx_L2_EXT_AR_STALL", + "TNAx_L2_EXT_AR_CNT_Q1", + "TNAx_L2_EXT_AR_CNT_Q2", + "TNAx_L2_EXT_AR_CNT_Q3", + "TNAx_L2_EXT_RRESP_0_127", + "TNAx_L2_EXT_RRESP_128_191", + "TNAx_L2_EXT_RRESP_192_255", + "TNAx_L2_EXT_RRESP_256_319", + "TNAx_L2_EXT_RRESP_320_383", + "TNAx_L2_EXT_WRITE", + "TNAx_L2_EXT_WRITE_NOSNP_FULL", + "TNAx_L2_EXT_WRITE_NOSNP_PTL", + "TNAx_L2_EXT_WRITE_SNP_FULL", + "TNAx_L2_EXT_WRITE_SNP_PTL", + "TNAx_L2_EXT_WRITE_BEATS", + "TNAx_L2_EXT_W_STALL", + "TNAx_L2_EXT_AW_CNT_Q1", + "TNAx_L2_EXT_AW_CNT_Q2", + "TNAx_L2_EXT_AW_CNT_Q3", + "TNAx_L2_EXT_SNOOP", + "TNAx_L2_EXT_SNOOP_STALL", + "TNAx_L2_EXT_SNOOP_RESP_CLEAN", + "TNAx_L2_EXT_SNOOP_RESP_DATA", + "TNAx_L2_EXT_SNOOP_INTERNAL", + "", + "", + "", + "", + "", + "", + "", + }; + + /* Mali-G68 */ + static const char * const hardware_counters_mali_tOTx[] = { + /* Job Manager */ + "", + "", + "", + "", + "TOTx_MESSAGES_SENT", + "TOTx_MESSAGES_RECEIVED", + "TOTx_GPU_ACTIVE", + "TOTx_IRQ_ACTIVE", + "TOTx_JS0_JOBS", + "TOTx_JS0_TASKS", + "TOTx_JS0_ACTIVE", + "TOTx_JS0_WAIT_FLUSH", + "TOTx_JS0_WAIT_READ", + "TOTx_JS0_WAIT_ISSUE", + "TOTx_JS0_WAIT_DEPEND", + "TOTx_JS0_WAIT_FINISH", + "TOTx_JS1_JOBS", + "TOTx_JS1_TASKS", + "TOTx_JS1_ACTIVE", + "TOTx_JS1_WAIT_FLUSH", + "TOTx_JS1_WAIT_READ", + "TOTx_JS1_WAIT_ISSUE", + "TOTx_JS1_WAIT_DEPEND", + "TOTx_JS1_WAIT_FINISH", + "TOTx_JS2_JOBS", + "TOTx_JS2_TASKS", + "TOTx_JS2_ACTIVE", + "TOTx_JS2_WAIT_FLUSH", + "TOTx_JS2_WAIT_READ", + "TOTx_JS2_WAIT_ISSUE", + "TOTx_JS2_WAIT_DEPEND", + "TOTx_JS2_WAIT_FINISH", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "TOTx_CACHE_FLUSH", + + /* Tiler */ + "", + "", + "", + "", + "TOTx_TILER_ACTIVE", + "TOTx_JOBS_PROCESSED", + "TOTx_TRIANGLES", + "TOTx_LINES", + "TOTx_POINTS", + "TOTx_FRONT_FACING", + "TOTx_BACK_FACING", + "TOTx_PRIM_VISIBLE", + "TOTx_PRIM_CULLED", + "TOTx_PRIM_CLIPPED", + "TOTx_PRIM_SAT_CULLED", + "TOTx_BIN_ALLOC_INIT", + "TOTx_BIN_ALLOC_OVERFLOW", + "TOTx_BUS_READ", + "TOTx_BUS_WRITE_UTLB0", + "TOTx_BUS_WRITE_UTLB1", + "TOTx_LOADING_DESC", + "TOTx_IDVS_POS_SHAD_REQ", + "TOTx_IDVS_POS_SHAD_WAIT", + "TOTx_IDVS_POS_SHAD_STALL", + "TOTx_IDVS_POS_FIFO_FULL", + "TOTx_PREFETCH_STALL", + "TOTx_VCACHE_HIT", + "TOTx_VCACHE_MISS", + "TOTx_VCACHE_LINE_WAIT", + "TOTx_VFETCH_POS_READ_WAIT", + "TOTx_VFETCH_VERTEX_WAIT", + "TOTx_VFETCH_STALL", + "TOTx_PRIMASSY_STALL", + "TOTx_BBOX_GEN_STALL", + "TOTx_IDVS_VBU_HIT", + "TOTx_IDVS_VBU_MISS", + "TOTx_IDVS_VBU_LINE_DEALLOCATE", + "TOTx_IDVS_VAR_SHAD_REQ", + "TOTx_IDVS_VAR_SHAD_STALL", + 
"TOTx_BINNER_STALL", + "TOTx_ITER_STALL", + "TOTx_COMPRESS_MISS", + "TOTx_COMPRESS_STALL", + "TOTx_PCACHE_HIT", + "TOTx_PCACHE_MISS", + "TOTx_PCACHE_MISS_STALL", + "TOTx_PCACHE_EVICT_STALL", + "TOTx_PMGR_PTR_WR_STALL", + "TOTx_PMGR_PTR_RD_STALL", + "TOTx_PMGR_CMD_WR_STALL", + "TOTx_WRBUF_ACTIVE", + "TOTx_WRBUF_HIT", + "TOTx_WRBUF_MISS", + "TOTx_WRBUF_NO_FREE_LINE_STALL", + "TOTx_WRBUF_NO_ASN_ID_STALL", + "TOTx_WRBUF_ASN_STALL", + "TOTx_UTLB0_TRANS", + "TOTx_UTLB0_TRANS_HIT", + "TOTx_UTLB0_TRANS_STALL", + "TOTx_UTLB0_MMU_REQ", + "TOTx_UTLB1_TRANS", + "TOTx_UTLB1_TRANS_HIT", + "TOTx_UTLB1_TRANS_STALL", + "TOTx_UTLB1_MMU_REQ", + + /* Shader Core */ + "", + "", + "", + "", + "TOTx_FRAG_ACTIVE", + "TOTx_FRAG_PRIMITIVES_OUT", + "TOTx_FRAG_PRIM_RAST", + "TOTx_FRAG_FPK_ACTIVE", + "TOTx_FRAG_STARVING", + "TOTx_FRAG_WARPS", + "TOTx_FRAG_PARTIAL_QUADS_RAST", + "TOTx_FRAG_QUADS_RAST", + "TOTx_FRAG_QUADS_EZS_TEST", + "TOTx_FRAG_QUADS_EZS_UPDATE", + "TOTx_FRAG_QUADS_EZS_KILL", + "TOTx_FRAG_LZS_TEST", + "TOTx_FRAG_LZS_KILL", + "TOTx_WARP_REG_SIZE_64", + "TOTx_FRAG_PTILES", + "TOTx_FRAG_TRANS_ELIM", + "TOTx_QUAD_FPK_KILLER", + "TOTx_FULL_QUAD_WARPS", + "TOTx_COMPUTE_ACTIVE", + "TOTx_COMPUTE_TASKS", + "TOTx_COMPUTE_WARPS", + "TOTx_COMPUTE_STARVING", + "TOTx_EXEC_CORE_ACTIVE", + "TOTx_EXEC_INSTR_FMA", + "TOTx_EXEC_INSTR_CVT", + "TOTx_EXEC_INSTR_SFU", + "TOTx_EXEC_INSTR_MSG", + "TOTx_EXEC_INSTR_DIVERGED", + "TOTx_EXEC_ICACHE_MISS", + "TOTx_EXEC_STARVE_ARITH", + "TOTx_CALL_BLEND_SHADER", + "TOTx_TEX_MSGI_NUM_FLITS", + "TOTx_TEX_DFCH_CLK_STALLED", + "TOTx_TEX_TFCH_CLK_STALLED", + "TOTx_TEX_TFCH_STARVED_PENDING_DATA_FETCH", + "TOTx_TEX_FILT_NUM_OPERATIONS", + "TOTx_TEX_FILT_NUM_FXR_OPERATIONS", + "TOTx_TEX_FILT_NUM_FST_OPERATIONS", + "TOTx_TEX_MSGO_NUM_MSG", + "TOTx_TEX_MSGO_NUM_FLITS", + "TOTx_LS_MEM_READ_FULL", + "TOTx_LS_MEM_READ_SHORT", + "TOTx_LS_MEM_WRITE_FULL", + "TOTx_LS_MEM_WRITE_SHORT", + "TOTx_LS_MEM_ATOMIC", + "TOTx_VARY_INSTR", + "TOTx_VARY_SLOT_32", + "TOTx_VARY_SLOT_16", + "TOTx_ATTR_INSTR", + "TOTx_SHADER_CORE_ACTIVE", + "TOTx_BEATS_RD_FTC", + "TOTx_BEATS_RD_FTC_EXT", + "TOTx_BEATS_RD_LSC", + "TOTx_BEATS_RD_LSC_EXT", + "TOTx_BEATS_RD_TEX", + "TOTx_BEATS_RD_TEX_EXT", + "TOTx_BEATS_RD_OTHER", + "TOTx_BEATS_WR_LSC_OTHER", + "TOTx_BEATS_WR_TIB", + "TOTx_BEATS_WR_LSC_WB", + + /* Memory System */ + "", + "", + "", + "", + "TOTx_MMU_REQUESTS", + "TOTx_MMU_TABLE_READS_L3", + "TOTx_MMU_TABLE_READS_L2", + "TOTx_MMU_HIT_L3", + "TOTx_MMU_HIT_L2", + "TOTx_MMU_S2_REQUESTS", + "TOTx_MMU_S2_TABLE_READS_L3", + "TOTx_MMU_S2_TABLE_READS_L2", + "TOTx_MMU_S2_HIT_L3", + "TOTx_MMU_S2_HIT_L2", + "", + "", + "TOTx_L2_RD_MSG_IN", + "TOTx_L2_RD_MSG_IN_STALL", + "TOTx_L2_WR_MSG_IN", + "TOTx_L2_WR_MSG_IN_STALL", + "TOTx_L2_SNP_MSG_IN", + "TOTx_L2_SNP_MSG_IN_STALL", + "TOTx_L2_RD_MSG_OUT", + "TOTx_L2_RD_MSG_OUT_STALL", + "TOTx_L2_WR_MSG_OUT", + "TOTx_L2_ANY_LOOKUP", + "TOTx_L2_READ_LOOKUP", + "TOTx_L2_WRITE_LOOKUP", + "TOTx_L2_EXT_SNOOP_LOOKUP", + "TOTx_L2_EXT_READ", + "TOTx_L2_EXT_READ_NOSNP", + "TOTx_L2_EXT_READ_UNIQUE", + "TOTx_L2_EXT_READ_BEATS", + "TOTx_L2_EXT_AR_STALL", + "TOTx_L2_EXT_AR_CNT_Q1", + "TOTx_L2_EXT_AR_CNT_Q2", + "TOTx_L2_EXT_AR_CNT_Q3", + "TOTx_L2_EXT_RRESP_0_127", + "TOTx_L2_EXT_RRESP_128_191", + "TOTx_L2_EXT_RRESP_192_255", + "TOTx_L2_EXT_RRESP_256_319", + "TOTx_L2_EXT_RRESP_320_383", + "TOTx_L2_EXT_WRITE", + "TOTx_L2_EXT_WRITE_NOSNP_FULL", + "TOTx_L2_EXT_WRITE_NOSNP_PTL", + "TOTx_L2_EXT_WRITE_SNP_FULL", + "TOTx_L2_EXT_WRITE_SNP_PTL", + "TOTx_L2_EXT_WRITE_BEATS", + "TOTx_L2_EXT_W_STALL", + 
"TOTx_L2_EXT_AW_CNT_Q1", + "TOTx_L2_EXT_AW_CNT_Q2", + "TOTx_L2_EXT_AW_CNT_Q3", + "TOTx_L2_EXT_SNOOP", + "TOTx_L2_EXT_SNOOP_STALL", + "TOTx_L2_EXT_SNOOP_RESP_CLEAN", + "TOTx_L2_EXT_SNOOP_RESP_DATA", + "TOTx_L2_EXT_SNOOP_INTERNAL", + "", + "", + "", + "", + "", + "", + "", + }; + + /* Mali-G77 */ + static const char * const hardware_counters_mali_tTRx[] = { + /* Job Manager */ + "", + "", + "", + "", + "TTRx_MESSAGES_SENT", + "TTRx_MESSAGES_RECEIVED", + "TTRx_GPU_ACTIVE", + "TTRx_IRQ_ACTIVE", + "TTRx_JS0_JOBS", + "TTRx_JS0_TASKS", + "TTRx_JS0_ACTIVE", + "TTRx_JS0_WAIT_FLUSH", + "TTRx_JS0_WAIT_READ", + "TTRx_JS0_WAIT_ISSUE", + "TTRx_JS0_WAIT_DEPEND", + "TTRx_JS0_WAIT_FINISH", + "TTRx_JS1_JOBS", + "TTRx_JS1_TASKS", + "TTRx_JS1_ACTIVE", + "TTRx_JS1_WAIT_FLUSH", + "TTRx_JS1_WAIT_READ", + "TTRx_JS1_WAIT_ISSUE", + "TTRx_JS1_WAIT_DEPEND", + "TTRx_JS1_WAIT_FINISH", + "TTRx_JS2_JOBS", + "TTRx_JS2_TASKS", + "TTRx_JS2_ACTIVE", + "TTRx_JS2_WAIT_FLUSH", + "TTRx_JS2_WAIT_READ", + "TTRx_JS2_WAIT_ISSUE", + "TTRx_JS2_WAIT_DEPEND", + "TTRx_JS2_WAIT_FINISH", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "TTRx_CACHE_FLUSH", + + /* Tiler */ + "", + "", + "", + "", + "TTRx_TILER_ACTIVE", + "TTRx_JOBS_PROCESSED", + "TTRx_TRIANGLES", + "TTRx_LINES", + "TTRx_POINTS", + "TTRx_FRONT_FACING", + "TTRx_BACK_FACING", + "TTRx_PRIM_VISIBLE", + "TTRx_PRIM_CULLED", + "TTRx_PRIM_CLIPPED", + "TTRx_PRIM_SAT_CULLED", + "TTRx_BIN_ALLOC_INIT", + "TTRx_BIN_ALLOC_OVERFLOW", + "TTRx_BUS_READ", + "TTRx_BUS_WRITE_UTLB0", + "TTRx_BUS_WRITE_UTLB1", + "TTRx_LOADING_DESC", + "TTRx_IDVS_POS_SHAD_REQ", + "TTRx_IDVS_POS_SHAD_WAIT", + "TTRx_IDVS_POS_SHAD_STALL", + "TTRx_IDVS_POS_FIFO_FULL", + "TTRx_PREFETCH_STALL", + "TTRx_VCACHE_HIT", + "TTRx_VCACHE_MISS", + "TTRx_VCACHE_LINE_WAIT", + "TTRx_VFETCH_POS_READ_WAIT", + "TTRx_VFETCH_VERTEX_WAIT", + "TTRx_VFETCH_STALL", + "TTRx_PRIMASSY_STALL", + "TTRx_BBOX_GEN_STALL", + "TTRx_IDVS_VBU_HIT", + "TTRx_IDVS_VBU_MISS", + "TTRx_IDVS_VBU_LINE_DEALLOCATE", + "TTRx_IDVS_VAR_SHAD_REQ", + "TTRx_IDVS_VAR_SHAD_STALL", + "TTRx_BINNER_STALL", + "TTRx_ITER_STALL", + "TTRx_COMPRESS_MISS", + "TTRx_COMPRESS_STALL", + "TTRx_PCACHE_HIT", + "TTRx_PCACHE_MISS", + "TTRx_PCACHE_MISS_STALL", + "TTRx_PCACHE_EVICT_STALL", + "TTRx_PMGR_PTR_WR_STALL", + "TTRx_PMGR_PTR_RD_STALL", + "TTRx_PMGR_CMD_WR_STALL", + "TTRx_WRBUF_ACTIVE", + "TTRx_WRBUF_HIT", + "TTRx_WRBUF_MISS", + "TTRx_WRBUF_NO_FREE_LINE_STALL", + "TTRx_WRBUF_NO_ASN_ID_STALL", + "TTRx_WRBUF_ASN_STALL", + "TTRx_UTLB0_TRANS", + "TTRx_UTLB0_TRANS_HIT", + "TTRx_UTLB0_TRANS_STALL", + "TTRx_UTLB0_MMU_REQ", + "TTRx_UTLB1_TRANS", + "TTRx_UTLB1_TRANS_HIT", + "TTRx_UTLB1_TRANS_STALL", + "TTRx_UTLB1_MMU_REQ", + + /* Shader Core */ + "", + "", + "", + "", + "TTRx_FRAG_ACTIVE", + "TTRx_FRAG_PRIMITIVES_OUT", + "TTRx_FRAG_PRIM_RAST", + "TTRx_FRAG_FPK_ACTIVE", + "TTRx_FRAG_STARVING", + "TTRx_FRAG_WARPS", + "TTRx_FRAG_PARTIAL_QUADS_RAST", + "TTRx_FRAG_QUADS_RAST", + "TTRx_FRAG_QUADS_EZS_TEST", + "TTRx_FRAG_QUADS_EZS_UPDATE", + "TTRx_FRAG_QUADS_EZS_KILL", + "TTRx_FRAG_LZS_TEST", + "TTRx_FRAG_LZS_KILL", + "TTRx_WARP_REG_SIZE_64", + "TTRx_FRAG_PTILES", + "TTRx_FRAG_TRANS_ELIM", + "TTRx_QUAD_FPK_KILLER", + "TTRx_FULL_QUAD_WARPS", + "TTRx_COMPUTE_ACTIVE", + "TTRx_COMPUTE_TASKS", + "TTRx_COMPUTE_WARPS", + "TTRx_COMPUTE_STARVING", + "TTRx_EXEC_CORE_ACTIVE", + "TTRx_EXEC_INSTR_FMA", + "TTRx_EXEC_INSTR_CVT", + "TTRx_EXEC_INSTR_SFU", + 
"TTRx_EXEC_INSTR_MSG", + "TTRx_EXEC_INSTR_DIVERGED", + "TTRx_EXEC_ICACHE_MISS", + "TTRx_EXEC_STARVE_ARITH", + "TTRx_CALL_BLEND_SHADER", + "TTRx_TEX_MSGI_NUM_FLITS", + "TTRx_TEX_DFCH_CLK_STALLED", + "TTRx_TEX_TFCH_CLK_STALLED", + "TTRx_TEX_TFCH_STARVED_PENDING_DATA_FETCH", + "TTRx_TEX_FILT_NUM_OPERATIONS", + "TTRx_TEX_FILT_NUM_FXR_OPERATIONS", + "TTRx_TEX_FILT_NUM_FST_OPERATIONS", + "TTRx_TEX_MSGO_NUM_MSG", + "TTRx_TEX_MSGO_NUM_FLITS", + "TTRx_LS_MEM_READ_FULL", + "TTRx_LS_MEM_READ_SHORT", + "TTRx_LS_MEM_WRITE_FULL", + "TTRx_LS_MEM_WRITE_SHORT", + "TTRx_LS_MEM_ATOMIC", + "TTRx_VARY_INSTR", + "TTRx_VARY_SLOT_32", + "TTRx_VARY_SLOT_16", + "TTRx_ATTR_INSTR", + "TTRx_ARITH_INSTR_FP_MUL", + "TTRx_BEATS_RD_FTC", + "TTRx_BEATS_RD_FTC_EXT", + "TTRx_BEATS_RD_LSC", + "TTRx_BEATS_RD_LSC_EXT", + "TTRx_BEATS_RD_TEX", + "TTRx_BEATS_RD_TEX_EXT", + "TTRx_BEATS_RD_OTHER", + "TTRx_BEATS_WR_LSC_OTHER", + "TTRx_BEATS_WR_TIB", + "TTRx_BEATS_WR_LSC_WB", + + /* Memory System */ + "", + "", + "", + "", + "TTRx_MMU_REQUESTS", + "TTRx_MMU_TABLE_READS_L3", + "TTRx_MMU_TABLE_READS_L2", + "TTRx_MMU_HIT_L3", + "TTRx_MMU_HIT_L2", + "TTRx_MMU_S2_REQUESTS", + "TTRx_MMU_S2_TABLE_READS_L3", + "TTRx_MMU_S2_TABLE_READS_L2", + "TTRx_MMU_S2_HIT_L3", + "TTRx_MMU_S2_HIT_L2", + "", + "", + "TTRx_L2_RD_MSG_IN", + "TTRx_L2_RD_MSG_IN_STALL", + "TTRx_L2_WR_MSG_IN", + "TTRx_L2_WR_MSG_IN_STALL", + "TTRx_L2_SNP_MSG_IN", + "TTRx_L2_SNP_MSG_IN_STALL", + "TTRx_L2_RD_MSG_OUT", + "TTRx_L2_RD_MSG_OUT_STALL", + "TTRx_L2_WR_MSG_OUT", + "TTRx_L2_ANY_LOOKUP", + "TTRx_L2_READ_LOOKUP", + "TTRx_L2_WRITE_LOOKUP", + "TTRx_L2_EXT_SNOOP_LOOKUP", + "TTRx_L2_EXT_READ", + "TTRx_L2_EXT_READ_NOSNP", + "TTRx_L2_EXT_READ_UNIQUE", + "TTRx_L2_EXT_READ_BEATS", + "TTRx_L2_EXT_AR_STALL", + "TTRx_L2_EXT_AR_CNT_Q1", + "TTRx_L2_EXT_AR_CNT_Q2", + "TTRx_L2_EXT_AR_CNT_Q3", + "TTRx_L2_EXT_RRESP_0_127", + "TTRx_L2_EXT_RRESP_128_191", + "TTRx_L2_EXT_RRESP_192_255", + "TTRx_L2_EXT_RRESP_256_319", + "TTRx_L2_EXT_RRESP_320_383", + "TTRx_L2_EXT_WRITE", + "TTRx_L2_EXT_WRITE_NOSNP_FULL", + "TTRx_L2_EXT_WRITE_NOSNP_PTL", + "TTRx_L2_EXT_WRITE_SNP_FULL", + "TTRx_L2_EXT_WRITE_SNP_PTL", + "TTRx_L2_EXT_WRITE_BEATS", + "TTRx_L2_EXT_W_STALL", + "TTRx_L2_EXT_AW_CNT_Q1", + "TTRx_L2_EXT_AW_CNT_Q2", + "TTRx_L2_EXT_AW_CNT_Q3", + "TTRx_L2_EXT_SNOOP", + "TTRx_L2_EXT_SNOOP_STALL", + "TTRx_L2_EXT_SNOOP_RESP_CLEAN", + "TTRx_L2_EXT_SNOOP_RESP_DATA", + "TTRx_L2_EXT_SNOOP_INTERNAL", + "", + "", + "", + "", + "", + "", + "", + }; + + /* Mali-G78 */ + static const char * const hardware_counters_mali_tBOx[] = { + /* Job Manager */ + "", + "", + "", + "", + "TBOx_MESSAGES_SENT", + "TBOx_MESSAGES_RECEIVED", + "TBOx_GPU_ACTIVE", + "TBOx_IRQ_ACTIVE", + "TBOx_JS0_JOBS", + "TBOx_JS0_TASKS", + "TBOx_JS0_ACTIVE", + "TBOx_JS0_WAIT_FLUSH", + "TBOx_JS0_WAIT_READ", + "TBOx_JS0_WAIT_ISSUE", + "TBOx_JS0_WAIT_DEPEND", + "TBOx_JS0_WAIT_FINISH", + "TBOx_JS1_JOBS", + "TBOx_JS1_TASKS", + "TBOx_JS1_ACTIVE", + "TBOx_JS1_WAIT_FLUSH", + "TBOx_JS1_WAIT_READ", + "TBOx_JS1_WAIT_ISSUE", + "TBOx_JS1_WAIT_DEPEND", + "TBOx_JS1_WAIT_FINISH", + "TBOx_JS2_JOBS", + "TBOx_JS2_TASKS", + "TBOx_JS2_ACTIVE", + "TBOx_JS2_WAIT_FLUSH", + "TBOx_JS2_WAIT_READ", + "TBOx_JS2_WAIT_ISSUE", + "TBOx_JS2_WAIT_DEPEND", + "TBOx_JS2_WAIT_FINISH", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "TBOx_CACHE_FLUSH", + + /* Tiler */ + "", + "", + "", + "", + "TBOx_TILER_ACTIVE", + "TBOx_JOBS_PROCESSED", + "TBOx_TRIANGLES", + 
"TBOx_LINES", + "TBOx_POINTS", + "TBOx_FRONT_FACING", + "TBOx_BACK_FACING", + "TBOx_PRIM_VISIBLE", + "TBOx_PRIM_CULLED", + "TBOx_PRIM_CLIPPED", + "TBOx_PRIM_SAT_CULLED", + "TBOx_BIN_ALLOC_INIT", + "TBOx_BIN_ALLOC_OVERFLOW", + "TBOx_BUS_READ", + "TBOx_BUS_WRITE_UTLB0", + "TBOx_BUS_WRITE_UTLB1", + "TBOx_LOADING_DESC", + "TBOx_IDVS_POS_SHAD_REQ", + "TBOx_IDVS_POS_SHAD_WAIT", + "TBOx_IDVS_POS_SHAD_STALL", + "TBOx_IDVS_POS_FIFO_FULL", + "TBOx_PREFETCH_STALL", + "TBOx_VCACHE_HIT", + "TBOx_VCACHE_MISS", + "TBOx_VCACHE_LINE_WAIT", + "TBOx_VFETCH_POS_READ_WAIT", + "TBOx_VFETCH_VERTEX_WAIT", + "TBOx_VFETCH_STALL", + "TBOx_PRIMASSY_STALL", + "TBOx_BBOX_GEN_STALL", + "TBOx_IDVS_VBU_HIT", + "TBOx_IDVS_VBU_MISS", + "TBOx_IDVS_VBU_LINE_DEALLOCATE", + "TBOx_IDVS_VAR_SHAD_REQ", + "TBOx_IDVS_VAR_SHAD_STALL", + "TBOx_BINNER_STALL", + "TBOx_ITER_STALL", + "TBOx_COMPRESS_MISS", + "TBOx_COMPRESS_STALL", + "TBOx_PCACHE_HIT", + "TBOx_PCACHE_MISS", + "TBOx_PCACHE_MISS_STALL", + "TBOx_PCACHE_EVICT_STALL", + "TBOx_PMGR_PTR_WR_STALL", + "TBOx_PMGR_PTR_RD_STALL", + "TBOx_PMGR_CMD_WR_STALL", + "TBOx_WRBUF_ACTIVE", + "TBOx_WRBUF_HIT", + "TBOx_WRBUF_MISS", + "TBOx_WRBUF_NO_FREE_LINE_STALL", + "TBOx_WRBUF_NO_ASN_ID_STALL", + "TBOx_WRBUF_ASN_STALL", + "TBOx_UTLB0_TRANS", + "TBOx_UTLB0_TRANS_HIT", + "TBOx_UTLB0_TRANS_STALL", + "TBOx_UTLB0_MMU_REQ", + "TBOx_UTLB1_TRANS", + "TBOx_UTLB1_TRANS_HIT", + "TBOx_UTLB1_TRANS_STALL", + "TBOx_UTLB1_MMU_REQ", + + /* Shader Core */ + "", + "", + "", + "", + "TBOx_FRAG_ACTIVE", + "TBOx_FRAG_PRIMITIVES_OUT", + "TBOx_FRAG_PRIM_RAST", + "TBOx_FRAG_FPK_ACTIVE", + "TBOx_FRAG_STARVING", + "TBOx_FRAG_WARPS", + "TBOx_FRAG_PARTIAL_QUADS_RAST", + "TBOx_FRAG_QUADS_RAST", + "TBOx_FRAG_QUADS_EZS_TEST", + "TBOx_FRAG_QUADS_EZS_UPDATE", + "TBOx_FRAG_QUADS_EZS_KILL", + "TBOx_FRAG_LZS_TEST", + "TBOx_FRAG_LZS_KILL", + "TBOx_WARP_REG_SIZE_64", + "TBOx_FRAG_PTILES", + "TBOx_FRAG_TRANS_ELIM", + "TBOx_QUAD_FPK_KILLER", + "TBOx_FULL_QUAD_WARPS", + "TBOx_COMPUTE_ACTIVE", + "TBOx_COMPUTE_TASKS", + "TBOx_COMPUTE_WARPS", + "TBOx_COMPUTE_STARVING", + "TBOx_EXEC_CORE_ACTIVE", + "TBOx_EXEC_INSTR_FMA", + "TBOx_EXEC_INSTR_CVT", + "TBOx_EXEC_INSTR_SFU", + "TBOx_EXEC_INSTR_MSG", + "TBOx_EXEC_INSTR_DIVERGED", + "TBOx_EXEC_ICACHE_MISS", + "TBOx_EXEC_STARVE_ARITH", + "TBOx_CALL_BLEND_SHADER", + "TBOx_TEX_MSGI_NUM_FLITS", + "TBOx_TEX_DFCH_CLK_STALLED", + "TBOx_TEX_TFCH_CLK_STALLED", + "TBOx_TEX_TFCH_STARVED_PENDING_DATA_FETCH", + "TBOx_TEX_FILT_NUM_OPERATIONS", + "TBOx_TEX_FILT_NUM_FXR_OPERATIONS", + "TBOx_TEX_FILT_NUM_FST_OPERATIONS", + "TBOx_TEX_MSGO_NUM_MSG", + "TBOx_TEX_MSGO_NUM_FLITS", + "TBOx_LS_MEM_READ_FULL", + "TBOx_LS_MEM_READ_SHORT", + "TBOx_LS_MEM_WRITE_FULL", + "TBOx_LS_MEM_WRITE_SHORT", + "TBOx_LS_MEM_ATOMIC", + "TBOx_VARY_INSTR", + "TBOx_VARY_SLOT_32", + "TBOx_VARY_SLOT_16", + "TBOx_ATTR_INSTR", + "TBOx_SHADER_CORE_ACTIVE", + "TBOx_BEATS_RD_FTC", + "TBOx_BEATS_RD_FTC_EXT", + "TBOx_BEATS_RD_LSC", + "TBOx_BEATS_RD_LSC_EXT", + "TBOx_BEATS_RD_TEX", + "TBOx_BEATS_RD_TEX_EXT", + "TBOx_BEATS_RD_OTHER", + "TBOx_BEATS_WR_LSC_OTHER", + "TBOx_BEATS_WR_TIB", + "TBOx_BEATS_WR_LSC_WB", + + /* Memory System */ + "", + "", + "", + "", + "TBOx_MMU_REQUESTS", + "TBOx_MMU_TABLE_READS_L3", + "TBOx_MMU_TABLE_READS_L2", + "TBOx_MMU_HIT_L3", + "TBOx_MMU_HIT_L2", + "TBOx_MMU_S2_REQUESTS", + "TBOx_MMU_S2_TABLE_READS_L3", + "TBOx_MMU_S2_TABLE_READS_L2", + "TBOx_MMU_S2_HIT_L3", + "TBOx_MMU_S2_HIT_L2", + "", + "", + "TBOx_L2_RD_MSG_IN", + "TBOx_L2_RD_MSG_IN_STALL", + "TBOx_L2_WR_MSG_IN", + "TBOx_L2_WR_MSG_IN_STALL", + "TBOx_L2_SNP_MSG_IN", 
+ "TBOx_L2_SNP_MSG_IN_STALL", + "TBOx_L2_RD_MSG_OUT", + "TBOx_L2_RD_MSG_OUT_STALL", + "TBOx_L2_WR_MSG_OUT", + "TBOx_L2_ANY_LOOKUP", + "TBOx_L2_READ_LOOKUP", + "TBOx_L2_WRITE_LOOKUP", + "TBOx_L2_EXT_SNOOP_LOOKUP", + "TBOx_L2_EXT_READ", + "TBOx_L2_EXT_READ_NOSNP", + "TBOx_L2_EXT_READ_UNIQUE", + "TBOx_L2_EXT_READ_BEATS", + "TBOx_L2_EXT_AR_STALL", + "TBOx_L2_EXT_AR_CNT_Q1", + "TBOx_L2_EXT_AR_CNT_Q2", + "TBOx_L2_EXT_AR_CNT_Q3", + "TBOx_L2_EXT_RRESP_0_127", + "TBOx_L2_EXT_RRESP_128_191", + "TBOx_L2_EXT_RRESP_192_255", + "TBOx_L2_EXT_RRESP_256_319", + "TBOx_L2_EXT_RRESP_320_383", + "TBOx_L2_EXT_WRITE", + "TBOx_L2_EXT_WRITE_NOSNP_FULL", + "TBOx_L2_EXT_WRITE_NOSNP_PTL", + "TBOx_L2_EXT_WRITE_SNP_FULL", + "TBOx_L2_EXT_WRITE_SNP_PTL", + "TBOx_L2_EXT_WRITE_BEATS", + "TBOx_L2_EXT_W_STALL", + "TBOx_L2_EXT_AW_CNT_Q1", + "TBOx_L2_EXT_AW_CNT_Q2", + "TBOx_L2_EXT_AW_CNT_Q3", + "TBOx_L2_EXT_SNOOP", + "TBOx_L2_EXT_SNOOP_STALL", + "TBOx_L2_EXT_SNOOP_RESP_CLEAN", + "TBOx_L2_EXT_SNOOP_RESP_DATA", + "TBOx_L2_EXT_SNOOP_INTERNAL", + "", + "", + "", + "", + "", + "", + "", + }; + + /* Mali-G310 */ + static const char * const hardware_counters_mali_tVAx[] = { + /* CSF */ + "", + "", + "", + "", + "TVAx_GPU_ACTIVE", + "TVAx_MCU_ACTIVE", + "TVAx_GPU_ITER_ACTIVE", + "TVAx_MMU_FLUSH_COUNT", + "TVAx_MESSAGES_SENT", + "TVAx_MESSAGES_RECEIVED", + "TVAx_GPU_IRQ_ACTIVE", + "TVAx_GPU_IRQ_COUNT", + "TVAx_CACHE_FLUSH_CYCLES", + "TVAx_CACHE_FLUSH", + "TVAx_DOORBELL_IRQ_ACTIVE", + "TVAx_DOORBELL_IRQ_COUNT", + "TVAx_ITER_TILER_ACTIVE", + "TVAx_ITER_TILER_JOB_COMPLETED", + "TVAx_ITER_TILER_IDVS_TASK_COMPLETED", + "TVAx_ITER_TILER_TOTAL_IDVS_TASK_ACTIVE_CYCLES", + "TVAx_ITER_TILER_IRQ_ACTIVE", + "TVAx_ITER_TILER_IRQ_COUNT", + "TVAx_ITER_TILER_READY_BLOCKED", + "TVAx_ITER_TILER_EP_DRAIN", + "TVAx_ITER_COMP_ACTIVE", + "TVAx_ITER_COMP_JOB_COMPLETED", + "TVAx_ITER_COMP_TASK_COMPLETED", + "TVAx_ITER_COMP_TOTAL_TASK_ACTIVE_CYCLES", + "TVAx_ITER_COMP_IRQ_ACTIVE", + "TVAx_ITER_COMP_IRQ_COUNT", + "TVAx_ITER_COMP_READY_BLOCKED", + "TVAx_ITER_COMP_EP_DRAIN", + "TVAx_ITER_FRAG_ACTIVE", + "TVAx_ITER_FRAG_JOB_COMPLETED", + "TVAx_ITER_FRAG_TASK_COMPLETED", + "TVAx_ITER_FRAG_TOTAL_TASK_ACTIVE_CYCLES", + "TVAx_ITER_FRAG_IRQ_ACTIVE", + "TVAx_ITER_FRAG_IRQ_COUNT", + "TVAx_ITER_FRAG_READY_BLOCKED", + "TVAx_ITER_FRAG_TILE_MAP_READ_WAIT", + "TVAx_CEU_ACTIVE", + "TVAx_CEU_READY_BLOCKED", + "TVAx_CEU_COMMAND_COUNT", + "TVAx_CEU_STATE_TRANSFER_STALLED", + "TVAx_CEU_LSU_REQUEST_STALLED", + "TVAx_LSU_ACTIVE", + "TVAx_LSU_OPERATIONS_COMPLETED", + "TVAx_LSU_TOTAL_HANDLERS_ACTIVE_CYCLES", + "TVAx_CSHWIF0_ENABLED", + "TVAx_CSHWIF0_PREFETCH_MISS_COUNT", + "TVAx_CSHWIF0_IRQ_ACTIVE", + "TVAx_CSHWIF0_WAIT_BLOCKED", + "TVAx_CSHWIF1_ENABLED", + "TVAx_CSHWIF1_PREFETCH_MISS_COUNT", + "TVAx_CSHWIF1_IRQ_ACTIVE", + "TVAx_CSHWIF1_WAIT_BLOCKED", + "TVAx_CSHWIF2_ENABLED", + "TVAx_CSHWIF2_PREFETCH_MISS_COUNT", + "TVAx_CSHWIF2_IRQ_ACTIVE", + "TVAx_CSHWIF2_WAIT_BLOCKED", + "TVAx_CSHWIF3_ENABLED", + "TVAx_CSHWIF3_PREFETCH_MISS_COUNT", + "TVAx_CSHWIF3_IRQ_ACTIVE", + "TVAx_CSHWIF3_WAIT_BLOCKED", + + /* Tiler */ + "", + "", + "", + "", + "TVAx_TILER_ACTIVE", + "TVAx_JOBS_PROCESSED", + "TVAx_TRIANGLES", + "TVAx_LINES", + "TVAx_POINTS", + "TVAx_FRONT_FACING", + "TVAx_BACK_FACING", + "TVAx_PRIM_VISIBLE", + "TVAx_PRIM_CULLED", + "TVAx_PRIM_CLIPPED", + "TVAx_PRIM_SAT_CULLED", + "TVAx_BIN_ALLOC_INIT", + "TVAx_BIN_ALLOC_OVERFLOW", + "TVAx_BUS_READ", + "TVAx_BUS_WRITE_UTLB0", + "TVAx_BUS_WRITE_UTLB1", + "TVAx_SUSPENDED", + "TVAx_IDVS_POS_SHAD_REQ", + "TVAx_IDVS_POS_SHAD_WAIT", + 
"TVAx_IDVS_POS_SHAD_STALL", + "TVAx_IDVS_POS_FIFO_FULL", + "TVAx_PREFETCH_STALL", + "TVAx_VCACHE_HIT", + "TVAx_VCACHE_MISS", + "TVAx_VCACHE_LINE_WAIT", + "TVAx_VFETCH_POS_READ_WAIT", + "TVAx_VFETCH_VERTEX_WAIT", + "TVAx_VFETCH_STALL", + "TVAx_PRIMASSY_STALL", + "TVAx_RESUMED", + "TVAx_IDVS_VBU_HIT", + "TVAx_IDVS_VBU_MISS", + "TVAx_IDVS_VBU_LINE_DEALLOCATE", + "TVAx_IDVS_VAR_SHAD_REQ", + "TVAx_IDVS_VAR_SHAD_STALL", + "TVAx_BINNER_STALL", + "TVAx_ITER_STALL", + "TVAx_COMPRESS_MISS", + "TVAx_COMPRESS_STALL", + "TVAx_PCACHE_HIT", + "TVAx_PCACHE_MISS", + "TVAx_PCACHE_MISS_STALL", + "TVAx_PCACHE_EVICT_STALL", + "TVAx_PMGR_PTR_WR_STALL", + "TVAx_PMGR_PTR_RD_STALL", + "TVAx_PMGR_CMD_WR_STALL", + "TVAx_WRBUF_ACTIVE", + "TVAx_WRBUF_HIT", + "TVAx_WRBUF_MISS", + "TVAx_WRBUF_NO_FREE_LINE_STALL", + "TVAx_WRBUF_NO_ASN_ID_STALL", + "TVAx_WRBUF_ASN_STALL", + "TVAx_UTLB0_TRANS", + "TVAx_UTLB0_TRANS_HIT", + "TVAx_UTLB0_TRANS_STALL", + "TVAx_UTLB0_MMU_REQ", + "TVAx_UTLB1_TRANS", + "TVAx_UTLB1_TRANS_HIT", + "TVAx_UTLB1_TRANS_STALL", + "TVAx_UTLB1_MMU_REQ", + + /* Shader Core */ + "", + "", + "", + "", + "TVAx_FRAG_ACTIVE", + "TVAx_FRAG_PRIMITIVES_OUT", + "TVAx_FRAG_PRIM_RAST", + "TVAx_FRAG_FPK_ACTIVE", + "TVAx_FRAG_STARVING", + "TVAx_FRAG_WARPS", + "TVAx_FRAG_PARTIAL_QUADS_RAST", + "TVAx_FRAG_QUADS_RAST", + "TVAx_FRAG_QUADS_EZS_TEST", + "TVAx_FRAG_QUADS_EZS_UPDATE", + "TVAx_FRAG_QUADS_EZS_KILL", + "TVAx_FRAG_LZS_TEST", + "TVAx_FRAG_LZS_KILL", + "TVAx_WARP_REG_SIZE_64", + "TVAx_FRAG_PTILES", + "TVAx_FRAG_TRANS_ELIM", + "TVAx_QUAD_FPK_KILLER", + "TVAx_FULL_QUAD_WARPS", + "TVAx_COMPUTE_ACTIVE", + "TVAx_COMPUTE_TASKS", + "TVAx_COMPUTE_WARPS", + "TVAx_COMPUTE_STARVING", + "TVAx_EXEC_CORE_ACTIVE", + "TVAx_EXEC_INSTR_FMA", + "TVAx_EXEC_INSTR_CVT", + "TVAx_EXEC_INSTR_SFU", + "TVAx_EXEC_INSTR_MSG", + "TVAx_EXEC_INSTR_DIVERGED", + "TVAx_EXEC_ICACHE_MISS", + "TVAx_EXEC_STARVE_ARITH", + "TVAx_CALL_BLEND_SHADER", + "TVAx_TEX_MSGI_NUM_FLITS", + "TVAx_TEX_DFCH_CLK_STALLED", + "TVAx_TEX_TFCH_CLK_STALLED", + "TVAx_TEX_TFCH_STARVED_PENDING_DATA_FETCH", + "TVAx_TEX_FILT_NUM_OPERATIONS", + "TVAx_TEX_FILT_NUM_FXR_OPERATIONS", + "TVAx_TEX_FILT_NUM_FST_OPERATIONS", + "TVAx_TEX_MSGO_NUM_MSG", + "TVAx_TEX_MSGO_NUM_FLITS", + "TVAx_LS_MEM_READ_FULL", + "TVAx_LS_MEM_READ_SHORT", + "TVAx_LS_MEM_WRITE_FULL", + "TVAx_LS_MEM_WRITE_SHORT", + "TVAx_LS_MEM_ATOMIC", + "TVAx_VARY_INSTR", + "TVAx_VARY_SLOT_32", + "TVAx_VARY_SLOT_16", + "TVAx_ATTR_INSTR", + "TVAx_SHADER_CORE_ACTIVE", + "TVAx_BEATS_RD_FTC", + "TVAx_BEATS_RD_FTC_EXT", + "TVAx_BEATS_RD_LSC", + "TVAx_BEATS_RD_LSC_EXT", + "TVAx_BEATS_RD_TEX", + "TVAx_BEATS_RD_TEX_EXT", + "TVAx_BEATS_RD_OTHER", + "TVAx_BEATS_WR_LSC_OTHER", + "TVAx_BEATS_WR_TIB", + "TVAx_BEATS_WR_LSC_WB", + + /* Memory System */ + "", + "", + "", + "", + "TVAx_MMU_REQUESTS", + "TVAx_MMU_TABLE_READS_L3", + "TVAx_MMU_TABLE_READS_L2", + "TVAx_MMU_HIT_L3", + "TVAx_MMU_HIT_L2", + "", + "", + "", + "TVAx_L2_RD_MSG_IN_EVICT", + "TVAx_L2_RD_MSG_IN_CU", + "TVAx_L2_SNP_MSG_IN_SNPRSP", + "TVAx_L2_RD_MSG_OUT_SNPREQ", + "TVAx_L2_RD_MSG_IN", + "TVAx_L2_RD_MSG_IN_STALL", + "TVAx_L2_WR_MSG_IN", + "TVAx_L2_WR_MSG_IN_STALL", + "TVAx_L2_SNP_MSG_IN", + "TVAx_L2_SNP_MSG_IN_STALL", + "TVAx_L2_RD_MSG_OUT", + "TVAx_L2_RD_MSG_OUT_STALL", + "TVAx_L2_WR_MSG_OUT", + "TVAx_L2_ANY_LOOKUP", + "TVAx_L2_READ_LOOKUP", + "TVAx_L2_WRITE_LOOKUP", + "TVAx_L2_EXT_SNOOP_LOOKUP", + "TVAx_L2_EXT_READ", + "TVAx_L2_EXT_READ_NOSNP", + "TVAx_L2_EXT_READ_UNIQUE", + "TVAx_L2_EXT_READ_BEATS", + "TVAx_L2_EXT_AR_STALL", + "TVAx_L2_EXT_AR_CNT_Q1", + 
"TVAx_L2_EXT_AR_CNT_Q2", + "TVAx_L2_EXT_AR_CNT_Q3", + "TVAx_L2_EXT_RRESP_0_127", + "TVAx_L2_EXT_RRESP_128_191", + "TVAx_L2_EXT_RRESP_192_255", + "TVAx_L2_EXT_RRESP_256_319", + "TVAx_L2_EXT_RRESP_320_383", + "TVAx_L2_EXT_WRITE", + "TVAx_L2_EXT_WRITE_NOSNP_FULL", + "TVAx_L2_EXT_WRITE_NOSNP_PTL", + "TVAx_L2_EXT_WRITE_SNP_FULL", + "TVAx_L2_EXT_WRITE_SNP_PTL", + "TVAx_L2_EXT_WRITE_BEATS", + "TVAx_L2_EXT_W_STALL", + "TVAx_L2_EXT_AW_CNT_Q1", + "TVAx_L2_EXT_AW_CNT_Q2", + "TVAx_L2_EXT_AW_CNT_Q3", + "TVAx_L2_EXT_SNOOP", + "TVAx_L2_EXT_SNOOP_STALL", + "TVAx_L2_EXT_SNOOP_RESP_CLEAN", + "TVAx_L2_EXT_SNOOP_RESP_DATA", + "TVAx_L2_EXT_SNOOP_INTERNAL", + "", + "", + "", + "", + "", + "", + "", + }; + + /* Mali-G510 */ + static const char * const hardware_counters_mali_tGRx[] = { + /* CSF */ + "", + "", + "", + "", + "TGRx_GPU_ACTIVE", + "TGRx_MCU_ACTIVE", + "TGRx_GPU_ITER_ACTIVE", + "TGRx_MMU_FLUSH_COUNT", + "TGRx_MESSAGES_SENT", + "TGRx_MESSAGES_RECEIVED", + "TGRx_GPU_IRQ_ACTIVE", + "TGRx_GPU_IRQ_COUNT", + "TGRx_CACHE_FLUSH_CYCLES", + "TGRx_CACHE_FLUSH", + "TGRx_DOORBELL_IRQ_ACTIVE", + "TGRx_DOORBELL_IRQ_COUNT", + "TGRx_ITER_TILER_ACTIVE", + "TGRx_ITER_TILER_JOB_COMPLETED", + "TGRx_ITER_TILER_IDVS_TASK_COMPLETED", + "TGRx_ITER_TILER_TOTAL_IDVS_TASK_ACTIVE_CYCLES", + "TGRx_ITER_TILER_IRQ_ACTIVE", + "TGRx_ITER_TILER_IRQ_COUNT", + "TGRx_ITER_TILER_READY_BLOCKED", + "TGRx_ITER_TILER_EP_DRAIN", + "TGRx_ITER_COMP_ACTIVE", + "TGRx_ITER_COMP_JOB_COMPLETED", + "TGRx_ITER_COMP_TASK_COMPLETED", + "TGRx_ITER_COMP_TOTAL_TASK_ACTIVE_CYCLES", + "TGRx_ITER_COMP_IRQ_ACTIVE", + "TGRx_ITER_COMP_IRQ_COUNT", + "TGRx_ITER_COMP_READY_BLOCKED", + "TGRx_ITER_COMP_EP_DRAIN", + "TGRx_ITER_FRAG_ACTIVE", + "TGRx_ITER_FRAG_JOB_COMPLETED", + "TGRx_ITER_FRAG_TASK_COMPLETED", + "TGRx_ITER_FRAG_TOTAL_TASK_ACTIVE_CYCLES", + "TGRx_ITER_FRAG_IRQ_ACTIVE", + "TGRx_ITER_FRAG_IRQ_COUNT", + "TGRx_ITER_FRAG_READY_BLOCKED", + "TGRx_ITER_FRAG_TILE_MAP_READ_WAIT", + "TGRx_CEU_ACTIVE", + "TGRx_CEU_READY_BLOCKED", + "TGRx_CEU_COMMAND_COUNT", + "TGRx_CEU_STATE_TRANSFER_STALLED", + "TGRx_CEU_LSU_REQUEST_STALLED", + "TGRx_LSU_ACTIVE", + "TGRx_LSU_OPERATIONS_COMPLETED", + "TGRx_LSU_TOTAL_HANDLERS_ACTIVE_CYCLES", + "TGRx_CSHWIF0_ENABLED", + "TGRx_CSHWIF0_PREFETCH_MISS_COUNT", + "TGRx_CSHWIF0_IRQ_ACTIVE", + "TGRx_CSHWIF0_WAIT_BLOCKED", + "TGRx_CSHWIF1_ENABLED", + "TGRx_CSHWIF1_PREFETCH_MISS_COUNT", + "TGRx_CSHWIF1_IRQ_ACTIVE", + "TGRx_CSHWIF1_WAIT_BLOCKED", + "TGRx_CSHWIF2_ENABLED", + "TGRx_CSHWIF2_PREFETCH_MISS_COUNT", + "TGRx_CSHWIF2_IRQ_ACTIVE", + "TGRx_CSHWIF2_WAIT_BLOCKED", + "TGRx_CSHWIF3_ENABLED", + "TGRx_CSHWIF3_PREFETCH_MISS_COUNT", + "TGRx_CSHWIF3_IRQ_ACTIVE", + "TGRx_CSHWIF3_WAIT_BLOCKED", + + /* Tiler */ + "", + "", + "", + "", + "TGRx_TILER_ACTIVE", + "TGRx_JOBS_PROCESSED", + "TGRx_TRIANGLES", + "TGRx_LINES", + "TGRx_POINTS", + "TGRx_FRONT_FACING", + "TGRx_BACK_FACING", + "TGRx_PRIM_VISIBLE", + "TGRx_PRIM_CULLED", + "TGRx_PRIM_CLIPPED", + "TGRx_PRIM_SAT_CULLED", + "TGRx_BIN_ALLOC_INIT", + "TGRx_BIN_ALLOC_OVERFLOW", + "TGRx_BUS_READ", + "TGRx_BUS_WRITE_UTLB0", + "TGRx_BUS_WRITE_UTLB1", + "TGRx_SUSPENDED", + "TGRx_IDVS_POS_SHAD_REQ", + "TGRx_IDVS_POS_SHAD_WAIT", + "TGRx_IDVS_POS_SHAD_STALL", + "TGRx_IDVS_POS_FIFO_FULL", + "TGRx_PREFETCH_STALL", + "TGRx_VCACHE_HIT", + "TGRx_VCACHE_MISS", + "TGRx_VCACHE_LINE_WAIT", + "TGRx_VFETCH_POS_READ_WAIT", + "TGRx_VFETCH_VERTEX_WAIT", + "TGRx_VFETCH_STALL", + "TGRx_PRIMASSY_STALL", + "TGRx_RESUMED", + "TGRx_IDVS_VBU_HIT", + "TGRx_IDVS_VBU_MISS", + "TGRx_IDVS_VBU_LINE_DEALLOCATE", + "TGRx_IDVS_VAR_SHAD_REQ", + 
"TGRx_IDVS_VAR_SHAD_STALL", + "TGRx_BINNER_STALL", + "TGRx_ITER_STALL", + "TGRx_COMPRESS_MISS", + "TGRx_COMPRESS_STALL", + "TGRx_PCACHE_HIT", + "TGRx_PCACHE_MISS", + "TGRx_PCACHE_MISS_STALL", + "TGRx_PCACHE_EVICT_STALL", + "TGRx_PMGR_PTR_WR_STALL", + "TGRx_PMGR_PTR_RD_STALL", + "TGRx_PMGR_CMD_WR_STALL", + "TGRx_WRBUF_ACTIVE", + "TGRx_WRBUF_HIT", + "TGRx_WRBUF_MISS", + "TGRx_WRBUF_NO_FREE_LINE_STALL", + "TGRx_WRBUF_NO_ASN_ID_STALL", + "TGRx_WRBUF_ASN_STALL", + "TGRx_UTLB0_TRANS", + "TGRx_UTLB0_TRANS_HIT", + "TGRx_UTLB0_TRANS_STALL", + "TGRx_UTLB0_MMU_REQ", + "TGRx_UTLB1_TRANS", + "TGRx_UTLB1_TRANS_HIT", + "TGRx_UTLB1_TRANS_STALL", + "TGRx_UTLB1_MMU_REQ", + + /* Shader Core */ + "", + "", + "", + "", + "TGRx_FRAG_ACTIVE", + "TGRx_FRAG_PRIMITIVES_OUT", + "TGRx_FRAG_PRIM_RAST", + "TGRx_FRAG_FPK_ACTIVE", + "TGRx_FRAG_STARVING", + "TGRx_FRAG_WARPS", + "TGRx_FRAG_PARTIAL_QUADS_RAST", + "TGRx_FRAG_QUADS_RAST", + "TGRx_FRAG_QUADS_EZS_TEST", + "TGRx_FRAG_QUADS_EZS_UPDATE", + "TGRx_FRAG_QUADS_EZS_KILL", + "TGRx_FRAG_LZS_TEST", + "TGRx_FRAG_LZS_KILL", + "TGRx_WARP_REG_SIZE_64", + "TGRx_FRAG_PTILES", + "TGRx_FRAG_TRANS_ELIM", + "TGRx_QUAD_FPK_KILLER", + "TGRx_FULL_QUAD_WARPS", + "TGRx_COMPUTE_ACTIVE", + "TGRx_COMPUTE_TASKS", + "TGRx_COMPUTE_WARPS", + "TGRx_COMPUTE_STARVING", + "TGRx_EXEC_CORE_ACTIVE", + "TGRx_EXEC_INSTR_FMA", + "TGRx_EXEC_INSTR_CVT", + "TGRx_EXEC_INSTR_SFU", + "TGRx_EXEC_INSTR_MSG", + "TGRx_EXEC_INSTR_DIVERGED", + "TGRx_EXEC_ICACHE_MISS", + "TGRx_EXEC_STARVE_ARITH", + "TGRx_CALL_BLEND_SHADER", + "TGRx_TEX_MSGI_NUM_FLITS", + "TGRx_TEX_DFCH_CLK_STALLED", + "TGRx_TEX_TFCH_CLK_STALLED", + "TGRx_TEX_TFCH_STARVED_PENDING_DATA_FETCH", + "TGRx_TEX_FILT_NUM_OPERATIONS", + "TGRx_TEX_FILT_NUM_FXR_OPERATIONS", + "TGRx_TEX_FILT_NUM_FST_OPERATIONS", + "TGRx_TEX_MSGO_NUM_MSG", + "TGRx_TEX_MSGO_NUM_FLITS", + "TGRx_LS_MEM_READ_FULL", + "TGRx_LS_MEM_READ_SHORT", + "TGRx_LS_MEM_WRITE_FULL", + "TGRx_LS_MEM_WRITE_SHORT", + "TGRx_LS_MEM_ATOMIC", + "TGRx_VARY_INSTR", + "TGRx_VARY_SLOT_32", + "TGRx_VARY_SLOT_16", + "TGRx_ATTR_INSTR", + "TGRx_SHADER_CORE_ACTIVE", + "TGRx_BEATS_RD_FTC", + "TGRx_BEATS_RD_FTC_EXT", + "TGRx_BEATS_RD_LSC", + "TGRx_BEATS_RD_LSC_EXT", + "TGRx_BEATS_RD_TEX", + "TGRx_BEATS_RD_TEX_EXT", + "TGRx_BEATS_RD_OTHER", + "TGRx_BEATS_WR_LSC_OTHER", + "TGRx_BEATS_WR_TIB", + "TGRx_BEATS_WR_LSC_WB", + + /* Memory System */ + "", + "", + "", + "", + "TGRx_MMU_REQUESTS", + "TGRx_MMU_TABLE_READS_L3", + "TGRx_MMU_TABLE_READS_L2", + "TGRx_MMU_HIT_L3", + "TGRx_MMU_HIT_L2", + "", + "", + "", + "TGRx_L2_RD_MSG_IN_EVICT", + "TGRx_L2_RD_MSG_IN_CU", + "TGRx_L2_SNP_MSG_IN_SNPRSP", + "TGRx_L2_RD_MSG_OUT_SNPREQ", + "TGRx_L2_RD_MSG_IN", + "TGRx_L2_RD_MSG_IN_STALL", + "TGRx_L2_WR_MSG_IN", + "TGRx_L2_WR_MSG_IN_STALL", + "TGRx_L2_SNP_MSG_IN", + "TGRx_L2_SNP_MSG_IN_STALL", + "TGRx_L2_RD_MSG_OUT", + "TGRx_L2_RD_MSG_OUT_STALL", + "TGRx_L2_WR_MSG_OUT", + "TGRx_L2_ANY_LOOKUP", + "TGRx_L2_READ_LOOKUP", + "TGRx_L2_WRITE_LOOKUP", + "TGRx_L2_EXT_SNOOP_LOOKUP", + "TGRx_L2_EXT_READ", + "TGRx_L2_EXT_READ_NOSNP", + "TGRx_L2_EXT_READ_UNIQUE", + "TGRx_L2_EXT_READ_BEATS", + "TGRx_L2_EXT_AR_STALL", + "TGRx_L2_EXT_AR_CNT_Q1", + "TGRx_L2_EXT_AR_CNT_Q2", + "TGRx_L2_EXT_AR_CNT_Q3", + "TGRx_L2_EXT_RRESP_0_127", + "TGRx_L2_EXT_RRESP_128_191", + "TGRx_L2_EXT_RRESP_192_255", + "TGRx_L2_EXT_RRESP_256_319", + "TGRx_L2_EXT_RRESP_320_383", + "TGRx_L2_EXT_WRITE", + "TGRx_L2_EXT_WRITE_NOSNP_FULL", + "TGRx_L2_EXT_WRITE_NOSNP_PTL", + "TGRx_L2_EXT_WRITE_SNP_FULL", + "TGRx_L2_EXT_WRITE_SNP_PTL", + "TGRx_L2_EXT_WRITE_BEATS", + "TGRx_L2_EXT_W_STALL", + 
"TGRx_L2_EXT_AW_CNT_Q1", + "TGRx_L2_EXT_AW_CNT_Q2", + "TGRx_L2_EXT_AW_CNT_Q3", + "TGRx_L2_EXT_SNOOP", + "TGRx_L2_EXT_SNOOP_STALL", + "TGRx_L2_EXT_SNOOP_RESP_CLEAN", + "TGRx_L2_EXT_SNOOP_RESP_DATA", + "TGRx_L2_EXT_SNOOP_INTERNAL", + "", + "", + "", + "", + "", + "", + "", + }; + + /* Mali-G610 */ + static const char * const hardware_counters_mali_tVIx[] = { + /* CSF */ + "", + "", + "", + "", + "TVIx_GPU_ACTIVE", + "TVIx_MCU_ACTIVE", + "TVIx_GPU_ITER_ACTIVE", + "TVIx_MMU_FLUSH_COUNT", + "TVIx_MESSAGES_SENT", + "TVIx_MESSAGES_RECEIVED", + "TVIx_GPU_IRQ_ACTIVE", + "TVIx_GPU_IRQ_COUNT", + "TVIx_CACHE_FLUSH_CYCLES", + "TVIx_CACHE_FLUSH", + "TVIx_DOORBELL_IRQ_ACTIVE", + "TVIx_DOORBELL_IRQ_COUNT", + "TVIx_ITER_TILER_ACTIVE", + "TVIx_ITER_TILER_JOB_COMPLETED", + "TVIx_ITER_TILER_IDVS_TASK_COMPLETED", + "TVIx_ITER_TILER_TOTAL_IDVS_TASK_ACTIVE_CYCLES", + "TVIx_ITER_TILER_IRQ_ACTIVE", + "TVIx_ITER_TILER_IRQ_COUNT", + "TVIx_ITER_TILER_READY_BLOCKED", + "TVIx_ITER_TILER_EP_DRAIN", + "TVIx_ITER_COMP_ACTIVE", + "TVIx_ITER_COMP_JOB_COMPLETED", + "TVIx_ITER_COMP_TASK_COMPLETED", + "TVIx_ITER_COMP_TOTAL_TASK_ACTIVE_CYCLES", + "TVIx_ITER_COMP_IRQ_ACTIVE", + "TVIx_ITER_COMP_IRQ_COUNT", + "TVIx_ITER_COMP_READY_BLOCKED", + "TVIx_ITER_COMP_EP_DRAIN", + "TVIx_ITER_FRAG_ACTIVE", + "TVIx_ITER_FRAG_JOB_COMPLETED", + "TVIx_ITER_FRAG_TASK_COMPLETED", + "TVIx_ITER_FRAG_TOTAL_TASK_ACTIVE_CYCLES", + "TVIx_ITER_FRAG_IRQ_ACTIVE", + "TVIx_ITER_FRAG_IRQ_COUNT", + "TVIx_ITER_FRAG_READY_BLOCKED", + "TVIx_ITER_FRAG_TILE_MAP_READ_WAIT", + "TVIx_CEU_ACTIVE", + "TVIx_CEU_READY_BLOCKED", + "TVIx_CEU_COMMAND_COUNT", + "TVIx_CEU_STATE_TRANSFER_STALLED", + "TVIx_CEU_LSU_REQUEST_STALLED", + "TVIx_LSU_ACTIVE", + "TVIx_LSU_OPERATIONS_COMPLETED", + "TVIx_LSU_TOTAL_HANDLERS_ACTIVE_CYCLES", + "TVIx_CSHWIF0_ENABLED", + "TVIx_CSHWIF0_PREFETCH_MISS_COUNT", + "TVIx_CSHWIF0_IRQ_ACTIVE", + "TVIx_CSHWIF0_WAIT_BLOCKED", + "TVIx_CSHWIF1_ENABLED", + "TVIx_CSHWIF1_PREFETCH_MISS_COUNT", + "TVIx_CSHWIF1_IRQ_ACTIVE", + "TVIx_CSHWIF1_WAIT_BLOCKED", + "TVIx_CSHWIF2_ENABLED", + "TVIx_CSHWIF2_PREFETCH_MISS_COUNT", + "TVIx_CSHWIF2_IRQ_ACTIVE", + "TVIx_CSHWIF2_WAIT_BLOCKED", + "TVIx_CSHWIF3_ENABLED", + "TVIx_CSHWIF3_PREFETCH_MISS_COUNT", + "TVIx_CSHWIF3_IRQ_ACTIVE", + "TVIx_CSHWIF3_WAIT_BLOCKED", + + /* Tiler */ + "", + "", + "", + "", + "TVIx_TILER_ACTIVE", + "TVIx_JOBS_PROCESSED", + "TVIx_TRIANGLES", + "TVIx_LINES", + "TVIx_POINTS", + "TVIx_FRONT_FACING", + "TVIx_BACK_FACING", + "TVIx_PRIM_VISIBLE", + "TVIx_PRIM_CULLED", + "TVIx_PRIM_CLIPPED", + "TVIx_PRIM_SAT_CULLED", + "TVIx_BIN_ALLOC_INIT", + "TVIx_BIN_ALLOC_OVERFLOW", + "TVIx_BUS_READ", + "TVIx_BUS_WRITE_UTLB0", + "TVIx_BUS_WRITE_UTLB1", + "TVIx_SUSPENDED", + "TVIx_IDVS_POS_SHAD_REQ", + "TVIx_IDVS_POS_SHAD_WAIT", + "TVIx_IDVS_POS_SHAD_STALL", + "TVIx_IDVS_POS_FIFO_FULL", + "TVIx_PREFETCH_STALL", + "TVIx_VCACHE_HIT", + "TVIx_VCACHE_MISS", + "TVIx_VCACHE_LINE_WAIT", + "TVIx_VFETCH_POS_READ_WAIT", + "TVIx_VFETCH_VERTEX_WAIT", + "TVIx_VFETCH_STALL", + "TVIx_PRIMASSY_STALL", + "TVIx_RESUMED", + "TVIx_IDVS_VBU_HIT", + "TVIx_IDVS_VBU_MISS", + "TVIx_IDVS_VBU_LINE_DEALLOCATE", + "TVIx_IDVS_VAR_SHAD_REQ", + "TVIx_IDVS_VAR_SHAD_STALL", + "TVIx_BINNER_STALL", + "TVIx_ITER_STALL", + "TVIx_COMPRESS_MISS", + "TVIx_COMPRESS_STALL", + "TVIx_PCACHE_HIT", + "TVIx_PCACHE_MISS", + "TVIx_PCACHE_MISS_STALL", + "TVIx_PCACHE_EVICT_STALL", + "TVIx_PMGR_PTR_WR_STALL", + "TVIx_PMGR_PTR_RD_STALL", + "TVIx_PMGR_CMD_WR_STALL", + "TVIx_WRBUF_ACTIVE", + "TVIx_WRBUF_HIT", + "TVIx_WRBUF_MISS", + "TVIx_WRBUF_NO_FREE_LINE_STALL", + 
"TVIx_WRBUF_NO_ASN_ID_STALL", + "TVIx_WRBUF_ASN_STALL", + "TVIx_UTLB0_TRANS", + "TVIx_UTLB0_TRANS_HIT", + "TVIx_UTLB0_TRANS_STALL", + "TVIx_UTLB0_MMU_REQ", + "TVIx_UTLB1_TRANS", + "TVIx_UTLB1_TRANS_HIT", + "TVIx_UTLB1_TRANS_STALL", + "TVIx_UTLB1_MMU_REQ", + + /* Shader Core */ + "", + "", + "", + "", + "TVIx_FRAG_ACTIVE", + "TVIx_FRAG_PRIMITIVES_OUT", + "TVIx_FRAG_PRIM_RAST", + "TVIx_FRAG_FPK_ACTIVE", + "TVIx_FRAG_STARVING", + "TVIx_FRAG_WARPS", + "TVIx_FRAG_PARTIAL_QUADS_RAST", + "TVIx_FRAG_QUADS_RAST", + "TVIx_FRAG_QUADS_EZS_TEST", + "TVIx_FRAG_QUADS_EZS_UPDATE", + "TVIx_FRAG_QUADS_EZS_KILL", + "TVIx_FRAG_LZS_TEST", + "TVIx_FRAG_LZS_KILL", + "TVIx_WARP_REG_SIZE_64", + "TVIx_FRAG_PTILES", + "TVIx_FRAG_TRANS_ELIM", + "TVIx_QUAD_FPK_KILLER", + "TVIx_FULL_QUAD_WARPS", + "TVIx_COMPUTE_ACTIVE", + "TVIx_COMPUTE_TASKS", + "TVIx_COMPUTE_WARPS", + "TVIx_COMPUTE_STARVING", + "TVIx_EXEC_CORE_ACTIVE", + "TVIx_EXEC_INSTR_FMA", + "TVIx_EXEC_INSTR_CVT", + "TVIx_EXEC_INSTR_SFU", + "TVIx_EXEC_INSTR_MSG", + "TVIx_EXEC_INSTR_DIVERGED", + "TVIx_EXEC_ICACHE_MISS", + "TVIx_EXEC_STARVE_ARITH", + "TVIx_CALL_BLEND_SHADER", + "TVIx_TEX_MSGI_NUM_FLITS", + "TVIx_TEX_DFCH_CLK_STALLED", + "TVIx_TEX_TFCH_CLK_STALLED", + "TVIx_TEX_TFCH_STARVED_PENDING_DATA_FETCH", + "TVIx_TEX_FILT_NUM_OPERATIONS", + "TVIx_TEX_FILT_NUM_FXR_OPERATIONS", + "TVIx_TEX_FILT_NUM_FST_OPERATIONS", + "TVIx_TEX_MSGO_NUM_MSG", + "TVIx_TEX_MSGO_NUM_FLITS", + "TVIx_LS_MEM_READ_FULL", + "TVIx_LS_MEM_READ_SHORT", + "TVIx_LS_MEM_WRITE_FULL", + "TVIx_LS_MEM_WRITE_SHORT", + "TVIx_LS_MEM_ATOMIC", + "TVIx_VARY_INSTR", + "TVIx_VARY_SLOT_32", + "TVIx_VARY_SLOT_16", + "TVIx_ATTR_INSTR", + "TVIx_SHADER_CORE_ACTIVE", + "TVIx_BEATS_RD_FTC", + "TVIx_BEATS_RD_FTC_EXT", + "TVIx_BEATS_RD_LSC", + "TVIx_BEATS_RD_LSC_EXT", + "TVIx_BEATS_RD_TEX", + "TVIx_BEATS_RD_TEX_EXT", + "TVIx_BEATS_RD_OTHER", + "TVIx_BEATS_WR_LSC_OTHER", + "TVIx_BEATS_WR_TIB", + "TVIx_BEATS_WR_LSC_WB", + + /* Memory System */ + "", + "", + "", + "", + "TVIx_MMU_REQUESTS", + "TVIx_MMU_TABLE_READS_L3", + "TVIx_MMU_TABLE_READS_L2", + "TVIx_MMU_HIT_L3", + "TVIx_MMU_HIT_L2", + "", + "", + "", + "TVIx_L2_RD_MSG_IN_EVICT", + "TVIx_L2_RD_MSG_IN_CU", + "TVIx_L2_SNP_MSG_IN_SNPRSP", + "TVIx_L2_RD_MSG_OUT_SNPREQ", + "TVIx_L2_RD_MSG_IN", + "TVIx_L2_RD_MSG_IN_STALL", + "TVIx_L2_WR_MSG_IN", + "TVIx_L2_WR_MSG_IN_STALL", + "TVIx_L2_SNP_MSG_IN", + "TVIx_L2_SNP_MSG_IN_STALL", + "TVIx_L2_RD_MSG_OUT", + "TVIx_L2_RD_MSG_OUT_STALL", + "TVIx_L2_WR_MSG_OUT", + "TVIx_L2_ANY_LOOKUP", + "TVIx_L2_READ_LOOKUP", + "TVIx_L2_WRITE_LOOKUP", + "TVIx_L2_EXT_SNOOP_LOOKUP", + "TVIx_L2_EXT_READ", + "TVIx_L2_EXT_READ_NOSNP", + "TVIx_L2_EXT_READ_UNIQUE", + "TVIx_L2_EXT_READ_BEATS", + "TVIx_L2_EXT_AR_STALL", + "TVIx_L2_EXT_AR_CNT_Q1", + "TVIx_L2_EXT_AR_CNT_Q2", + "TVIx_L2_EXT_AR_CNT_Q3", + "TVIx_L2_EXT_RRESP_0_127", + "TVIx_L2_EXT_RRESP_128_191", + "TVIx_L2_EXT_RRESP_192_255", + "TVIx_L2_EXT_RRESP_256_319", + "TVIx_L2_EXT_RRESP_320_383", + "TVIx_L2_EXT_WRITE", + "TVIx_L2_EXT_WRITE_NOSNP_FULL", + "TVIx_L2_EXT_WRITE_NOSNP_PTL", + "TVIx_L2_EXT_WRITE_SNP_FULL", + "TVIx_L2_EXT_WRITE_SNP_PTL", + "TVIx_L2_EXT_WRITE_BEATS", + "TVIx_L2_EXT_W_STALL", + "TVIx_L2_EXT_AW_CNT_Q1", + "TVIx_L2_EXT_AW_CNT_Q2", + "TVIx_L2_EXT_AW_CNT_Q3", + "TVIx_L2_EXT_SNOOP", + "TVIx_L2_EXT_SNOOP_STALL", + "TVIx_L2_EXT_SNOOP_RESP_CLEAN", + "TVIx_L2_EXT_SNOOP_RESP_DATA", + "TVIx_L2_EXT_SNOOP_INTERNAL", + "", + "", + "", + "", + "", + "", + "", + }; + + /* Mali-G710 */ + static const char * const hardware_counters_mali_tODx[] = { + /* CSF */ + "", + "", + "", + "", + 
"TODx_GPU_ACTIVE", + "TODx_MCU_ACTIVE", + "TODx_GPU_ITER_ACTIVE", + "TODx_MMU_FLUSH_COUNT", + "TODx_MESSAGES_SENT", + "TODx_MESSAGES_RECEIVED", + "TODx_GPU_IRQ_ACTIVE", + "TODx_GPU_IRQ_COUNT", + "TODx_CACHE_FLUSH_CYCLES", + "TODx_CACHE_FLUSH", + "TODx_DOORBELL_IRQ_ACTIVE", + "TODx_DOORBELL_IRQ_COUNT", + "TODx_ITER_TILER_ACTIVE", + "TODx_ITER_TILER_JOB_COMPLETED", + "TODx_ITER_TILER_IDVS_TASK_COMPLETED", + "TODx_ITER_TILER_TOTAL_IDVS_TASK_ACTIVE_CYCLES", + "TODx_ITER_TILER_IRQ_ACTIVE", + "TODx_ITER_TILER_IRQ_COUNT", + "TODx_ITER_TILER_READY_BLOCKED", + "TODx_ITER_TILER_EP_DRAIN", + "TODx_ITER_COMP_ACTIVE", + "TODx_ITER_COMP_JOB_COMPLETED", + "TODx_ITER_COMP_TASK_COMPLETED", + "TODx_ITER_COMP_TOTAL_TASK_ACTIVE_CYCLES", + "TODx_ITER_COMP_IRQ_ACTIVE", + "TODx_ITER_COMP_IRQ_COUNT", + "TODx_ITER_COMP_READY_BLOCKED", + "TODx_ITER_COMP_EP_DRAIN", + "TODx_ITER_FRAG_ACTIVE", + "TODx_ITER_FRAG_JOB_COMPLETED", + "TODx_ITER_FRAG_TASK_COMPLETED", + "TODx_ITER_FRAG_TOTAL_TASK_ACTIVE_CYCLES", + "TODx_ITER_FRAG_IRQ_ACTIVE", + "TODx_ITER_FRAG_IRQ_COUNT", + "TODx_ITER_FRAG_READY_BLOCKED", + "TODx_ITER_FRAG_TILE_MAP_READ_WAIT", + "TODx_CEU_ACTIVE", + "TODx_CEU_READY_BLOCKED", + "TODx_CEU_COMMAND_COUNT", + "TODx_CEU_STATE_TRANSFER_STALLED", + "TODx_CEU_LSU_REQUEST_STALLED", + "TODx_LSU_ACTIVE", + "TODx_LSU_OPERATIONS_COMPLETED", + "TODx_LSU_TOTAL_HANDLERS_ACTIVE_CYCLES", + "TODx_CSHWIF0_ENABLED", + "TODx_CSHWIF0_PREFETCH_MISS_COUNT", + "TODx_CSHWIF0_IRQ_ACTIVE", + "TODx_CSHWIF0_WAIT_BLOCKED", + "TODx_CSHWIF1_ENABLED", + "TODx_CSHWIF1_PREFETCH_MISS_COUNT", + "TODx_CSHWIF1_IRQ_ACTIVE", + "TODx_CSHWIF1_WAIT_BLOCKED", + "TODx_CSHWIF2_ENABLED", + "TODx_CSHWIF2_PREFETCH_MISS_COUNT", + "TODx_CSHWIF2_IRQ_ACTIVE", + "TODx_CSHWIF2_WAIT_BLOCKED", + "TODx_CSHWIF3_ENABLED", + "TODx_CSHWIF3_PREFETCH_MISS_COUNT", + "TODx_CSHWIF3_IRQ_ACTIVE", + "TODx_CSHWIF3_WAIT_BLOCKED", + + /* Tiler */ + "", + "", + "", + "", + "TODx_TILER_ACTIVE", + "TODx_JOBS_PROCESSED", + "TODx_TRIANGLES", + "TODx_LINES", + "TODx_POINTS", + "TODx_FRONT_FACING", + "TODx_BACK_FACING", + "TODx_PRIM_VISIBLE", + "TODx_PRIM_CULLED", + "TODx_PRIM_CLIPPED", + "TODx_PRIM_SAT_CULLED", + "TODx_BIN_ALLOC_INIT", + "TODx_BIN_ALLOC_OVERFLOW", + "TODx_BUS_READ", + "TODx_BUS_WRITE_UTLB0", + "TODx_BUS_WRITE_UTLB1", + "TODx_SUSPENDED", + "TODx_IDVS_POS_SHAD_REQ", + "TODx_IDVS_POS_SHAD_WAIT", + "TODx_IDVS_POS_SHAD_STALL", + "TODx_IDVS_POS_FIFO_FULL", + "TODx_PREFETCH_STALL", + "TODx_VCACHE_HIT", + "TODx_VCACHE_MISS", + "TODx_VCACHE_LINE_WAIT", + "TODx_VFETCH_POS_READ_WAIT", + "TODx_VFETCH_VERTEX_WAIT", + "TODx_VFETCH_STALL", + "TODx_PRIMASSY_STALL", + "TODx_RESUMED", + "TODx_IDVS_VBU_HIT", + "TODx_IDVS_VBU_MISS", + "TODx_IDVS_VBU_LINE_DEALLOCATE", + "TODx_IDVS_VAR_SHAD_REQ", + "TODx_IDVS_VAR_SHAD_STALL", + "TODx_BINNER_STALL", + "TODx_ITER_STALL", + "TODx_COMPRESS_MISS", + "TODx_COMPRESS_STALL", + "TODx_PCACHE_HIT", + "TODx_PCACHE_MISS", + "TODx_PCACHE_MISS_STALL", + "TODx_PCACHE_EVICT_STALL", + "TODx_PMGR_PTR_WR_STALL", + "TODx_PMGR_PTR_RD_STALL", + "TODx_PMGR_CMD_WR_STALL", + "TODx_WRBUF_ACTIVE", + "TODx_WRBUF_HIT", + "TODx_WRBUF_MISS", + "TODx_WRBUF_NO_FREE_LINE_STALL", + "TODx_WRBUF_NO_ASN_ID_STALL", + "TODx_WRBUF_ASN_STALL", + "TODx_UTLB0_TRANS", + "TODx_UTLB0_TRANS_HIT", + "TODx_UTLB0_TRANS_STALL", + "TODx_UTLB0_MMU_REQ", + "TODx_UTLB1_TRANS", + "TODx_UTLB1_TRANS_HIT", + "TODx_UTLB1_TRANS_STALL", + "TODx_UTLB1_MMU_REQ", + + /* Shader Core */ + "", + "", + "", + "", + "TODx_FRAG_ACTIVE", + "TODx_FRAG_PRIMITIVES_OUT", + "TODx_FRAG_PRIM_RAST", + "TODx_FRAG_FPK_ACTIVE", 
+ "TODx_FRAG_STARVING", + "TODx_FRAG_WARPS", + "TODx_FRAG_PARTIAL_QUADS_RAST", + "TODx_FRAG_QUADS_RAST", + "TODx_FRAG_QUADS_EZS_TEST", + "TODx_FRAG_QUADS_EZS_UPDATE", + "TODx_FRAG_QUADS_EZS_KILL", + "TODx_FRAG_LZS_TEST", + "TODx_FRAG_LZS_KILL", + "TODx_WARP_REG_SIZE_64", + "TODx_FRAG_PTILES", + "TODx_FRAG_TRANS_ELIM", + "TODx_QUAD_FPK_KILLER", + "TODx_FULL_QUAD_WARPS", + "TODx_COMPUTE_ACTIVE", + "TODx_COMPUTE_TASKS", + "TODx_COMPUTE_WARPS", + "TODx_COMPUTE_STARVING", + "TODx_EXEC_CORE_ACTIVE", + "TODx_EXEC_INSTR_FMA", + "TODx_EXEC_INSTR_CVT", + "TODx_EXEC_INSTR_SFU", + "TODx_EXEC_INSTR_MSG", + "TODx_EXEC_INSTR_DIVERGED", + "TODx_EXEC_ICACHE_MISS", + "TODx_EXEC_STARVE_ARITH", + "TODx_CALL_BLEND_SHADER", + "TODx_TEX_MSGI_NUM_FLITS", + "TODx_TEX_DFCH_CLK_STALLED", + "TODx_TEX_TFCH_CLK_STALLED", + "TODx_TEX_TFCH_STARVED_PENDING_DATA_FETCH", + "TODx_TEX_FILT_NUM_OPERATIONS", + "TODx_TEX_FILT_NUM_FXR_OPERATIONS", + "TODx_TEX_FILT_NUM_FST_OPERATIONS", + "TODx_TEX_MSGO_NUM_MSG", + "TODx_TEX_MSGO_NUM_FLITS", + "TODx_LS_MEM_READ_FULL", + "TODx_LS_MEM_READ_SHORT", + "TODx_LS_MEM_WRITE_FULL", + "TODx_LS_MEM_WRITE_SHORT", + "TODx_LS_MEM_ATOMIC", + "TODx_VARY_INSTR", + "TODx_VARY_SLOT_32", + "TODx_VARY_SLOT_16", + "TODx_ATTR_INSTR", + "TODx_SHADER_CORE_ACTIVE", + "TODx_BEATS_RD_FTC", + "TODx_BEATS_RD_FTC_EXT", + "TODx_BEATS_RD_LSC", + "TODx_BEATS_RD_LSC_EXT", + "TODx_BEATS_RD_TEX", + "TODx_BEATS_RD_TEX_EXT", + "TODx_BEATS_RD_OTHER", + "TODx_BEATS_WR_LSC_OTHER", + "TODx_BEATS_WR_TIB", + "TODx_BEATS_WR_LSC_WB", + + /* Memory System */ + "", + "", + "", + "", + "TODx_MMU_REQUESTS", + "TODx_MMU_TABLE_READS_L3", + "TODx_MMU_TABLE_READS_L2", + "TODx_MMU_HIT_L3", + "TODx_MMU_HIT_L2", + "", + "", + "", + "TODx_L2_RD_MSG_IN_EVICT", + "TODx_L2_RD_MSG_IN_CU", + "TODx_L2_SNP_MSG_IN_SNPRSP", + "TODx_L2_RD_MSG_OUT_SNPREQ", + "TODx_L2_RD_MSG_IN", + "TODx_L2_RD_MSG_IN_STALL", + "TODx_L2_WR_MSG_IN", + "TODx_L2_WR_MSG_IN_STALL", + "TODx_L2_SNP_MSG_IN", + "TODx_L2_SNP_MSG_IN_STALL", + "TODx_L2_RD_MSG_OUT", + "TODx_L2_RD_MSG_OUT_STALL", + "TODx_L2_WR_MSG_OUT", + "TODx_L2_ANY_LOOKUP", + "TODx_L2_READ_LOOKUP", + "TODx_L2_WRITE_LOOKUP", + "TODx_L2_EXT_SNOOP_LOOKUP", + "TODx_L2_EXT_READ", + "TODx_L2_EXT_READ_NOSNP", + "TODx_L2_EXT_READ_UNIQUE", + "TODx_L2_EXT_READ_BEATS", + "TODx_L2_EXT_AR_STALL", + "TODx_L2_EXT_AR_CNT_Q1", + "TODx_L2_EXT_AR_CNT_Q2", + "TODx_L2_EXT_AR_CNT_Q3", + "TODx_L2_EXT_RRESP_0_127", + "TODx_L2_EXT_RRESP_128_191", + "TODx_L2_EXT_RRESP_192_255", + "TODx_L2_EXT_RRESP_256_319", + "TODx_L2_EXT_RRESP_320_383", + "TODx_L2_EXT_WRITE", + "TODx_L2_EXT_WRITE_NOSNP_FULL", + "TODx_L2_EXT_WRITE_NOSNP_PTL", + "TODx_L2_EXT_WRITE_SNP_FULL", + "TODx_L2_EXT_WRITE_SNP_PTL", + "TODx_L2_EXT_WRITE_BEATS", + "TODx_L2_EXT_W_STALL", + "TODx_L2_EXT_AW_CNT_Q1", + "TODx_L2_EXT_AW_CNT_Q2", + "TODx_L2_EXT_AW_CNT_Q3", + "TODx_L2_EXT_SNOOP", + "TODx_L2_EXT_SNOOP_STALL", + "TODx_L2_EXT_SNOOP_RESP_CLEAN", + "TODx_L2_EXT_SNOOP_RESP_DATA", + "TODx_L2_EXT_SNOOP_INTERNAL", + "", + "", + "", + "", + "", + "", + "", + }; + +} + +#endif /* NATIVE_GATOR_DAEMON_MALI_USERSPACE_MALIHWCNTRNAMES_BIFROST_H_ */ diff --git a/daemon/non_root/NonRootSource.cpp b/daemon/non_root/NonRootSource.cpp index 74c2ae83..aea38138 100644 --- a/daemon/non_root/NonRootSource.cpp +++ b/daemon/non_root/NonRootSource.cpp @@ -26,17 +26,21 @@ #include namespace non_root { + static constexpr std::size_t default_buffer_size = 1UL * 1024UL * 1024UL; + NonRootSource::NonRootSource(NonRootDriver & driver_, sem_t & senderSem_, + std::function execTargetAppCallback_, std::function 
profilingStartedCallback_, const ICpuInfo & cpuInfo) - : mSwitchBuffers(1 * 1024 * 1024, senderSem_), - mGlobalCounterBuffer(1 * 1024 * 1024, senderSem_), - mProcessCounterBuffer(1 * 1024 * 1024, senderSem_), - mMiscBuffer(1 * 1024 * 1024, senderSem_), + : mSwitchBuffers(default_buffer_size, senderSem_), + mGlobalCounterBuffer(default_buffer_size, senderSem_), + mProcessCounterBuffer(default_buffer_size, senderSem_), + mMiscBuffer(default_buffer_size, senderSem_), interrupted(false), timestampSource(CLOCK_MONOTONIC_RAW), driver(driver_), + execTargetAppCallback(std::move(execTargetAppCallback_)), profilingStartedCallback(std::move(profilingStartedCallback_)), cpuInfo(cpuInfo) @@ -71,6 +75,7 @@ namespace non_root { ProcessPoller processPoller(processStateTracker, timestampSource); profilingStartedCallback(); + execTargetAppCallback(); const useconds_t sleepIntervalUs = (gSessionData.mSampleRate < 1000 ? 10000 : 1000); // select 1ms or 10ms depending on normal or low rate @@ -142,8 +147,7 @@ namespace non_root { LOG_DEBUG("clock_gettime failed"); return {}; } - const int64_t timestamp = ts.tv_sec * NS_PER_S + ts.tv_nsec; - + const uint64_t timestamp = ts.tv_sec * NS_PER_S + ts.tv_nsec; const uint64_t monotonicStarted = timestampSource.getBaseTimestampNS(); const uint64_t currTime = 0; diff --git a/daemon/non_root/NonRootSource.h b/daemon/non_root/NonRootSource.h index 796c7619..8f6414b2 100644 --- a/daemon/non_root/NonRootSource.h +++ b/daemon/non_root/NonRootSource.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2017-2021 by Arm Limited. All rights reserved. */ +/* Copyright (C) 2017-2022 by Arm Limited. All rights reserved. */ #ifndef INCLUDE_NON_ROOT_NONROOTSOURCE_H #define INCLUDE_NON_ROOT_NONROOTSOURCE_H @@ -25,6 +25,7 @@ namespace non_root { public: NonRootSource(NonRootDriver & driver, sem_t & senderSem, + std::function execTargetAppCallback, std::function profilingStartedCallback, const ICpuInfo & cpuInfo); @@ -41,6 +42,7 @@ namespace non_root { std::atomic interrupted; lib::TimestampSource timestampSource; NonRootDriver & driver; + std::function execTargetAppCallback; std::function profilingStartedCallback; const ICpuInfo & cpuInfo; diff --git a/daemon/xml/PmuXMLParser.cpp b/daemon/xml/PmuXMLParser.cpp index aa0485f7..9ebb63a5 100644 --- a/daemon/xml/PmuXMLParser.cpp +++ b/daemon/xml/PmuXMLParser.cpp @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2021 by Arm Limited. All rights reserved. */ +/* Copyright (C) 2010-2022 by Arm Limited. All rights reserved. */ #include "xml/PmuXMLParser.h" @@ -154,7 +154,7 @@ bool parseXml(const char * const xml, PmuXML & pmuXml) const char * const counterSet = (counterSetAttr != nullptr ? counterSetAttr : id); // uses id as default const char * const coreName = mxmlElementGetAttr(node, ATTR_CORE_NAME); const char * const dtName = mxmlElementGetAttr(node, ATTR_DT_NAME); - const char * const speName = mxmlElementGetAttr(node, ATTR_SPE_NAME); + const char * speName = mxmlElementGetAttr(node, ATTR_SPE_NAME); const char * const pmncCountersStr = mxmlElementGetAttr(node, ATTR_PMNC_COUNTERS); const char * const profileStr = mxmlElementGetAttr(node, ATTR_PROFILE); @@ -209,6 +209,26 @@ bool parseXml(const char * const xml, PmuXML & pmuXml) ATTR_PMNC_COUNTERS, pmncCounters); + // Check if SPE name is specified for the given CPU. If so, check to see if the SPE device is configured on the device. 
+ if (speName != nullptr) + { + bool speDeviceFound = false; + lib::FsEntryDirectoryIterator it = lib::FsEntry::create(PERF_DEVICES).children(); + std::optional child; + while (!!(child = it.next())) { + if (child->name().find("spe") != std::string::npos) { + // SPE device found in /sys/bus/event_source/devices + speDeviceFound = true; + break; + } + } + + if (!speDeviceFound) + { + speName = nullptr; + } + } + pmuXml.cpus.emplace_back(coreName, id, counterSet, dtName, speName, std::move(cpuIds), pmncCounters, isV8); } diff --git a/gator_me.py b/gator_me.py index dfad1fac..e6d3dffb 100755 --- a/gator_me.py +++ b/gator_me.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# Copyright (C) 2019-2021 by Arm Limited +# Copyright (C) 2019-2022 by Arm Limited # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -573,16 +573,9 @@ def clean_gatord(device, package): device.adb_quiet("shell", "am", "force-stop", package) # Remove any data files in both bounce directory and app directory - adir = "/data/data/%s/" % package device.adb_quiet("shell", "rm", "-f", "%sgatord" % bounce_dir) device.adb_quiet("shell", "rm", "-f", "%sconfiguration.xml" % bounce_dir) device.adb_quiet("shell", "rm", "-rf", "%s%s.apc" % (bounce_dir, package)) - target = "%sgatord" % adir - device.adb_quiet("shell", "run-as", package, "rm", "-f", target) - target = "%sconfiguration.xml" % adir - device.adb_quiet("shell", "run-as", package, "rm", "-f", target) - target = "%s%s.apc" % (adir, package) - device.adb_quiet("shell", "run-as", package, "rm", "-rf", target) # Disable perf counters device.adb_quiet("shell", "setprop", "security.perf_harden", "1") @@ -599,17 +592,13 @@ def install_gatord(device, package, gatord, configuration): may be None for non-headless runs. 
""" # Install gatord - adir = "/data/data/%s/" % package device.adb("push", gatord, "%sgatord" % bounce_dir) device.adb("shell", "chmod", "0777", "%sgatord" % bounce_dir) - device.adb("shell", "run-as", package, "cp", "%sgatord" % bounce_dir, adir) # Install gatord counter configuration if configuration: device.adb("push", configuration, "%sconfiguration.xml" % bounce_dir) device.adb("shell", "chmod", "0666", "%sconfiguration.xml" % bounce_dir) - device.adb("shell", "run-as", package, "cp", - "%sconfiguration.xml" % bounce_dir, adir) # Enable perf conters device.adb("shell", "setprop", "security.perf_harden", "0") @@ -626,8 +615,7 @@ def run_gatord_interactive(device, package): # Run gatord commands = [ - "shell", "run-as", package, "/data/data/%s/gatord" % package, - "--wait-process", package, "-p", "uds"] + "shell", "%sgatord" % bounce_dir, "--android-pkg", package, "-p", "uds"] if DEBUG_GATORD: commands.append("-d") @@ -681,44 +669,38 @@ def run_gatord_headless(device, package, outputName, timeout): # Run gatord apcName = "%s.apc" % package + remoteApcPath = "%s%s" % (bounce_dir, apcName,) device.adb( - "shell", "run-as", package, "/data/data/%s/gatord" % package, - "--wait-process", package, "--stop-on-exit", "yes", - "--max-duration", "%u" % timeout, "--output", apcName) + "shell", "%sgatord" % bounce_dir, + "--android-pkg", package, "--stop-on-exit", "yes", + "--max-duration", "%u" % timeout, "--output", remoteApcPath) print(" Capture complete, downloading from target") - with tempfile.NamedTemporaryFile() as fileHandle: - # Fetch the results by streaming a tar file; we can't "adb pull" - # directly for new Android applications due to SELinux policy - tempName = fileHandle.name - fileHandle.close() - device.adb( - "exec-out", "run-as", package, "tar", "-c", apcName, ">", tempName, - text=False, shell=True) - - # Repack the tar file into the required output format - with tempfile.TemporaryDirectory() as tempDir: - with tarfile.TarFile(tempName) as tarHandle: - # Extract the tar file - tarHandle.extractall(tempDir) - - # Rename the APC to the required name - outApcName = os.path.basename(outputName) - if outApcName.endswith(".zip"): - outApcName = outApcName[:-4] - - oldName = os.path.join(tempDir, apcName) - newName = os.path.join(tempDir, outApcName) - os.rename(oldName, newName) - - # Pack as appropriate - if outputName.endswith(".apc"): - shutil.move(newName, outputName) - else: - # Remove .zip from the path (the shutil function adds it) - outZipName = outputName[:-4] - shutil.make_archive(outZipName, "zip", tempDir) + with tempfile.TemporaryDirectory() as tempDir: + # Fetch the results + device.adb("pull", remoteApcPath, tempDir) + + # Repack the capture directory into the required output format + + # Rename the APC to the required name + outApcName = os.path.basename(outputName) + if outApcName.endswith(".zip"): + outApcName = outApcName[:-4] + + oldName = os.path.join(tempDir, apcName) + newName = os.path.join(tempDir, outApcName) + + if (oldName != newName): + os.rename(oldName, newName) + + # Pack as appropriate + if outputName.endswith(".apc"): + shutil.move(newName, outputName) + else: + # Remove .zip from the path (the shutil function adds it) + outZipName = outputName[:-4] + shutil.make_archive(outZipName, "zip", tempDir) def exit_handler(device, package): diff --git a/release-notes.md b/release-notes.md index 55fbb7db..13f9e94c 100644 --- a/release-notes.md +++ b/release-notes.md @@ -1,3 +1,7 @@ +# Gator 8.1 + +This release completes the preparatory work needed to support 
accessing data from Perfetto (traced) on the target. + # Gator 8.0 This release introduces support for Android Thermal State polling and associated visualization in Streamline. As a side effect, separate Android and Linux binaries are shipped with Streamline.
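
Note on the PmuXMLParser.cpp hunk above: it keeps a CPU's SPE name only when a matching SPE device is visible under /sys/bus/event_source/devices (the location the patch's own comment and PERF_DEVICES constant refer to). The following is a minimal standalone sketch of that probe, not part of the patch: it assumes std::filesystem in place of gator's lib::FsEntry wrapper, and the kPerfDevices constant and hasSpeDevice name are illustrative only.

// Sketch only: mirrors the SPE-device probe from the PmuXMLParser.cpp hunk above.
#include <filesystem>
#include <string>
#include <system_error>

namespace {
    // The patch reads the same sysfs location via its PERF_DEVICES constant.
    constexpr const char * kPerfDevices = "/sys/bus/event_source/devices";

    // Returns true if any perf event source looks like an SPE device.
    bool hasSpeDevice()
    {
        std::error_code ec;
        for (const auto & entry : std::filesystem::directory_iterator(kPerfDevices, ec)) {
            if (entry.path().filename().string().find("spe") != std::string::npos) {
                return true;
            }
        }
        return false; // also taken when the directory cannot be read
    }
}

// Usage mirroring the hunk above: drop the spe attribute when no SPE device exists.
//   if (speName != nullptr && !hasSpeDevice()) { speName = nullptr; }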