|
30 | 30 | #include "ur_api.h" |
31 | 31 | #include "ze_api.h" |
32 | 32 |
|
33 | | -/* Batched queues enable submission of operations to the driver in batches, |
34 | | - * therefore reducing the overhead of submitting every single operation |
35 | | - * individually. Similarly to command buffers in L0v2, they use regular command |
36 | | - * lists (later referenced as 'batches'). Operations enqueued on regular command |
37 | | - * lists are not executed immediately, but only after enqueueing the regular |
38 | | - * command list on an immediate command list. However, in contrast to command |
39 | | - * buffers, batched queues also handle submission of batches (regular command |
40 | | - * lists) instead of only collecting enqueued operations, by using an internal |
41 | | - * immediate command list. Command lists are managed by a batch_manager inside a |
42 | | - * batched queue. |
43 | | - * |
44 | | - * Batched queues can be enabled by setting UR_QUEUE_FLAG_SUBMISSION_BATCHED in |
45 | | - * ur_queue_flags_t or globally, through the environment variable |
46 | | - * UR_L0_FORCE_BATCHED=1. |
47 | | - */ |
| 33 | +// Batched queues enable submission of operations to the driver in batches, |
| 34 | +// therefore reducing the overhead of submitting every single operation |
| 35 | +// individually. Similarly to command buffers in L0v2, they use regular command |
| 36 | +// lists (later referenced as 'batches'). Operations enqueued on regular command |
| 37 | +// lists are not executed immediately, but only after enqueueing the regular |
| 38 | +// command list on an immediate command list. However, in contrast to command |
| 39 | +// buffers, batched queues also handle submission of batches (regular command |
| 40 | +// lists) instead of only collecting enqueued operations, by using an internal |
| 41 | +// immediate command list. Command lists are managed by a batch_manager inside a |
| 42 | +// batched queue. |
| 43 | +// |
| 44 | +// Batched queues can be enabled by setting UR_QUEUE_FLAG_SUBMISSION_BATCHED in |
| 45 | +// ur_queue_flags_t or globally, through the environment variable |
| 46 | +// UR_L0_FORCE_BATCHED=1. |
48 | 47 |
|
49 | 48 | namespace v2 { |
50 | 49 |
|
51 | 50 | struct batch_manager { |
52 | 51 | private: |
53 | | - /* The currently active regular command list, which may be replaced in the |
54 | | - * command list manager, submitted for execution on the immediate command list |
55 | | - * and stored in the vector of submitted batches while awaiting execution |
56 | | - * completion |
57 | | - */ |
| 52 | + // The currently active regular command list, which may be replaced in the |
| 53 | + // command list manager, submitted for execution on the immediate command list |
| 54 | + // and stored in the vector of submitted batches while awaiting execution |
| 55 | + // completion. |
58 | 56 | ur_command_list_manager activeBatch; |
59 | 57 | // An immediate command list for submission of batches |
60 | 58 | ur_command_list_manager immediateList; |
61 | | - /* Submitted batches (regular command lists), stored for the completion of |
62 | | - * their execution. After queueFinish(), the vector is cleared - at this |
63 | | - * point, the destructor of command_list_handle adds the given command list to |
64 | | - * the command list cache, to the stack assigned to the description of the |
65 | | - * command list. When a new regular command list is requested after |
66 | | - * queueFinish(), it is popped from the available stack rather than retrieved |
67 | | - * through a driver call, which improves performance. |
68 | | - */ |
| 59 | + // Submitted batches (regular command lists), stored for the completion of |
| 60 | + // their execution. After queueFinish(), the vector is cleared - at this |
| 61 | + // point, the destructor of command_list_handle adds the given command list to |
| 62 | + // the command list cache, to the stack assigned to the description of the |
| 63 | + // command list. When a new regular command list is requested after |
| 64 | + // queueFinish(), it is popped from the available stack rather than retrieved |
| 65 | + // through a driver call, which improves performance. |
69 | 66 | std::vector<v2::raii::command_list_unique_handle> runBatches; |
70 | | - /* The generation number of the current batch, assigned to events associated |
71 | | - * with operations enqueued on the given batch. It is incremented during every |
72 | | - * replacement of the current batch. When an event created by a batched queue |
73 | | - * appears in an eventWaitList, the batch assigned to the given event might |
74 | | - * not have been executed yet and the event might never be signalled. |
75 | | - * Comparing generation numbers enables determining whether the current batch |
76 | | - * should be submitted for execution. If the generation number of the current |
77 | | - * batch is higher than the number assigned to the given event, the batch |
78 | | - * associated with the event has already been submitted for execution and |
79 | | - * additional submission of the current batch is not needed. |
80 | | - */ |
| 67 | + // The generation number of the current batch, assigned to events associated |
| 68 | + // with operations enqueued on the given batch. It is incremented during every |
| 69 | + // replacement of the current batch. When an event created by a batched queue |
| 70 | + // appears in an eventWaitList, the batch assigned to the given event might |
| 71 | + // not have been executed yet and the event might never be signalled. |
| 72 | + // Comparing generation numbers enables determining whether the current batch |
| 73 | + // should be submitted for execution. If the generation number of the current |
| 74 | + // batch is higher than the number assigned to the given event, the batch |
| 75 | + // associated with the event has already been submitted for execution and |
| 76 | + // additional submission of the current batch is not needed. |
81 | 77 | ur_event_generation_t regularGenerationNumber; |
82 | | - /* The limit of regular command lists stored for execution; if exceeded, the |
83 | | - * vector is cleared as part of queueFinish and slots are renewed. |
84 | | - */ |
| 78 | + // The limit of regular command lists stored for execution; if exceeded, the |
| 79 | + // vector is cleared as part of queueFinish() and slots are renewed. |
85 | 80 | static constexpr uint64_t initialSlotsForBatches = 10; |
86 | 81 | // Whether any operation has been enqueued on the current batch |
87 | 82 | bool isEmpty = true; |
@@ -148,12 +143,12 @@ struct ur_queue_batched_t : ur_object, ur_queue_t_ { |
148 | 143 |
|
149 | 144 | ur_queue_flags_t flags; |
150 | 145 |
|
151 | | - /* Regular command lists use the regular pool cache type, whereas immediate |
152 | | - * command lists use the immediate pool cache type. Since user-requested |
153 | | - * operations are enqueued on regular command lists and immediate command |
154 | | - * lists are only used internally by the batched queue implementation, events |
155 | | - * are not created for immediate command lists. |
156 | | - */ |
| 146 | + // Regular command lists use the regular pool cache type, whereas immediate |
| 147 | + // command lists use the immediate pool cache type. Since user-requested |
| 148 | + // operations are enqueued on regular command lists and immediate command |
| 149 | + // lists are only used internally by the batched queue implementation, events |
| 150 | + // are not created for immediate command lists. |
| 151 | + |
157 | 152 | v2::raii::cache_borrowed_event_pool eventPoolRegular; |
158 | 153 |
|
159 | 154 | v2::raii::command_list_unique_handle getNewRegularCmdList() { |
|
0 commit comments