|
| 1 | +/******************************************************************************* |
| 2 | + * Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates. * |
| 3 | + * All rights reserved. * |
| 4 | + * * |
| 5 | + * This source code and the accompanying materials are made available under * |
| 6 | + * the terms of the Apache License 2.0 which accompanies this distribution. * |
| 7 | + ******************************************************************************/ |
| 8 | + |
| 9 | +#include "PassDetails.h" |
| 10 | +#include "cudaq/Optimizer/Builder/Intrinsics.h" |
| 11 | +#include "cudaq/Optimizer/Dialect/CC/CCOps.h" |
| 12 | +#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" |
| 13 | +#include "cudaq/Optimizer/Transforms/Passes.h" |
| 14 | +#include "mlir/Dialect/Complex/IR/Complex.h" |
| 15 | +#include "mlir/IR/BuiltinOps.h" |
| 16 | +#include "mlir/IR/Dominance.h" |
| 17 | +#include "mlir/IR/PatternMatch.h" |
| 18 | +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" |
| 19 | +#include "mlir/Transforms/Passes.h" |
| 20 | + |
| 21 | +namespace cudaq::opt { |
| 22 | +#define GEN_PASS_DEF_WRITEAFTERWRITEELIMINATION |
| 23 | +#include "cudaq/Optimizer/Transforms/Passes.h.inc" |
| 24 | +} // namespace cudaq::opt |
| 25 | + |
| 26 | +#define DEBUG_TYPE "write-after-write-elimination" |
| 27 | + |
| 28 | +using namespace mlir; |
| 29 | + |
| 30 | +namespace { |
| 31 | +/// Remove stores followed by a store to the same pointer |
| 32 | +/// if the pointer is not used in between. |
| 33 | +/// ``` |
| 34 | +/// cc.store %c0_i64, %1 : !cc.ptr<i64> |
| 35 | +/// // no use of %1 until next line |
| 36 | +/// cc.store %0, %1 : !cc.ptr<i64> |
| 37 | +/// ─────────────────────────────────────────── |
| 38 | +/// cc.store %0, %1 : !cc.ptr<i64> |
| 39 | +/// ``` |
| 40 | +class SimplifyWritesAnalysis { |
| 41 | +public: |
| 42 | + SimplifyWritesAnalysis(DominanceInfo &di, Operation *op) : dom(di) { |
| 43 | + for (auto ®ion : op->getRegions()) |
| 44 | + for (auto &b : region) |
| 45 | + collectBlockInfo(&b); |
| 46 | + } |
| 47 | + |
| 48 | + /// Remove stores followed by a store to the same pointer if the pointer is |
| 49 | + /// not used in between, using collected block info. |
| 50 | + void removeOverriddenStores() { |
| 51 | + SmallVector<Operation *> toErase; |
| 52 | + |
| 53 | + for (const auto &[block, ptrToStores] : blockInfo) { |
| 54 | + for (const auto &[ptr, stores] : ptrToStores) { |
| 55 | + if (stores.size() > 1) { |
| 56 | + auto replacement = stores.back(); |
| 57 | + for (auto it = stores.rend(); it != stores.rbegin(); it++) { |
| 58 | + auto store = *it; |
| 59 | + if (isReplacement(ptr, *store, *replacement)) { |
| 60 | + LLVM_DEBUG(llvm::dbgs() << "replacing store " << store |
| 61 | + << " by: " << replacement << '\n'); |
| 62 | + toErase.push_back(store->getOperation()); |
| 63 | + } |
| 64 | + } |
| 65 | + } |
| 66 | + } |
| 67 | + } |
| 68 | + |
| 69 | + for (auto *op : toErase) |
| 70 | + op->erase(); |
| 71 | + } |
| 72 | + |
| 73 | +private: |
| 74 | + /// Detect if value is used in the op or its nested blocks. |
| 75 | + bool isReplacement(Value ptr, cudaq::cc::StoreOp store, |
| 76 | + cudaq::cc::StoreOp replacement) const { |
| 77 | + // Check that there are no stores dominated by the store and not dominated |
| 78 | + // by the replacement (i.e. used in between the store and the replacement) |
| 79 | + for (auto *user : ptr.getUsers()) { |
| 80 | + if (user != store && user != replacement) { |
| 81 | + if (dom.dominates(store, user) && !dom.dominates(replacement, user)) { |
| 82 | + LLVM_DEBUG(llvm::dbgs() << "store " << replacement |
| 83 | + << " is used before: " << store << '\n'); |
| 84 | + return false; |
| 85 | + } |
| 86 | + } |
| 87 | + } |
| 88 | + return true; |
| 89 | + } |
| 90 | + |
| 91 | + /// Collect all stores to a pointer for a block. |
| 92 | + void collectBlockInfo(Block *block) { |
| 93 | + for (auto &op : *block) { |
| 94 | + for (auto ®ion : op.getRegions()) |
| 95 | + for (auto &b : region) |
| 96 | + collectBlockInfo(&b); |
| 97 | + |
| 98 | + if (auto store = dyn_cast<cudaq::cc::StoreOp>(&op)) { |
| 99 | + auto ptr = store.getPtrvalue(); |
| 100 | + if (isStoreToStack(store)) { |
| 101 | + auto ptrToStores = blockInfo.FindAndConstruct(block).second; |
| 102 | + auto stores = ptrToStores.FindAndConstruct(ptr).second; |
| 103 | + stores.push_back(&store); |
| 104 | + } |
| 105 | + } |
| 106 | + } |
| 107 | + } |
| 108 | + |
| 109 | + /// Detect stores to stack locations, for example: |
| 110 | + /// ``` |
| 111 | + /// %1 = cc.alloca !cc.array<i64 x 2> |
| 112 | + /// |
| 113 | + /// %2 = cc.cast %1 : (!cc.ptr<!cc.array<i64 x 2>>) -> !cc.ptr<i64> |
| 114 | + /// cc.store %c0_i64, %2 : !cc.ptr<i64> |
| 115 | + /// |
| 116 | + /// %3 = cc.compute_ptr %1[1] : (!cc.ptr<!cc.array<i64 x 2>>) -> !cc.ptr<i64> |
| 117 | + /// cc.store %c0_i64, %3 : !cc.ptr<i64> |
| 118 | + /// ``` |
| 119 | + static bool isStoreToStack(cudaq::cc::StoreOp store) { |
| 120 | + auto ptrOp = store.getPtrvalue(); |
| 121 | + if (auto cast = ptrOp.getDefiningOp<cudaq::cc::CastOp>()) |
| 122 | + ptrOp = cast.getOperand(); |
| 123 | + |
| 124 | + if (auto computePtr = ptrOp.getDefiningOp<cudaq::cc::ComputePtrOp>()) |
| 125 | + ptrOp = computePtr.getBase(); |
| 126 | + |
| 127 | + return isa_and_present<cudaq::cc::AllocaOp>(ptrOp.getDefiningOp()); |
| 128 | + } |
| 129 | + |
| 130 | + DominanceInfo &dom; |
| 131 | + DenseMap<Block *, DenseMap<Value, SmallVector<cudaq::cc::StoreOp *>>> |
| 132 | + blockInfo; |
| 133 | +}; |
| 134 | + |
| 135 | +class WriteAfterWriteEliminationPass |
| 136 | + : public cudaq::opt::impl::WriteAfterWriteEliminationBase< |
| 137 | + WriteAfterWriteEliminationPass> { |
| 138 | +public: |
| 139 | + using WriteAfterWriteEliminationBase::WriteAfterWriteEliminationBase; |
| 140 | + |
| 141 | + void runOnOperation() override { |
| 142 | + auto op = getOperation(); |
| 143 | + DominanceInfo domInfo(op); |
| 144 | + |
| 145 | + LLVM_DEBUG(llvm::dbgs() |
| 146 | + << "Before write after write elimination: " << *op << '\n'); |
| 147 | + |
| 148 | + auto analysis = SimplifyWritesAnalysis(domInfo, op); |
| 149 | + analysis.removeOverriddenStores(); |
| 150 | + |
| 151 | + LLVM_DEBUG(llvm::dbgs() |
| 152 | + << "After write after write elimination: " << *op << '\n'); |
| 153 | + } |
| 154 | +}; |
| 155 | +} // namespace |
0 commit comments