From b80e5cf40a568bc27552cd79ae932bbf26b70e95 Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Thu, 8 Feb 2024 17:02:37 -0800 Subject: [PATCH 01/88] Initial working attempt Signed-off-by: Engin Kayraklioglu --- .../optimizations/forallOptimizations.cpp | 69 +++++++++++++ modules/internal/ChapelArray.chpl | 97 +++++++++++++++++-- 2 files changed, 158 insertions(+), 8 deletions(-) diff --git a/compiler/optimizations/forallOptimizations.cpp b/compiler/optimizations/forallOptimizations.cpp index 725f18c745f2..b989b1f499ad 100644 --- a/compiler/optimizations/forallOptimizations.cpp +++ b/compiler/optimizations/forallOptimizations.cpp @@ -142,10 +142,15 @@ static void removeAggregatorFromFunction(Symbol *aggregator, FnSymbol *parent); static void removeAggregationFromRecursiveForallHelp(BlockStmt *block); static void autoAggregation(ForallStmt *forall); +static void bulkViewTransfer(); + void doPreNormalizeArrayOptimizations() { + bulkViewTransfer(); + const bool anyAnalysisNeeded = fAutoLocalAccess || fAutoAggregation || !fNoFastFollowers; + if (anyAnalysisNeeded) { forv_expanding_Vec(ForallStmt, forall, gForallStmts) { if (!fNoFastFollowers) { @@ -2452,3 +2457,67 @@ static bool isLocalAccess(CallExpr *call) { return false; } + +static void bulkViewTransfer() { + std::vector candidates; + + for_alive_in_Vec (CallExpr, call, gCallExprs) { + if (call->getModule()->modTag == MOD_USER) { + if (call->isNamed("=")) { + list_view(call); + + CallExpr* lhs = toCallExpr(call->get(1)); + CallExpr* rhs = toCallExpr(call->get(2)); + if (lhs && rhs) { + candidates.push_back(call); + } + } + } + } + + for_vector(CallExpr, call, candidates) { + SET_LINENO(call); + + CallExpr* lhs = toCallExpr(call->get(1)); + CallExpr* rhs = toCallExpr(call->get(2)); + + Expr* lhsBase = lhs->baseExpr; + Expr* rhsBase = rhs->baseExpr; + + + CallExpr* arrCheck = new CallExpr("chpl__basesSupportViewTransfer", + lhsBase->copy(), rhsBase->copy()); + CallExpr* slicingExprCheck = + new 
CallExpr("chpl__slicingExprsSupportViewTransfer"); + + CallExpr* lhsPSCall = new CallExpr("chpl__createProtoSlice", + lhsBase->copy()); + CallExpr* rhsPSCall = new CallExpr("chpl__createProtoSlice", + rhsBase->copy()); + + for_actuals(actual, lhs) { + slicingExprCheck->insertAtTail(actual->copy()); + lhsPSCall->insertAtTail(actual->copy()); + } + for_actuals(actual, rhs) { + slicingExprCheck->insertAtTail(actual->copy()); + rhsPSCall->insertAtTail(actual->copy()); + } + CallExpr* condExpr = new CallExpr("&&", arrCheck, slicingExprCheck); + + BlockStmt* thenBlock = new BlockStmt(); + VarSymbol* lhsPS = new VarSymbol("lhs_proto_slice"); + VarSymbol* rhsPS = new VarSymbol("rhs_proto_slice"); + + thenBlock->insertAtTail(new DefExpr(lhsPS, lhsPSCall)); + thenBlock->insertAtTail(new DefExpr(rhsPS, rhsPSCall)); + thenBlock->insertAtTail(new CallExpr("=", lhsPS, rhsPS)); + + BlockStmt* elseBlock = new BlockStmt(); + + CondStmt* arrCond = new CondStmt(condExpr, thenBlock, elseBlock); + + call->insertBefore(arrCond); + elseBlock->insertAtTail(call->remove()); + } +} diff --git a/modules/internal/ChapelArray.chpl b/modules/internal/ChapelArray.chpl index 3bfb63429131..ae00fe6e2a3a 100644 --- a/modules/internal/ChapelArray.chpl +++ b/modules/internal/ChapelArray.chpl @@ -2066,7 +2066,7 @@ module ChapelArray { } } - proc chpl__serializeAssignment(a: [], b) param { + proc chpl__serializeAssignment(a, b) param { if a.rank != 1 && isRange(b) then return true; @@ -2091,7 +2091,7 @@ module ChapelArray { } // This must be a param function - proc chpl__compatibleForBulkTransfer(a:[], b:[], param kind:_tElt) param { + proc chpl__compatibleForBulkTransfer(a, b, param kind:_tElt) param { if !useBulkTransfer then return false; if a.eltType != b.eltType then return false; if kind==_tElt.move then return true; @@ -2141,7 +2141,7 @@ module ChapelArray { proc chpl__supportedDataTypeForBulkTransfer(x) param do return true; @chpldoc.nodoc - proc checkArrayShapesUponAssignment(a: [], b: [], 
forSwap = false) { + proc checkArrayShapesUponAssignment(a, b, forSwap = false) { if a.isRectangular() && b.isRectangular() { const aDims = a._value.dom.dsiDims(), bDims = b._value.dom.dsiDims(); @@ -2158,9 +2158,17 @@ module ChapelArray { } } - pragma "find user line" - @chpldoc.nodoc - inline operator =(ref a: [], b:[]) { + proc isProtoSlice(a) param { return isSubtype(a.type, chpl__protoSlice); } + + proc isArrayOrProtoSlice(a) param { + return isArray(a) || isProtoSlice(a); + } + + proc areBothArraysOrProtoSlices(a,b) { + return isArrayOrProtoSlice(a) && isArrayOrProtoSlice(b); + } + + private inline proc arrayOrProtoSliceAssign(ref a, b) { if a.rank != b.rank then compilerError("rank mismatch in array assignment"); @@ -2186,6 +2194,19 @@ module ChapelArray { chpl__uncheckedArrayTransfer(a, b, kind=_tElt.assign); } + + pragma "find user line" + @chpldoc.nodoc + inline operator =(ref a: [], b: []) { + arrayOrProtoSliceAssign(a, b); + } + + pragma "find user line" + @chpldoc.nodoc + inline operator =(ref a: chpl__protoSlice, b: chpl__protoSlice) { + arrayOrProtoSliceAssign(a, b); + } + // what kind of transfer to do for each element? 
@chpldoc.nodoc enum _tElt { @@ -2293,7 +2314,7 @@ module ChapelArray { } pragma "find user line" - inline proc chpl__uncheckedArrayTransfer(ref a: [], b:[], param kind) { + inline proc chpl__uncheckedArrayTransfer(ref a, b, param kind) { var done = false; if !chpl__serializeAssignment(a, b) { @@ -2359,10 +2380,70 @@ module ChapelArray { inline proc chpl__bulkTransferArray(ref a: [?AD], b : [?BD]) { return chpl__bulkTransferArray(a, AD, b, BD); } + + inline proc chpl__bulkTransferArray(ref a: chpl__protoSlice, + b: chpl__protoSlice) { + + return chpl__bulkTransferArray(a.ptrToArr.deref(), {(...a.slicingExprs)}, + b.ptrToArr.deref(), {(...b.slicingExprs)}); + + } inline proc chpl__bulkTransferArray(ref a: [], AD : domain, const ref b: [], BD : domain) { return chpl__bulkTransferArray(a._value, AD, b._value, BD); } + record chpl__protoSlice { + var ptrToArr; // I want this to be a `forwarding ref` to the array + var slicingExprs; + + proc init=(other: chpl__protoSlice) { + this.ptrToArr = other.ptrToArr; + this.slicingExprs = other.slicingExprs; + init this; + extern proc printf(s...); + printf("this is probably not what you want\n"); + } + + inline proc rank param { return ptrToArr.deref().rank; } + inline proc eltType type { return ptrToArr.deref().eltType; } + inline proc _value { return ptrToArr.deref()._value; } + inline proc sizeAs(type t) { return ptrToArr.deref().sizeAs(t); } + inline proc isRectangular() param { return ptrToArr.deref().isRectangular(); } + iter these() ref { + for i in {(...slicingExprs)}.these() { + yield ptrToArr.deref()[i]; + } + } + iter these(param tag: iterKind) where tag==iterKind.leader { + for followThis in {(...slicingExprs)}.these(iterKind.leader) { + yield followThis; + } + } + iter these(param tag: iterKind, followThis) ref where tag==iterKind.follower { + for i in {(...slicingExprs)}.these(iterKind.follower, followThis) { + yield ptrToArr.deref()[i]; + } + } + } + + proc chpl__createProtoSlice(ref Arr, slicingExprs...) 
{ + return new chpl__protoSlice(c_addrOf(Arr), slicingExprs); + } + + proc chpl__basesSupportViewTransfer(a, b) param { + return chpl__isDROrDRView(a) && chpl__isDROrDRView(b); + /*return false;*/ + } + + proc chpl__slicingExprsSupportViewTransfer(x...) param { + return (isHomogeneousTuple(x) && isRange(x[0])); + } + + proc chpl__bulkTransferView(a, aTuple, b, bTuple) { + compilerAssert(aTuple.size == bTuple.size); + chpl__bulkTransferArray(a, {(...aTuple)}, b, {(...bTuple)}); + } + inline proc chpl__bulkTransferArray(destClass, destDom : domain, srcClass, srcDom : domain) { var success = false; @@ -2407,7 +2488,7 @@ module ChapelArray { pragma "find user line" pragma "ignore transfer errors" - inline proc chpl__transferArray(ref a: [], const ref b, + inline proc chpl__transferArray(ref a, const ref b, param kind=_tElt.assign) lifetime a <= b { if (a.eltType == b.type || _isPrimitiveType(a.eltType) && _isPrimitiveType(b.type)) { From 15c5eaba6da68e6f42a460638bcfbcc0bee971a4 Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Thu, 8 Feb 2024 17:33:30 -0800 Subject: [PATCH 02/88] Rig the bulk transfer support to take ranges Signed-off-by: Engin Kayraklioglu --- modules/internal/ChapelArray.chpl | 33 +++++++++----- modules/internal/DefaultRectangular.chpl | 56 +++++++++++++++++++++--- 2 files changed, 70 insertions(+), 19 deletions(-) diff --git a/modules/internal/ChapelArray.chpl b/modules/internal/ChapelArray.chpl index ae00fe6e2a3a..bbf2fb7f3958 100644 --- a/modules/internal/ChapelArray.chpl +++ b/modules/internal/ChapelArray.chpl @@ -2384,11 +2384,18 @@ module ChapelArray { inline proc chpl__bulkTransferArray(ref a: chpl__protoSlice, b: chpl__protoSlice) { - return chpl__bulkTransferArray(a.ptrToArr.deref(), {(...a.slicingExprs)}, - b.ptrToArr.deref(), {(...b.slicingExprs)}); + if a.slicingExprs.size == 1 { + // check the other too? 
+ return chpl__bulkTransferArray(a.ptrToArr.deref(), a.slicingExprs[0], + b.ptrToArr.deref(), b.slicingExprs[0]); + } + else { + return chpl__bulkTransferArray(a.ptrToArr.deref(), {(...a.slicingExprs)}, + b.ptrToArr.deref(), {(...b.slicingExprs)}); + } } - inline proc chpl__bulkTransferArray(ref a: [], AD : domain, const ref b: [], BD : domain) { + inline proc chpl__bulkTransferArray(ref a: [], AD, const ref b: [], BD) { return chpl__bulkTransferArray(a._value, AD, b._value, BD); } @@ -2444,7 +2451,7 @@ module ChapelArray { chpl__bulkTransferArray(a, {(...aTuple)}, b, {(...bTuple)}); } - inline proc chpl__bulkTransferArray(destClass, destDom : domain, srcClass, srcDom : domain) { + inline proc chpl__bulkTransferArray(destClass, destView, srcClass, srcView) { var success = false; inline proc bulkTransferDebug(msg:string) { @@ -2452,6 +2459,8 @@ module ChapelArray { } bulkTransferDebug("in chpl__bulkTransferArray"); + bulkTransferDebug("destView.type: " + destView.type:string + + "srcView.type: " + srcView.type:string); // // BHARSH TODO: I would prefer to hoist these 'canResolveMethod' calls into @@ -2461,21 +2470,21 @@ module ChapelArray { // TODO: should we attempt other bulk transfer methods if one fails? 
// if Reflection.canResolveMethod(destClass, "doiBulkTransferFromKnown", - destDom, srcClass, srcDom) { + destView, srcClass, srcView) { bulkTransferDebug("attempting doiBulkTransferFromKnown"); - success = destClass.doiBulkTransferFromKnown(destDom, srcClass, srcDom); + success = destClass.doiBulkTransferFromKnown(destView, srcClass, srcView); } else if Reflection.canResolveMethod(srcClass, "doiBulkTransferToKnown", - srcDom, destClass, destDom) { + srcView, destClass, destView) { bulkTransferDebug("attempting doiBulkTransferToKnown"); - success = srcClass.doiBulkTransferToKnown(srcDom, destClass, destDom); + success = srcClass.doiBulkTransferToKnown(srcView, destClass, destView); } else if Reflection.canResolveMethod(destClass, "doiBulkTransferFromAny", - destDom, srcClass, srcDom) { + destView, srcClass, srcView) { bulkTransferDebug("attempting doiBulkTransferFromAny"); - success = destClass.doiBulkTransferFromAny(destDom, srcClass, srcDom); + success = destClass.doiBulkTransferFromAny(destView, srcClass, srcView); } else if Reflection.canResolveMethod(srcClass, "doiBulkTransferToAny", - srcDom, destClass, destDom) { + srcView, destClass, destView) { bulkTransferDebug("attempting doiBulkTransferToAny"); - success = srcClass.doiBulkTransferToAny(srcDom, destClass, destDom); + success = srcClass.doiBulkTransferToAny(srcView, destClass, destView); } if success then diff --git a/modules/internal/DefaultRectangular.chpl b/modules/internal/DefaultRectangular.chpl index c18760142245..bb4b5f27d9ea 100644 --- a/modules/internal/DefaultRectangular.chpl +++ b/modules/internal/DefaultRectangular.chpl @@ -1978,6 +1978,25 @@ module DefaultRectangular { dsiSerialReadWrite(f); } + inline proc DefaultRectangularArr.isDataContiguous(dom: domain) { + return isDataContiguous(dom._value); + } + + // This is very conservative. 
+ inline proc DefaultRectangularArr.isDataContiguous(dom: range) { + if rank != 1 then return false; + + if debugDefaultDistBulkTransfer then + chpl_debug_writeln("isDataContiguous(): off=", off, " blk=", blk); + + if blk(rank-1) != 1 then return false; + + if debugDefaultDistBulkTransfer then + chpl_debug_writeln("\tYES!"); + + return true; + } + // This is very conservative. proc DefaultRectangularArr.isDataContiguous(dom) { if debugDefaultDistBulkTransfer then @@ -1998,7 +2017,7 @@ module DefaultRectangular { } private proc _canDoSimpleTransfer(A, aView, B, bView) { - if !A.isDataContiguous(aView._value) || !B.isDataContiguous(bView._value) { + if !A.isDataContiguous(aView) || !B.isDataContiguous(bView) { if debugDefaultDistBulkTransfer then chpl_debug_writeln("isDataContiguous return False"); return false; @@ -2023,6 +2042,13 @@ module DefaultRectangular { return transferHelper(this, destDom, srcClass, srcDom); } + /*proc DefaultRectangularArr.doiBulkTransferFromKnown(destRange: range,*/ + /*srcClass:DefaultRectangularArr,*/ + /*srcRange: range) : bool {*/ + /*return transferHelper(this, destRange, srcClass, srcRange);*/ + + /*}*/ + private proc transferHelper(A, aView, B, bView) : bool { if A.rank == B.rank && aView.hasUnitStride() && bView.hasUnitStride() && @@ -2049,15 +2075,29 @@ module DefaultRectangular { param rank = A.rank; type idxType = A.idxType; - const Adims = aView.dims(); var Alo: rank*aView.idxType; - for param i in 0..rank-1 do - Alo(i) = Adims(i).first; - const Bdims = bView.dims(); + if isDomain(aView) { + const Adims = aView.dims(); + for param i in 0..rank-1 do + Alo(i) = Adims(i).first; + } + else if isRange(aView) { + Alo(0) = aView.first; + } + else { + compilerError("Unexpected type"); + } + var Blo: rank*B.idxType; + if isDomain(bView) { + const Bdims = bView.dims(); for param i in 0..rank-1 do Blo(i) = Bdims(i).first; + } + else if isRange(bView) { + Blo(0) = bView.first; + } const len = 
aView.sizeAs(aView.chpl_integralIdxType).safeCast(c_size_t); @@ -2214,8 +2254,10 @@ module DefaultRectangular { writeln("Original domains :", LHS.dom.dsiDims(), " <-- ", RHS.dom.dsiDims()); } - const LeftDims = LViewDom.dims(); - const RightDims = RViewDom.dims(); + const LeftDims = if isDomain(LViewDom) then LViewDom.dims() + else (LViewDom,); + const RightDims = if isDomain(RViewDom) then RViewDom.dims() + else (RViewDom, ); const (LeftActives, RightActives, inferredRank) = bulkCommComputeActiveDims(LeftDims, RightDims); From 544db0e0b0fbddc879d7480e0da961523faa3d2c Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Fri, 9 Feb 2024 14:13:31 -0800 Subject: [PATCH 03/88] Improve C pointer creation performance Signed-off-by: Engin Kayraklioglu --- modules/standard/CTypes.chpl | 38 ++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/modules/standard/CTypes.chpl b/modules/standard/CTypes.chpl index abe8a0d2ddeb..a66c03b84e49 100644 --- a/modules/standard/CTypes.chpl +++ b/modules/standard/CTypes.chpl @@ -836,16 +836,16 @@ module CTypes { */ @chpldoc.nodoc inline proc c_ptrTo(ref arr: []): c_ptr(arr.eltType) { - if (!arr.isRectangular() || !arr.domain.distribution._value.dsiIsLayout()) then + if (!isSubtype(arr.domain._instance.type, DefaultRectangularDom)) then compilerError("Only single-locale rectangular arrays support c_ptrTo() at present"); - if (arr._value.locale != here) then - halt( - "c_ptrTo() can only be applied to an array from the locale on " + - "which it lives (array is on locale " + arr._value.locale.id:string + - ", call was made on locale " + here.id:string + ")"); - if boundsChecking { + if (arr._value.locale != here) then + halt( + "c_ptrTo() can only be applied to an array from the locale on " + + "which it lives (array is on locale " + arr._value.locale.id:string + + ", call was made on locale " + here.id:string + ")"); + if (arr.size == 0) then halt("Can't create a C pointer for an array with 0 
elements."); } @@ -925,16 +925,16 @@ module CTypes { */ @chpldoc.nodoc inline proc c_ptrToConst(const arr: []): c_ptrConst(arr.eltType) { - if (!arr.isRectangular() || !arr.domain.distribution._value.dsiIsLayout()) then + if (!isSubtype(arr.domain._instance.type, DefaultRectangularDom)) then compilerError("Only single-locale rectangular arrays support c_ptrToConst() at present"); - if (arr._value.locale != here) then - halt( - "c_ptrToConst() can only be applied to an array from the locale on " + - "which it lives (array is on locale " + arr._value.locale.id:string + - ", call was made on locale " + here.id:string + ")"); - if boundsChecking { + if (arr._value.locale != here) then + halt( + "c_ptrToConst() can only be applied to an array from the locale on " + + "which it lives (array is on locale " + arr._value.locale.id:string + + ", call was made on locale " + here.id:string + ")"); + if (arr.size == 0) then halt("Can't create a C pointer for an array with 0 elements."); } @@ -996,10 +996,10 @@ module CTypes { */ @chpldoc.nodoc inline proc c_addrOf(ref arr: []) { - if (!arr.isRectangular() || !arr.domain.distribution._value.dsiIsLayout()) then - compilerError("Only single-locale rectangular arrays support c_addrOf() at present"); + if (!isSubtype(arr.domain._instance.type, DefaultRectangularDom)) then + compilerError("Only single-locale rectangular arrays support c_addrOf() at present"); - if (arr._value.locale != here) then + if (boundsChecking && arr._value.locale != here) then halt( "c_addrOf() can only be applied to an array from the locale on " + "which it lives (array is on locale " + arr._value.locale.id:string + @@ -1014,10 +1014,10 @@ module CTypes { */ @chpldoc.nodoc inline proc c_addrOfConst(const arr: []) { - if (!arr.isRectangular() || !arr.domain.distribution._value.dsiIsLayout()) then + if (!isSubtype(arr.domain._instance.type, DefaultRectangularDom)) then compilerError("Only single-locale rectangular arrays support c_addrOfConst() at 
present"); - if (arr._value.locale != here) then + if (boundsChecking && arr._value.locale != here) then halt( "c_addrOfConst() can only be applied to an array from the locale on " + "which it lives (array is on locale " + arr._value.locale.id:string + From 8fada4309b509acf33fcb97ada8732b49c434771 Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Fri, 9 Feb 2024 14:17:17 -0800 Subject: [PATCH 04/88] Special-case/improve 1D protoSlice iteration Signed-off-by: Engin Kayraklioglu --- modules/internal/ChapelArray.chpl | 32 +++++++-- modules/internal/DefaultRectangular.chpl | 88 ++++++++++++++---------- 2 files changed, 76 insertions(+), 44 deletions(-) diff --git a/modules/internal/ChapelArray.chpl b/modules/internal/ChapelArray.chpl index bbf2fb7f3958..f6945033c11b 100644 --- a/modules/internal/ChapelArray.chpl +++ b/modules/internal/ChapelArray.chpl @@ -2411,24 +2411,44 @@ module ChapelArray { printf("this is probably not what you want\n"); } + inline proc domOrRange { + if rank == 1 then + return slicingExprs[0]; + else + return {(...slicingExprs)}; + } + inline proc rank param { return ptrToArr.deref().rank; } inline proc eltType type { return ptrToArr.deref().eltType; } inline proc _value { return ptrToArr.deref()._value; } - inline proc sizeAs(type t) { return ptrToArr.deref().sizeAs(t); } + inline proc sizeAs(type t) { return domOrRange.sizeAs(t); } inline proc isRectangular() param { return ptrToArr.deref().isRectangular(); } + iter these() ref { - for i in {(...slicingExprs)}.these() { - yield ptrToArr.deref()[i]; + ref arrInst = ptrToArr.deref()._instance; + if rank == 1 then { + foreach elem in chpl__serialViewIter1D(arrInst, domOrRange) { + yield elem; + } + } + else { + const viewDomInst = domOrRange._instance; + foreach elem in chpl__serialViewIter(arrInst, viewDomInst) { + yield elem; + } } } + iter these(param tag: iterKind) where tag==iterKind.leader { - for followThis in {(...slicingExprs)}.these(iterKind.leader) { + for followThis in 
domOrRange.these(iterKind.leader) { yield followThis; } } + iter these(param tag: iterKind, followThis) ref where tag==iterKind.follower { - for i in {(...slicingExprs)}.these(iterKind.follower, followThis) { - yield ptrToArr.deref()[i]; + ref arr = ptrToArr.deref(); + foreach i in domOrRange.these(iterKind.follower, followThis) { + yield arr[i]; } } } diff --git a/modules/internal/DefaultRectangular.chpl b/modules/internal/DefaultRectangular.chpl index bb4b5f27d9ea..562d3325820c 100644 --- a/modules/internal/DefaultRectangular.chpl +++ b/modules/internal/DefaultRectangular.chpl @@ -1622,51 +1622,63 @@ module DefaultRectangular { } } - iter chpl__serialViewIter(arr, viewDom) ref - where chpl__isDROrDRView(arr) { + // This is specialized to avoid overheads of calling dsiAccess() + iter chpl__serialViewIter1D(arr, viewRange) ref + where chpl__isDROrDRView(arr) { + param useCache = chpl__isArrayView(arr) && arr.shouldUseIndexCache(); var info = if useCache then arr.indexCache else if arr.isSliceArrayView() then arr.arr else arr; - if arr.rank == 1 { - // This is specialized to avoid overheads of calling dsiAccess() - if viewDom.hasUnitStride() { - // Ideally we would like to be able to do something like - // "for i in first..last by step". However, right now that would - // result in a strided iterator which isn't as optimized. It would - // also add a range initializer, which in tight loops is pretty - // expensive. Instead we use a direct range iterator that is - // optimized for positively strided ranges. It should be just as fast - // as directly using a "c for loop", but it contains code check for - // overflow and invalid strides as well as the ability to use a less - // optimized iteration method if users are concerned about range - // overflow. 
- - const first = info.getDataIndex(viewDom.dsiLow); - const second = info.getDataIndex(chpl__intToIdx(viewDom.idxType, chpl__idxToInt(viewDom.dsiLow)+1)); - const step = (second-first); - const last = first + (viewDom.dsiNumIndices:step.type-1) * step; - foreach i in chpl_direct_pos_stride_range_iter(first, last, step) with (ref info) { - yield info.theData(i); - } - } else { - type vdIntIdxType = chpl__idxTypeToIntIdxType(viewDom.idxType); - const viewDomDim = viewDom.dsiDim(0), - stride = viewDomDim.stride: vdIntIdxType, - start = viewDomDim.first, - second = info.getDataIndex(chpl__intToIdx(viewDom.idxType, viewDomDim.firstAsInt + stride)); - var first = info.getDataIndex(start); - const step = (second-first).safeCast(int); - var last = first + (viewDomDim.sizeAs(int)-1) * step; + if viewRange.hasUnitStride() { + // Ideally we would like to be able to do something like + // "for i in first..last by step". However, right now that would + // result in a strided iterator which isn't as optimized. It would + // also add a range initializer, which in tight loops is pretty + // expensive. Instead we use a direct range iterator that is + // optimized for positively strided ranges. It should be just as fast + // as directly using a "c for loop", but it contains code check for + // overflow and invalid strides as well as the ability to use a less + // optimized iteration method if users are concerned about range + // overflow. 
+ + const first = info.getDataIndex(viewRange.low); + const second = info.getDataIndex(chpl__intToIdx(viewRange.idxType, chpl__idxToInt(viewRange.low)+1)); + const step = (second-first); + const last = first + (viewRange.size:step.type-1) * step; + foreach i in chpl_direct_pos_stride_range_iter(first, last, step) { + yield info.theData(i); + } + } else { + type vdIntIdxType = chpl__idxTypeToIntIdxType(viewRange.idxType); + const stride = viewRange.stride: vdIntIdxType, + start = viewRange.first, + second = info.getDataIndex(chpl__intToIdx(viewRange.idxType, viewRange.firstAsInt + stride)); - if step < 0 then - last <=> first; + var first = info.getDataIndex(start); + const step = (second-first).safeCast(int); + var last = first + (viewRange.sizeAs(int)-1) * step; - var data = info.theData; - foreach i in first..last by step do - yield data(i); - } + if step < 0 then + last <=> first; + + var data = info.theData; + foreach i in first..last by step do + yield data(i); + } + + } + + iter chpl__serialViewIter(arr, viewDom) ref + where chpl__isDROrDRView(arr) { + param useCache = chpl__isArrayView(arr) && arr.shouldUseIndexCache(); + var info = if useCache then arr.indexCache + else if arr.isSliceArrayView() then arr.arr + else arr; + if arr.rank == 1 { + foreach elem in chpl__serialViewIter1D(arr, viewDom.dsiDim[0]) do + yield elem; } else if useCache { foreach i in viewDom { const dataIdx = info.getDataIndex(i); From a6d3ee8f2a26fd7ce449bff564f109a61a95171f Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Fri, 9 Feb 2024 14:20:14 -0800 Subject: [PATCH 05/88] Add a basic short array transfer optimization Signed-off-by: Engin Kayraklioglu --- modules/internal/ChapelArray.chpl | 48 +++++++++++++++++-------------- 1 file changed, 27 insertions(+), 21 deletions(-) diff --git a/modules/internal/ChapelArray.chpl b/modules/internal/ChapelArray.chpl index f6945033c11b..ec3fca0bb28b 100644 --- a/modules/internal/ChapelArray.chpl +++ b/modules/internal/ChapelArray.chpl @@ 
-2316,27 +2316,32 @@ module ChapelArray { pragma "find user line" inline proc chpl__uncheckedArrayTransfer(ref a, b, param kind) { - var done = false; - if !chpl__serializeAssignment(a, b) { - if chpl__compatibleForBulkTransfer(a, b, kind) { - done = chpl__bulkTransferArray(a, b); - } - else if chpl__compatibleForWidePtrBulkTransfer(a, b, kind) { - done = chpl__bulkTransferPtrArray(a, b); - } - // If we did a bulk transfer, it just bit copied, so need to - // run copy initializer still - if done { - if kind==_tElt.initCopy && !isPODType(a.eltType) { - initCopyAfterTransfer(a); - } else if kind==_tElt.move && (isSubtype(a.eltType, _array) || - isSubtype(a.eltType, _domain)) { - fixEltRuntimeTypesAfterTransfer(a); + if isProtoSlice(a) && isProtoSlice(b) && a.sizeAs(int) < 100 { + chpl__transferArray(a, b, kind, alwaysSerialize=true); + } + else { + var done = false; + if !chpl__serializeAssignment(a, b) { + if chpl__compatibleForBulkTransfer(a, b, kind) { + done = chpl__bulkTransferArray(a, b); + } + else if chpl__compatibleForWidePtrBulkTransfer(a, b, kind) { + done = chpl__bulkTransferPtrArray(a, b); + } + // If we did a bulk transfer, it just bit copied, so need to + // run copy initializer still + if done { + if kind==_tElt.initCopy && !isPODType(a.eltType) { + initCopyAfterTransfer(a); + } else if kind==_tElt.move && (isSubtype(a.eltType, _array) || + isSubtype(a.eltType, _domain)) { + fixEltRuntimeTypesAfterTransfer(a); + } } } - } - if !done { - chpl__transferArray(a, b, kind); + if !done { + chpl__transferArray(a, b, kind); + } } } @@ -2518,7 +2523,8 @@ module ChapelArray { pragma "find user line" pragma "ignore transfer errors" inline proc chpl__transferArray(ref a, const ref b, - param kind=_tElt.assign) lifetime a <= b { + param kind=_tElt.assign, + param alwaysSerialize=false) lifetime a <= b { if (a.eltType == b.type || _isPrimitiveType(a.eltType) && _isPrimitiveType(b.type)) { @@ -2546,7 +2552,7 @@ module ChapelArray { aa = b; } } - } else if 
chpl__serializeAssignment(a, b) { + } else if alwaysSerialize || chpl__serializeAssignment(a, b) { if kind==_tElt.move { if needsInitWorkaround(a.eltType) { for (ai, bb) in zip(a.domain, b) { From d5b9c581f14ccff85f4301b64f2b41e2b546c26d Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Fri, 9 Feb 2024 15:47:32 -0800 Subject: [PATCH 06/88] Refactor unchecked array transfer Signed-off-by: Engin Kayraklioglu --- modules/internal/ChapelArray.chpl | 69 +++++++++++++++++++++---------- 1 file changed, 47 insertions(+), 22 deletions(-) diff --git a/modules/internal/ChapelArray.chpl b/modules/internal/ChapelArray.chpl index ec3fca0bb28b..1c57da1d6cdd 100644 --- a/modules/internal/ChapelArray.chpl +++ b/modules/internal/ChapelArray.chpl @@ -2313,36 +2313,61 @@ module ChapelArray { } } + private proc chpl__staticCheckShortArrayTransfer(a, b) param { + return isProtoSlice(a) && isProtoSlice(b); + } + + private inline proc chpl__dynamicCheckShortArrayTransfer(a, b) { + param localCompilation = _local && CHPL_LOCALE_MODEL=="flat"; + const sizeOk = a.sizeAs(uint) > 100; + if localCompilation { + return sizeOk; + } + else { + return sizeOk && + __primitive("_wide_get_locale", a) == + __primitive("_wide_get_locale", b); + } + } + pragma "find user line" inline proc chpl__uncheckedArrayTransfer(ref a, b, param kind) { - - if isProtoSlice(a) && isProtoSlice(b) && a.sizeAs(int) < 100 { + if chpl__serializeAssignment(a, b) { + chpl__transferArray(a, b, kind); + } + else if chpl__staticCheckShortArrayTransfer(a, b) && + chpl__dynamicCheckShortArrayTransfer(a, b) { chpl__transferArray(a, b, kind, alwaysSerialize=true); } - else { - var done = false; - if !chpl__serializeAssignment(a, b) { - if chpl__compatibleForBulkTransfer(a, b, kind) { - done = chpl__bulkTransferArray(a, b); - } - else if chpl__compatibleForWidePtrBulkTransfer(a, b, kind) { - done = chpl__bulkTransferPtrArray(a, b); - } - // If we did a bulk transfer, it just bit copied, so need to - // run copy initializer 
still - if done { - if kind==_tElt.initCopy && !isPODType(a.eltType) { - initCopyAfterTransfer(a); - } else if kind==_tElt.move && (isSubtype(a.eltType, _array) || - isSubtype(a.eltType, _domain)) { - fixEltRuntimeTypesAfterTransfer(a); - } - } + else if chpl__compatibleForBulkTransfer(a, b, kind) { + if chpl__bulkTransferArray(a, b) { + chpl__initAfterBulkTransfer(a, kind); + } + else { + chpl__transferArray(a, b, kind); } - if !done { + } + else if chpl__compatibleForWidePtrBulkTransfer(a, b, kind) { + if chpl__bulkTransferPtrArray(a, b) { + chpl__initAfterBulkTransfer(a, kind); + } + else { chpl__transferArray(a, b, kind); } } + else { + chpl__transferArray(a, b, kind); + + } + } + + inline proc chpl__initAfterBulkTransfer(ref a, param kind) { + if kind==_tElt.initCopy && !isPODType(a.eltType) { + initCopyAfterTransfer(a); + } else if kind==_tElt.move && (isSubtype(a.eltType, _array) || + isSubtype(a.eltType, _domain)) { + fixEltRuntimeTypesAfterTransfer(a); + } } proc chpl__compatibleForWidePtrBulkTransfer(a, b, From 8f571b8cd691d49a9579753743ff9dd8f597aeef Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Fri, 9 Feb 2024 16:01:10 -0800 Subject: [PATCH 07/88] Fix a silly bug Signed-off-by: Engin Kayraklioglu --- modules/internal/ChapelArray.chpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/internal/ChapelArray.chpl b/modules/internal/ChapelArray.chpl index 1c57da1d6cdd..a9e3fa6ab41c 100644 --- a/modules/internal/ChapelArray.chpl +++ b/modules/internal/ChapelArray.chpl @@ -2319,7 +2319,7 @@ module ChapelArray { private inline proc chpl__dynamicCheckShortArrayTransfer(a, b) { param localCompilation = _local && CHPL_LOCALE_MODEL=="flat"; - const sizeOk = a.sizeAs(uint) > 100; + const sizeOk = a.sizeAs(uint) < 100; if localCompilation { return sizeOk; } From 1d411f31862c7ba72403820e981983dc67b35c45 Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Fri, 9 Feb 2024 16:02:05 -0800 Subject: [PATCH 08/88] Simplify domOrRange 
implementation Signed-off-by: Engin Kayraklioglu --- modules/internal/ChapelArray.chpl | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/modules/internal/ChapelArray.chpl b/modules/internal/ChapelArray.chpl index a9e3fa6ab41c..793343821bdc 100644 --- a/modules/internal/ChapelArray.chpl +++ b/modules/internal/ChapelArray.chpl @@ -2413,17 +2413,8 @@ module ChapelArray { inline proc chpl__bulkTransferArray(ref a: chpl__protoSlice, b: chpl__protoSlice) { - - if a.slicingExprs.size == 1 { - // check the other too? - return chpl__bulkTransferArray(a.ptrToArr.deref(), a.slicingExprs[0], - b.ptrToArr.deref(), b.slicingExprs[0]); - } - else { - return chpl__bulkTransferArray(a.ptrToArr.deref(), {(...a.slicingExprs)}, - b.ptrToArr.deref(), {(...b.slicingExprs)}); - } - + return chpl__bulkTransferArray(a.ptrToArr.deref(), a.domOrRange, + b.ptrToArr.deref(), b.domOrRange); } inline proc chpl__bulkTransferArray(ref a: [], AD, const ref b: [], BD) { return chpl__bulkTransferArray(a._value, AD, b._value, BD); @@ -2441,17 +2432,18 @@ module ChapelArray { printf("this is probably not what you want\n"); } - inline proc domOrRange { - if rank == 1 then - return slicingExprs[0]; - else - return {(...slicingExprs)}; + inline proc domOrRange where rank==1 { + return slicingExprs; + } + + inline proc domOrRange where rank>1 { + return {(...slicingExprs)}; } inline proc rank param { return ptrToArr.deref().rank; } inline proc eltType type { return ptrToArr.deref().eltType; } inline proc _value { return ptrToArr.deref()._value; } - inline proc sizeAs(type t) { return domOrRange.sizeAs(t); } + inline proc sizeAs(type t) { return slicingExprs.sizeAs(t); } inline proc isRectangular() param { return ptrToArr.deref().isRectangular(); } iter these() ref { @@ -2483,6 +2475,11 @@ module ChapelArray { } } + proc chpl__createProtoSlice(ref Arr, slicingExprs) { + return new chpl__protoSlice(c_addrOf(Arr), slicingExprs); + } + + pragma "last resort" 
proc chpl__createProtoSlice(ref Arr, slicingExprs...) { return new chpl__protoSlice(c_addrOf(Arr), slicingExprs); } From 341d81f6e54bf7726aa4d2323d2624b3cb20cb7a Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Fri, 9 Feb 2024 16:17:54 -0800 Subject: [PATCH 09/88] Add a flag for short array transfer threshold and use a smaller default Signed-off-by: Engin Kayraklioglu --- modules/internal/ChapelArray.chpl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/modules/internal/ChapelArray.chpl b/modules/internal/ChapelArray.chpl index 793343821bdc..548ba109f57d 100644 --- a/modules/internal/ChapelArray.chpl +++ b/modules/internal/ChapelArray.chpl @@ -80,6 +80,9 @@ module ChapelArray { @chpldoc.nodoc config param logAllArrEltAccess = false; + @chpldoc.nodoc + config const shortArrayTransferThreshold = 50; + proc _isPrivatized(value) param do return (!compiledForSingleLocale() || CHPL_LOCALE_MODEL=="gpu") && ((_privatization && value!.dsiSupportsPrivatization()) || @@ -2319,7 +2322,7 @@ module ChapelArray { private inline proc chpl__dynamicCheckShortArrayTransfer(a, b) { param localCompilation = _local && CHPL_LOCALE_MODEL=="flat"; - const sizeOk = a.sizeAs(uint) < 100; + const sizeOk = a.sizeAs(uint) < shortArrayTransferThreshold; if localCompilation { return sizeOk; } From 43cdd349db34e4aee58ee42b25f46511641b4190 Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Fri, 9 Feb 2024 16:23:38 -0800 Subject: [PATCH 10/88] Cleanup and some comments Signed-off-by: Engin Kayraklioglu --- modules/internal/ChapelArray.chpl | 10 ++++------ modules/internal/DefaultRectangular.chpl | 7 ------- 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/modules/internal/ChapelArray.chpl b/modules/internal/ChapelArray.chpl index 548ba109f57d..d686f0e22ced 100644 --- a/modules/internal/ChapelArray.chpl +++ b/modules/internal/ChapelArray.chpl @@ -2317,6 +2317,8 @@ module ChapelArray { } private proc chpl__staticCheckShortArrayTransfer(a, b) param { + // 
this is the case I'm focusing on in the initial PR. This can definitely + // be loosened up... by a lot. return isProtoSlice(a) && isProtoSlice(b); } @@ -2327,6 +2329,8 @@ module ChapelArray { return sizeOk; } else { + // No `.locale` to avoid overheads. Note that this is an optimization for + // fast-running code. Small things matter. return sizeOk && __primitive("_wide_get_locale", a) == __primitive("_wide_get_locale", b); @@ -2489,18 +2493,12 @@ module ChapelArray { proc chpl__basesSupportViewTransfer(a, b) param { return chpl__isDROrDRView(a) && chpl__isDROrDRView(b); - /*return false;*/ } proc chpl__slicingExprsSupportViewTransfer(x...) param { return (isHomogeneousTuple(x) && isRange(x[0])); } - proc chpl__bulkTransferView(a, aTuple, b, bTuple) { - compilerAssert(aTuple.size == bTuple.size); - chpl__bulkTransferArray(a, {(...aTuple)}, b, {(...bTuple)}); - } - inline proc chpl__bulkTransferArray(destClass, destView, srcClass, srcView) { var success = false; diff --git a/modules/internal/DefaultRectangular.chpl b/modules/internal/DefaultRectangular.chpl index 562d3325820c..feebbdd7342f 100644 --- a/modules/internal/DefaultRectangular.chpl +++ b/modules/internal/DefaultRectangular.chpl @@ -2054,13 +2054,6 @@ module DefaultRectangular { return transferHelper(this, destDom, srcClass, srcDom); } - /*proc DefaultRectangularArr.doiBulkTransferFromKnown(destRange: range,*/ - /*srcClass:DefaultRectangularArr,*/ - /*srcRange: range) : bool {*/ - /*return transferHelper(this, destRange, srcClass, srcRange);*/ - - /*}*/ - private proc transferHelper(A, aView, B, bView) : bool { if A.rank == B.rank && aView.hasUnitStride() && bView.hasUnitStride() && From a87fcf17614ebfbea07c5a6579bd5a01559a545b Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Fri, 9 Feb 2024 16:24:28 -0800 Subject: [PATCH 11/88] Add test Signed-off-by: Engin Kayraklioglu --- .../arrayViewElision/viewElisionPerf.chpl | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 
test/optimizations/arrayViewElision/viewElisionPerf.chpl diff --git a/test/optimizations/arrayViewElision/viewElisionPerf.chpl b/test/optimizations/arrayViewElision/viewElisionPerf.chpl new file mode 100644 index 000000000000..cb655445267b --- /dev/null +++ b/test/optimizations/arrayViewElision/viewElisionPerf.chpl @@ -0,0 +1,42 @@ +use CTypes; +use Time; + +config type elemType = int; +var t: stopwatch; +config const totalSeconds = 5; +config const useLoop = false; +config const arrSize = 10; +config const checkGranularity = 10000; + +const arrRange = 1..arrSize; +const sliceRange = 2..arrSize; +var Arr1, Arr2: [1..arrSize] elemType; + +Arr2[2] = 4; + +t.start(); +var totalOps = 0; +while t.elapsed() < totalSeconds { + if useLoop { + for 1..checkGranularity { + for i in 2..arrSize { + Arr1[i] = Arr2[i]; + } + } + } + else { + for 1..checkGranularity { + Arr1[2..arrSize] = Arr2[2..arrSize]; + } + } + + totalOps += checkGranularity; +} +t.stop(); + +writeln("Check = ", Arr1[2]); + +const memCopied: real = totalOps*sliceRange.size*numBytes(elemType); +writef("Total time(s): %.2dr\n", t.elapsed()); +writef("Total memory copied(GB): %.2dr\n", memCopied/2**30); +writef("Sustained throughput(GB/s): %.2dr\n", memCopied/2**30/t.elapsed()); From 9896b8febcd0c7f4a4064279cb7e15e737d01d4a Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Fri, 9 Feb 2024 16:59:21 -0800 Subject: [PATCH 12/88] Remove a compiler debugging output Signed-off-by: Engin Kayraklioglu --- compiler/optimizations/forallOptimizations.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/compiler/optimizations/forallOptimizations.cpp b/compiler/optimizations/forallOptimizations.cpp index b989b1f499ad..cdb961321045 100644 --- a/compiler/optimizations/forallOptimizations.cpp +++ b/compiler/optimizations/forallOptimizations.cpp @@ -2464,8 +2464,6 @@ static void bulkViewTransfer() { for_alive_in_Vec (CallExpr, call, gCallExprs) { if (call->getModule()->modTag == MOD_USER) { if (call->isNamed("=")) { - 
list_view(call); - CallExpr* lhs = toCallExpr(call->get(1)); CallExpr* rhs = toCallExpr(call->get(2)); if (lhs && rhs) { From e47df76eaa6ad78e46ef1a21edba00c264b0a6ff Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Wed, 12 Jun 2024 12:52:12 -0700 Subject: [PATCH 13/88] Add test, whitespace adjustments Signed-off-by: Engin Kayraklioglu --- modules/internal/ChapelArray.chpl | 8 +++---- .../arrayViewElision/viewElisionPerf.chpl | 23 +++++++++++++------ .../arrayViewElision/viewElisionPerf.compopts | 1 + .../arrayViewElision/viewElisionPerf.execopts | 1 + .../arrayViewElision/viewElisionPerf.good | 5 ++++ 5 files changed, 27 insertions(+), 11 deletions(-) create mode 100644 test/optimizations/arrayViewElision/viewElisionPerf.compopts create mode 100644 test/optimizations/arrayViewElision/viewElisionPerf.execopts create mode 100644 test/optimizations/arrayViewElision/viewElisionPerf.good diff --git a/modules/internal/ChapelArray.chpl b/modules/internal/ChapelArray.chpl index d686f0e22ced..de1c3db1db99 100644 --- a/modules/internal/ChapelArray.chpl +++ b/modules/internal/ChapelArray.chpl @@ -2167,7 +2167,7 @@ module ChapelArray { return isArray(a) || isProtoSlice(a); } - proc areBothArraysOrProtoSlices(a,b) { + proc areBothArraysOrProtoSlices(a, b) { return isArrayOrProtoSlice(a) && isArrayOrProtoSlice(b); } @@ -2364,7 +2364,6 @@ module ChapelArray { } else { chpl__transferArray(a, b, kind); - } } @@ -2420,6 +2419,9 @@ module ChapelArray { inline proc chpl__bulkTransferArray(ref a: chpl__protoSlice, b: chpl__protoSlice) { + if debugBulkTransfer { + chpl_debug_writeln("Performing protoSlice bulk transfer"); + } return chpl__bulkTransferArray(a.ptrToArr.deref(), a.domOrRange, b.ptrToArr.deref(), b.domOrRange); } @@ -2507,8 +2509,6 @@ module ChapelArray { } bulkTransferDebug("in chpl__bulkTransferArray"); - bulkTransferDebug("destView.type: " + destView.type:string + - "srcView.type: " + srcView.type:string); // // BHARSH TODO: I would prefer to hoist these 
'canResolveMethod' calls into diff --git a/test/optimizations/arrayViewElision/viewElisionPerf.chpl b/test/optimizations/arrayViewElision/viewElisionPerf.chpl index cb655445267b..5fd35796dd43 100644 --- a/test/optimizations/arrayViewElision/viewElisionPerf.chpl +++ b/test/optimizations/arrayViewElision/viewElisionPerf.chpl @@ -7,16 +7,19 @@ config const totalSeconds = 5; config const useLoop = false; config const arrSize = 10; config const checkGranularity = 10000; +config const correctness = false; const arrRange = 1..arrSize; const sliceRange = 2..arrSize; var Arr1, Arr2: [1..arrSize] elemType; +Arr2 = 2; + Arr2[2] = 4; t.start(); var totalOps = 0; -while t.elapsed() < totalSeconds { +do { if useLoop { for 1..checkGranularity { for i in 2..arrSize { @@ -31,12 +34,18 @@ while t.elapsed() < totalSeconds { } totalOps += checkGranularity; -} +} while t.elapsed() < totalSeconds; t.stop(); -writeln("Check = ", Arr1[2]); -const memCopied: real = totalOps*sliceRange.size*numBytes(elemType); -writef("Total time(s): %.2dr\n", t.elapsed()); -writef("Total memory copied(GB): %.2dr\n", memCopied/2**30); -writef("Sustained throughput(GB/s): %.2dr\n", memCopied/2**30/t.elapsed()); +if correctness { + assert(totalOps == 1); + assert(Arr2[2] == 4); + for a in Arr2[3..] 
do assert(a == 2); +} +else { + const memCopied: real = totalOps*sliceRange.size*numBytes(elemType); + writef("Total time(s): %.2dr\n", t.elapsed()); + writef("Total memory copied(GB): %.2dr\n", memCopied/2**30); + writef("Sustained throughput(GB/s): %.2dr\n", memCopied/2**30/t.elapsed()); +} diff --git a/test/optimizations/arrayViewElision/viewElisionPerf.compopts b/test/optimizations/arrayViewElision/viewElisionPerf.compopts new file mode 100644 index 000000000000..ea3b1c15d932 --- /dev/null +++ b/test/optimizations/arrayViewElision/viewElisionPerf.compopts @@ -0,0 +1 @@ +-sdebugBulkTransfer=true diff --git a/test/optimizations/arrayViewElision/viewElisionPerf.execopts b/test/optimizations/arrayViewElision/viewElisionPerf.execopts new file mode 100644 index 000000000000..6541ae922934 --- /dev/null +++ b/test/optimizations/arrayViewElision/viewElisionPerf.execopts @@ -0,0 +1 @@ +--correctness=true -suseLoop=false --checkGranularity=1 --arrSize=1000 --totalSeconds=-1 diff --git a/test/optimizations/arrayViewElision/viewElisionPerf.good b/test/optimizations/arrayViewElision/viewElisionPerf.good new file mode 100644 index 000000000000..a0d7afe160e5 --- /dev/null +++ b/test/optimizations/arrayViewElision/viewElisionPerf.good @@ -0,0 +1,5 @@ +Performing protoSlice bulk transfer +operator =(a:[],b:[]): in chpl__bulkTransferArray +operator =(a:[],b:[]): attempting doiBulkTransferFromKnown +In DefaultRectangular._simpleTransfer(): Alo=(2,), Blo=(2,), len=999, elemSize=8 +operator =(a:[],b:[]): successfully completed bulk transfer From 61d26a5a66fa2474b63d1bb4c7b4532331492d40 Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Wed, 12 Jun 2024 13:14:25 -0700 Subject: [PATCH 14/88] Limit to resolved symexprs Signed-off-by: Engin Kayraklioglu --- compiler/optimizations/forallOptimizations.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/compiler/optimizations/forallOptimizations.cpp b/compiler/optimizations/forallOptimizations.cpp index 
cdb961321045..aae9b96ffdcb 100644 --- a/compiler/optimizations/forallOptimizations.cpp +++ b/compiler/optimizations/forallOptimizations.cpp @@ -2466,7 +2466,8 @@ static void bulkViewTransfer() { if (call->isNamed("=")) { CallExpr* lhs = toCallExpr(call->get(1)); CallExpr* rhs = toCallExpr(call->get(2)); - if (lhs && rhs) { + if (lhs && !isUnresolvedSymExpr(lhs->baseExpr) && + rhs && !isUnresolvedSymExpr(rhs->baseExpr)) { candidates.push_back(call); } } From ad527a2fc3f7f221513632bc8fd2f0c2d931520a Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Wed, 12 Jun 2024 15:17:47 -0700 Subject: [PATCH 15/88] Use the correct intent on foreach Signed-off-by: Engin Kayraklioglu --- modules/internal/DefaultRectangular.chpl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/internal/DefaultRectangular.chpl b/modules/internal/DefaultRectangular.chpl index feebbdd7342f..04c7bbd245ea 100644 --- a/modules/internal/DefaultRectangular.chpl +++ b/modules/internal/DefaultRectangular.chpl @@ -1647,7 +1647,8 @@ module DefaultRectangular { const second = info.getDataIndex(chpl__intToIdx(viewRange.idxType, chpl__idxToInt(viewRange.low)+1)); const step = (second-first); const last = first + (viewRange.size:step.type-1) * step; - foreach i in chpl_direct_pos_stride_range_iter(first, last, step) { + foreach i in chpl_direct_pos_stride_range_iter(first, last, step) + with (ref info) { yield info.theData(i); } } else { From e4e4249c1117046cd5b159d4db7158d81674d21c Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Wed, 12 Jun 2024 15:30:21 -0700 Subject: [PATCH 16/88] Only check for calls that have a baseExpr Signed-off-by: Engin Kayraklioglu --- compiler/optimizations/forallOptimizations.cpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/compiler/optimizations/forallOptimizations.cpp b/compiler/optimizations/forallOptimizations.cpp index aae9b96ffdcb..abd7d199951f 100644 --- a/compiler/optimizations/forallOptimizations.cpp 
+++ b/compiler/optimizations/forallOptimizations.cpp @@ -2458,16 +2458,23 @@ static bool isLocalAccess(CallExpr *call) { return false; } +static bool exprSuitableForProtoSlice(Expr* e) { + if (CallExpr* call = toCallExpr(e)) { + if (call->baseExpr != nullptr && !isUnresolvedSymExpr(call->baseExpr)) { + return true; + } + } + return false; +} + static void bulkViewTransfer() { std::vector candidates; for_alive_in_Vec (CallExpr, call, gCallExprs) { if (call->getModule()->modTag == MOD_USER) { if (call->isNamed("=")) { - CallExpr* lhs = toCallExpr(call->get(1)); - CallExpr* rhs = toCallExpr(call->get(2)); - if (lhs && !isUnresolvedSymExpr(lhs->baseExpr) && - rhs && !isUnresolvedSymExpr(rhs->baseExpr)) { + if (exprSuitableForProtoSlice(call->get(1)) && + exprSuitableForProtoSlice(call->get(2))) { candidates.push_back(call); } } From 3e92c8bc68e633cb6b5b10a1b59fec61987afdf6 Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Wed, 12 Jun 2024 15:37:49 -0700 Subject: [PATCH 17/88] Prevent nested calls for the time being Signed-off-by: Engin Kayraklioglu --- compiler/optimizations/forallOptimizations.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/compiler/optimizations/forallOptimizations.cpp b/compiler/optimizations/forallOptimizations.cpp index abd7d199951f..12c45fe13a06 100644 --- a/compiler/optimizations/forallOptimizations.cpp +++ b/compiler/optimizations/forallOptimizations.cpp @@ -2460,7 +2460,7 @@ static bool isLocalAccess(CallExpr *call) { static bool exprSuitableForProtoSlice(Expr* e) { if (CallExpr* call = toCallExpr(e)) { - if (call->baseExpr != nullptr && !isUnresolvedSymExpr(call->baseExpr)) { + if (call->baseExpr != nullptr && isSymExpr(call->baseExpr)) { return true; } } @@ -2475,6 +2475,7 @@ static void bulkViewTransfer() { if (call->isNamed("=")) { if (exprSuitableForProtoSlice(call->get(1)) && exprSuitableForProtoSlice(call->get(2))) { + //nprint_view(call); candidates.push_back(call); } } From 
aab4aa5fb47d6e3970932f7bc60de5f64792d455 Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Wed, 12 Jun 2024 15:56:13 -0700 Subject: [PATCH 18/88] Add a flag to control short array transfer optimization Signed-off-by: Engin Kayraklioglu --- modules/internal/ChapelArray.chpl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/modules/internal/ChapelArray.chpl b/modules/internal/ChapelArray.chpl index de1c3db1db99..97215aa71168 100644 --- a/modules/internal/ChapelArray.chpl +++ b/modules/internal/ChapelArray.chpl @@ -80,6 +80,8 @@ module ChapelArray { @chpldoc.nodoc config param logAllArrEltAccess = false; + @chpldoc.nodoc + config param disableShortArrayTransferOpt = false; @chpldoc.nodoc config const shortArrayTransferThreshold = 50; @@ -2319,7 +2321,7 @@ module ChapelArray { private proc chpl__staticCheckShortArrayTransfer(a, b) param { // this is the case I'm focusing on in the initial PR. This can definitely // be loosened up... by a lot. - return isProtoSlice(a) && isProtoSlice(b); + return !disableShortArrayTransferOpt && isProtoSlice(a) && isProtoSlice(b); } private inline proc chpl__dynamicCheckShortArrayTransfer(a, b) { From 89f63253b1c0264727aa105c775bed9d98d31746 Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Wed, 12 Jun 2024 16:05:49 -0700 Subject: [PATCH 19/88] Fix chpl__protoSlice.sizeAs Signed-off-by: Engin Kayraklioglu --- modules/internal/ChapelArray.chpl | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/modules/internal/ChapelArray.chpl b/modules/internal/ChapelArray.chpl index 97215aa71168..3e9fc02e4be3 100644 --- a/modules/internal/ChapelArray.chpl +++ b/modules/internal/ChapelArray.chpl @@ -2454,7 +2454,18 @@ module ChapelArray { inline proc rank param { return ptrToArr.deref().rank; } inline proc eltType type { return ptrToArr.deref().eltType; } inline proc _value { return ptrToArr.deref()._value; } - inline proc sizeAs(type t) { return slicingExprs.sizeAs(t); } + + inline proc 
sizeAs(type t) where rank==1 { + return slicingExprs.sizeAs(t); + } + + inline proc sizeAs(type t) { + var size = 1:t; + for param r in 0.. Date: Wed, 12 Jun 2024 16:08:43 -0700 Subject: [PATCH 20/88] Fix chpl__isDROrDRView Signed-off-by: Engin Kayraklioglu --- modules/internal/ChapelArray.chpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/internal/ChapelArray.chpl b/modules/internal/ChapelArray.chpl index 3e9fc02e4be3..1cb8293a9958 100644 --- a/modules/internal/ChapelArray.chpl +++ b/modules/internal/ChapelArray.chpl @@ -516,7 +516,7 @@ module ChapelArray { param isDRView = chpl__isArrayView(value) && chpl__getActualArray(value).isDefaultRectangular(); return isDR || isDRView; } else { - compilerError("Invalid argument for chpl__isDROrDRView"); + return false; } } From ec4f17eb8a7f20fb3120a8685a8bce96caba9096 Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Wed, 12 Jun 2024 16:18:48 -0700 Subject: [PATCH 21/88] Avoid considering captured functions Signed-off-by: Engin Kayraklioglu --- compiler/optimizations/forallOptimizations.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/compiler/optimizations/forallOptimizations.cpp b/compiler/optimizations/forallOptimizations.cpp index 12c45fe13a06..99811729fdf6 100644 --- a/compiler/optimizations/forallOptimizations.cpp +++ b/compiler/optimizations/forallOptimizations.cpp @@ -2460,8 +2460,10 @@ static bool isLocalAccess(CallExpr *call) { static bool exprSuitableForProtoSlice(Expr* e) { if (CallExpr* call = toCallExpr(e)) { - if (call->baseExpr != nullptr && isSymExpr(call->baseExpr)) { - return true; + if (SymExpr* callBase = toSymExpr(call->baseExpr)) { + if (!isFnSymbol(callBase->symbol())) { + return true; + } } } return false; From aeaf4038dfba7a47b8710d5cd119fa401eb693b9 Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Wed, 12 Jun 2024 16:38:11 -0700 Subject: [PATCH 22/88] Avoid touching negative-strided slices Signed-off-by: Engin Kayraklioglu --- 
modules/dists/DSIUtil.chpl | 2 ++ modules/internal/ChapelArray.chpl | 14 +++++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/modules/dists/DSIUtil.chpl b/modules/dists/DSIUtil.chpl index efbc4977d83a..aa16ee241999 100644 --- a/modules/dists/DSIUtil.chpl +++ b/modules/dists/DSIUtil.chpl @@ -498,6 +498,8 @@ private proc asap1(arg) { if isSubtype(arg.type, BaseDom) then return asapTuple(arg.dsiDims()); if isSubtype(arg.type, BaseArr) then return asapTuple(arg.dom.dsiDims()); compilerError("asap1: unsupported argument type ", arg.type:string); + return false; // otherwise we get resolution errors before the compilerError + // above } // asapP1 = All Strides Are Positive - Param - 1 arg diff --git a/modules/internal/ChapelArray.chpl b/modules/internal/ChapelArray.chpl index 1cb8293a9958..b38d194daec2 100644 --- a/modules/internal/ChapelArray.chpl +++ b/modules/internal/ChapelArray.chpl @@ -2511,7 +2511,19 @@ module ChapelArray { } proc chpl__slicingExprsSupportViewTransfer(x...) param { - return (isHomogeneousTuple(x) && isRange(x[0])); + if isHomogeneousTuple(x) && isRange(x[0]) { + for param i in 0.. Date: Wed, 12 Jun 2024 17:01:09 -0700 Subject: [PATCH 23/88] Fix an identity comparison Signed-off-by: Engin Kayraklioglu --- modules/internal/ChapelArray.chpl | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/modules/internal/ChapelArray.chpl b/modules/internal/ChapelArray.chpl index b38d194daec2..7bfd1d2c3424 100644 --- a/modules/internal/ChapelArray.chpl +++ b/modules/internal/ChapelArray.chpl @@ -2182,7 +2182,18 @@ module ChapelArray { // default initializer is a forall expr. E.g. arrayInClassRecord.chpl. 
return; - if a._value == b._value { + var eqVals: bool; + if isArray(a) && isArray(b) { + eqVals = (a._value == b._value); + } + else if isProtoSlice(a) && isProtoSlice(b) { + eqVals = (a == b); // default record comparison should cover it + } + else { + compilerError("Internal error: cross-type assignments are not supported"); + } + + if eqVals then { // Do nothing for A = A but we could generate a warning here // since it is probably unintended. We need this check here in order // to avoid memcpy(x,x) which happens inside doiBulkTransfer. From 9b6428f739b1b9a45b3457e5808b7094aa93f073 Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Wed, 12 Jun 2024 17:14:47 -0700 Subject: [PATCH 24/88] Add support for bounds checking Signed-off-by: Engin Kayraklioglu --- modules/internal/ChapelArray.chpl | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/modules/internal/ChapelArray.chpl b/modules/internal/ChapelArray.chpl index 7bfd1d2c3424..49f66380ac1b 100644 --- a/modules/internal/ChapelArray.chpl +++ b/modules/internal/ChapelArray.chpl @@ -2148,8 +2148,10 @@ module ChapelArray { @chpldoc.nodoc proc checkArrayShapesUponAssignment(a, b, forSwap = false) { if a.isRectangular() && b.isRectangular() { - const aDims = a._value.dom.dsiDims(), - bDims = b._value.dom.dsiDims(); + const aDims = if isProtoSlice(a) then a.dims() + else a._value.dom.dsiDims(); + const bDims = if isProtoSlice(b) then b.dims() + else b._value.dom.dsiDims(); compilerAssert(aDims.size == bDims.size); for param i in 0..aDims.size-1 { if aDims(i).sizeAs(uint) != bDims(i).sizeAs(uint) then @@ -2462,6 +2464,14 @@ module ChapelArray { return {(...slicingExprs)}; } + inline proc dims() where rank == 1 { + return (slicingExprs,); + } + + inline proc dims() { + return slicingExprs; + } + inline proc rank param { return ptrToArr.deref().rank; } inline proc eltType type { return ptrToArr.deref().eltType; } inline proc _value { return ptrToArr.deref()._value; } From 
59938605c53381ac0c23dc804cc878eb5ca315fa Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Wed, 12 Jun 2024 17:17:13 -0700 Subject: [PATCH 25/88] Trivial fix Signed-off-by: Engin Kayraklioglu --- modules/standard/CTypes.chpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/standard/CTypes.chpl b/modules/standard/CTypes.chpl index a66c03b84e49..ff0780dbab18 100644 --- a/modules/standard/CTypes.chpl +++ b/modules/standard/CTypes.chpl @@ -997,7 +997,7 @@ module CTypes { @chpldoc.nodoc inline proc c_addrOf(ref arr: []) { if (!isSubtype(arr.domain._instance.type, DefaultRectangularDom)) then - compilerError("Only single-locale rectangular arrays support c_addrOfConst() at present"); + compilerError("Only single-locale rectangular arrays support c_addrOf() at present"); if (boundsChecking && arr._value.locale != here) then halt( From c36d1795b9327722b48c98e3a77d8773cdb48ed7 Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Thu, 13 Jun 2024 12:46:16 -0700 Subject: [PATCH 26/88] Fix a strange bug with iterators Signed-off-by: Engin Kayraklioglu --- modules/internal/ChapelArray.chpl | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/modules/internal/ChapelArray.chpl b/modules/internal/ChapelArray.chpl index 49f66380ac1b..febb19a981d5 100644 --- a/modules/internal/ChapelArray.chpl +++ b/modules/internal/ChapelArray.chpl @@ -2490,15 +2490,23 @@ module ChapelArray { inline proc isRectangular() param { return ptrToArr.deref().isRectangular(); } iter these() ref { - ref arrInst = ptrToArr.deref()._instance; if rank == 1 then { - foreach elem in chpl__serialViewIter1D(arrInst, domOrRange) { + foreach elem in chpl__serialViewIter1D(ptrToArr.deref()._instance, + domOrRange) { yield elem; } } else { - const viewDomInst = domOrRange._instance; - foreach elem in chpl__serialViewIter(arrInst, viewDomInst) { + + /* + Storing `inst` here and iterating over `inst` doesn't seem to work. 
+ Check the arrays primer for how that causes issues. Potentially an + iterator inlining issue, or memory cleanup going sideways. + + const inst = domOrRange._instance; + */ + foreach elem in chpl__serialViewIter(ptrToArr.deref()._instance, + domOrRange._instance) { yield elem; } } From 7bc9991f27b00ae48bda4ec088bdb07827ce5919 Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Thu, 13 Jun 2024 15:19:01 -0700 Subject: [PATCH 27/88] Avoid dealing with slices of slices for the time being Signed-off-by: Engin Kayraklioglu --- compiler/optimizations/forallOptimizations.cpp | 3 +++ modules/internal/ChapelArray.chpl | 10 +++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/compiler/optimizations/forallOptimizations.cpp b/compiler/optimizations/forallOptimizations.cpp index 99811729fdf6..8d7e171023d9 100644 --- a/compiler/optimizations/forallOptimizations.cpp +++ b/compiler/optimizations/forallOptimizations.cpp @@ -2477,6 +2477,7 @@ static void bulkViewTransfer() { if (call->isNamed("=")) { if (exprSuitableForProtoSlice(call->get(1)) && exprSuitableForProtoSlice(call->get(2))) { + //std::cout << call->stringLoc() << std::endl; //nprint_view(call); candidates.push_back(call); } @@ -2528,5 +2529,7 @@ static void bulkViewTransfer() { call->insertBefore(arrCond); elseBlock->insertAtTail(call->remove()); + + //list_view(arrCond); } } diff --git a/modules/internal/ChapelArray.chpl b/modules/internal/ChapelArray.chpl index febb19a981d5..e6a48aae54cf 100644 --- a/modules/internal/ChapelArray.chpl +++ b/modules/internal/ChapelArray.chpl @@ -2526,6 +2526,8 @@ module ChapelArray { } } + // TODO can we allow const arrs to be passed here without breaking constness + // guarantees? 
proc chpl__createProtoSlice(ref Arr, slicingExprs) { return new chpl__protoSlice(c_addrOf(Arr), slicingExprs); } @@ -2536,7 +2538,13 @@ module ChapelArray { } proc chpl__basesSupportViewTransfer(a, b) param { - return chpl__isDROrDRView(a) && chpl__isDROrDRView(b); + /* + Want the following, but slices of slices caused some issues that I + couldn't fix on a short fuse. I don't think there's a major obstacle + there, though. + return chpl__isDROrDRView(a) && chpl__isDROrDRView(b); + */ + return a.isDefaultRectangular() && b.isDefaultRectangular(); } proc chpl__slicingExprsSupportViewTransfer(x...) param { From 7f391e7a395362fd0792955c4152e27a6ef8bcfa Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Thu, 13 Jun 2024 15:58:21 -0700 Subject: [PATCH 28/88] Apply the optimization only for range builders Signed-off-by: Engin Kayraklioglu --- compiler/optimizations/forallOptimizations.cpp | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/compiler/optimizations/forallOptimizations.cpp b/compiler/optimizations/forallOptimizations.cpp index 8d7e171023d9..2a8e5ec4cf6a 100644 --- a/compiler/optimizations/forallOptimizations.cpp +++ b/compiler/optimizations/forallOptimizations.cpp @@ -2460,9 +2460,22 @@ static bool isLocalAccess(CallExpr *call) { static bool exprSuitableForProtoSlice(Expr* e) { if (CallExpr* call = toCallExpr(e)) { + bool callWithSuitableBase = false; if (SymExpr* callBase = toSymExpr(call->baseExpr)) { if (!isFnSymbol(callBase->symbol())) { - return true; + callWithSuitableBase = true; + } + } + + if (callWithSuitableBase) { + for_actuals (actual, call) { + if (CallExpr* actualCall = toCallExpr(actual)) { + if ( (actualCall->isNamed("chpl_build_bounded_range") || + actualCall->isNamed("chpl_build_unbounded_range") || + actualCall->isNamed("chpl_build_low_bounded_range")) ) { + return true; + } + } } } } From e73dcd0ce5d37e51abc02f2bf979422995c048f7 Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Thu, 13 Jun 2024 
16:19:00 -0700 Subject: [PATCH 29/88] Revert "Apply the optimization only for range builders" This reverts commit ea6376e6dcab4bfe86d1a00dd7806e481d0b13b0. Signed-off-by: Engin Kayraklioglu --- compiler/optimizations/forallOptimizations.cpp | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/compiler/optimizations/forallOptimizations.cpp b/compiler/optimizations/forallOptimizations.cpp index 2a8e5ec4cf6a..8d7e171023d9 100644 --- a/compiler/optimizations/forallOptimizations.cpp +++ b/compiler/optimizations/forallOptimizations.cpp @@ -2460,22 +2460,9 @@ static bool isLocalAccess(CallExpr *call) { static bool exprSuitableForProtoSlice(Expr* e) { if (CallExpr* call = toCallExpr(e)) { - bool callWithSuitableBase = false; if (SymExpr* callBase = toSymExpr(call->baseExpr)) { if (!isFnSymbol(callBase->symbol())) { - callWithSuitableBase = true; - } - } - - if (callWithSuitableBase) { - for_actuals (actual, call) { - if (CallExpr* actualCall = toCallExpr(actual)) { - if ( (actualCall->isNamed("chpl_build_bounded_range") || - actualCall->isNamed("chpl_build_unbounded_range") || - actualCall->isNamed("chpl_build_low_bounded_range")) ) { - return true; - } - } + return true; } } } From f006f6940bbe043b4b1366db3a72ed3597790ae2 Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Thu, 13 Jun 2024 17:29:56 -0700 Subject: [PATCH 30/88] Start improving the implementation Signed-off-by: Engin Kayraklioglu --- .../optimizations/forallOptimizations.cpp | 6 +- modules/internal/ChapelArray.chpl | 93 +++++++++++++++---- modules/internal/ChapelDomain.chpl | 34 ++++--- .../arrayViewElision/viewElisionPerf.chpl | 2 +- 4 files changed, 100 insertions(+), 35 deletions(-) diff --git a/compiler/optimizations/forallOptimizations.cpp b/compiler/optimizations/forallOptimizations.cpp index 8d7e171023d9..7f934a24609a 100644 --- a/compiler/optimizations/forallOptimizations.cpp +++ b/compiler/optimizations/forallOptimizations.cpp @@ -2477,8 +2477,8 @@ static void 
bulkViewTransfer() { if (call->isNamed("=")) { if (exprSuitableForProtoSlice(call->get(1)) && exprSuitableForProtoSlice(call->get(2))) { - //std::cout << call->stringLoc() << std::endl; - //nprint_view(call); + std::cout << call->stringLoc() << std::endl; + nprint_view(call); candidates.push_back(call); } } @@ -2530,6 +2530,6 @@ static void bulkViewTransfer() { call->insertBefore(arrCond); elseBlock->insertAtTail(call->remove()); - //list_view(arrCond); + list_view(arrCond); } } diff --git a/modules/internal/ChapelArray.chpl b/modules/internal/ChapelArray.chpl index e6a48aae54cf..b96835bff03f 100644 --- a/modules/internal/ChapelArray.chpl +++ b/modules/internal/ChapelArray.chpl @@ -80,6 +80,8 @@ module ChapelArray { @chpldoc.nodoc config param logAllArrEltAccess = false; + @chpldoc.nodoc + config param debugShortArrayTransferOpt = false; @chpldoc.nodoc config param disableShortArrayTransferOpt = false; @chpldoc.nodoc @@ -2340,6 +2342,10 @@ module ChapelArray { private inline proc chpl__dynamicCheckShortArrayTransfer(a, b) { param localCompilation = _local && CHPL_LOCALE_MODEL=="flat"; const sizeOk = a.sizeAs(uint) < shortArrayTransferThreshold; + if debugShortArrayTransferOpt { + chpl_debug_writeln(" Size: ", a.sizeAs(uint), + " Threshold: ", shortArrayTransferThreshold); + } if localCompilation { return sizeOk; } @@ -2359,6 +2365,8 @@ module ChapelArray { } else if chpl__staticCheckShortArrayTransfer(a, b) && chpl__dynamicCheckShortArrayTransfer(a, b) { + if debugShortArrayTransferOpt then + chpl_debug_writeln("Will perform a short array transfer"); chpl__transferArray(a, b, kind, alwaysSerialize=true); } else if chpl__compatibleForBulkTransfer(a, b, kind) { @@ -2444,32 +2452,66 @@ module ChapelArray { return chpl__bulkTransferArray(a._value, AD, b._value, BD); } + + private proc allBounded(ranges: range) param { + return ranges.bounds == boundKind.both; + } + + private proc allBounded(ranges) param { + if chpl__isTupleOfRanges(ranges) { + for i in 0..1 { - return 
{(...slicingExprs)}; - } - - inline proc dims() where rank == 1 { - return (slicingExprs,); + return {(...ranges)}; } inline proc dims() { - return slicingExprs; + return ranges; } inline proc rank param { return ptrToArr.deref().rank; } @@ -2477,13 +2519,13 @@ module ChapelArray { inline proc _value { return ptrToArr.deref()._value; } inline proc sizeAs(type t) where rank==1 { - return slicingExprs.sizeAs(t); + return ranges.sizeAs(t); } inline proc sizeAs(type t) { var size = 1:t; for param r in 0.. Date: Fri, 14 Jun 2024 16:25:28 -0700 Subject: [PATCH 31/88] Significantly tighten implementation by moving some of the logic to the compiler Signed-off-by: Engin Kayraklioglu --- compiler/AST/primitive.cpp | 2 + compiler/include/forallOptimizations.h | 28 ++++ .../optimizations/forallOptimizations.cpp | 151 +++++++++++++++--- compiler/resolution/preFold.cpp | 19 +++ frontend/include/chpl/uast/PragmaList.h | 4 + frontend/include/chpl/uast/prim-ops-list.h | 2 + frontend/lib/resolution/prims.cpp | 1 + modules/internal/ChapelArray.chpl | 60 ++++--- 8 files changed, 221 insertions(+), 46 deletions(-) diff --git a/compiler/AST/primitive.cpp b/compiler/AST/primitive.cpp index 31c8854bb839..29248f224a48 100644 --- a/compiler/AST/primitive.cpp +++ b/compiler/AST/primitive.cpp @@ -1070,6 +1070,8 @@ initPrimitive() { prim_def(PRIM_MAYBE_LOCAL_ARR_ELEM, "may be local array element", returnInfoUnknown); prim_def(PRIM_MAYBE_AGGREGATE_ASSIGN, "may be aggregated assignment", returnInfoUnknown); + prim_def(PRIM_PROTO_SLICE_ASSIGN, "assign proto slices", returnInfoVoid); + prim_def(PRIM_ERROR, "error", returnInfoVoid, true); prim_def(PRIM_WARNING, "warning", returnInfoVoid, true); prim_def(PRIM_WHEN, "when case expressions", returnInfoVoid); diff --git a/compiler/include/forallOptimizations.h b/compiler/include/forallOptimizations.h index ced0511338cc..656c3c4873d4 100644 --- a/compiler/include/forallOptimizations.h +++ b/compiler/include/forallOptimizations.h @@ -77,4 +77,32 @@ void 
removeAggregationFromRecursiveForall(ForallStmt *forall); void transformConditionalAggregation(CondStmt *cond); void cleanupRemainingAggCondStmts(); +class ProtoSliceAssignHelper { +public: + ProtoSliceAssignHelper() = delete; + ProtoSliceAssignHelper(CallExpr* call); + ~ProtoSliceAssignHelper(); + + inline CondStmt* condStmt() const { return condStmt_; } + inline Expr* flag() const { return condStmt_->condExpr; } + inline bool supported() const { return supported_; } + inline BlockStmt* staticCheckBlock() const { return staticCheckBlock_; } + + CallExpr* getReplacement(); + +private: + CallExpr* call_; + CallExpr* newProtoSliceLhs_; + CallExpr* newProtoSliceRhs_; + CondStmt* condStmt_; + Symbol* tmpCondFlag_; + bool supported_; + BlockStmt* staticCheckBlock_; + + void findCondStmt(); + void findProtoSlices(); + bool handleOneProtoSlice(CallExpr* call); + CallExpr* findOneProtoSliceCall(Expr* e); +}; + #endif diff --git a/compiler/optimizations/forallOptimizations.cpp b/compiler/optimizations/forallOptimizations.cpp index 7f934a24609a..7f7bb2ff1e58 100644 --- a/compiler/optimizations/forallOptimizations.cpp +++ b/compiler/optimizations/forallOptimizations.cpp @@ -2469,6 +2469,120 @@ static bool exprSuitableForProtoSlice(Expr* e) { return false; } +ProtoSliceAssignHelper::ProtoSliceAssignHelper(CallExpr* call): + call_(call), + newProtoSliceLhs_(nullptr), + newProtoSliceRhs_(nullptr), + condStmt_(nullptr), + supported_(false), + staticCheckBlock_(nullptr) { + + findProtoSlices(); + INT_ASSERT(newProtoSliceLhs_); + INT_ASSERT(newProtoSliceRhs_); + + + // this is just a temporary block. we add some AST in it, resolve and then + // remove them. I cannot be sure if those operations could leave any AST in. + // So, when we destroy this helper, we'll remove this block just to be sure. 
+ staticCheckBlock_ = new BlockStmt(); + + BlockStmt* parentBlock = toBlockStmt(call->parentExpr); + parentBlock->insertAtHead(staticCheckBlock_); + + supported_ = handleOneProtoSlice(newProtoSliceLhs_) && + handleOneProtoSlice(newProtoSliceRhs_); + + findCondStmt(); + INT_ASSERT(condStmt_); +} + +ProtoSliceAssignHelper::~ProtoSliceAssignHelper() { + staticCheckBlock_->remove(); + tmpCondFlag_->getSingleDef()->getStmtExpr()->remove(); + tmpCondFlag_->defPoint->remove(); +} + +CallExpr* ProtoSliceAssignHelper::getReplacement() { + return new CallExpr("=", call_->get(1)->copy(), call_->get(2)->copy()); +} + +bool ProtoSliceAssignHelper::handleOneProtoSlice(CallExpr* call) { + INT_ASSERT(call->isNamed("chpl__createProtoSlice")); + + CallExpr* typeCheck = new CallExpr("chpl__typesSupportArrayViewElision"); + for_actuals (actual, call) { + typeCheck->insertAtTail(actual->typeInfo()->symbol); + } + + VarSymbol* tmp = newTemp("call_tmp", dtBool); + DefExpr* flagDef = new DefExpr(tmp, typeCheck); + + staticCheckBlock_->insertAtTail(flagDef); + + resolveExpr(typeCheck); + resolveExpr(flagDef); + + bool ret = (toSymExpr(flagDef->init)->symbol() == gTrue); + + flagDef->remove(); + + return ret; +} + +// e must be the lhs or rhs of PRIM_ASSIGN_PROTO_SLICES +// returns the `chpl__createProtoSlice call +CallExpr* ProtoSliceAssignHelper::findOneProtoSliceCall(Expr* e) { + SymExpr* lhsSE = toSymExpr(call_->get(1)); + INT_ASSERT(lhsSE); + + Symbol* lhs = lhsSE->symbol(); + CallExpr* lhsTmpMove = toCallExpr(lhs->getSingleDef()->getStmtExpr()); + INT_ASSERT(lhsTmpMove && lhsTmpMove->isPrimitive(PRIM_MOVE)); + + SymExpr* lhsTmpSE = toSymExpr(lhsTmpMove->get(2)); + INT_ASSERT(lhsTmpSE); + + Symbol* lhsTmpSym = lhsTmpSE->symbol(); + CallExpr* lhsMove = toCallExpr(lhsTmpSym->getSingleDef()->getStmtExpr()); + INT_ASSERT(lhsMove && lhsMove->isPrimitive(PRIM_MOVE)); + + return toCallExpr(lhsMove->get(2)); +} + +void ProtoSliceAssignHelper::findProtoSlices() { + newProtoSliceLhs_ = 
findOneProtoSliceCall(call_->get(1)); + newProtoSliceRhs_ = findOneProtoSliceCall(call_->get(2)); +} + +void ProtoSliceAssignHelper::findCondStmt() { + Expr* cur = call_; + while (cur) { + if (CondStmt* condStmt = toCondStmt(cur)) { + if (SymExpr* condExpr = toSymExpr(condStmt->condExpr)) { + if (condExpr->symbol()->hasFlag(FLAG_ARRAYVIEW_ELISION_FLAG)) { + tmpCondFlag_ = condExpr->symbol(); + condStmt_ = condStmt; + break; + } + else { + // this is an unknown conditional, this shouldn't have happened + INT_FATAL(call_, + "unexpected syntax tree generated by arrayview elision"); + } + } + } + + cur = cur->parentExpr; + } + + if (condStmt_ == nullptr) { + // where is the conditional? + INT_FATAL(call_, + "unexpected syntax tree generated by arrayview elision"); + } +} + static void bulkViewTransfer() { std::vector candidates; @@ -2477,8 +2591,8 @@ static void bulkViewTransfer() { if (call->isNamed("=")) { if (exprSuitableForProtoSlice(call->get(1)) && exprSuitableForProtoSlice(call->get(2))) { - std::cout << call->stringLoc() << std::endl; - nprint_view(call); + //std::cout << call->stringLoc() << std::endl; + //nprint_view(call); candidates.push_back(call); } } @@ -2494,26 +2608,21 @@ static void bulkViewTransfer() { Expr* lhsBase = lhs->baseExpr; Expr* rhsBase = rhs->baseExpr; - - CallExpr* arrCheck = new CallExpr("chpl__basesSupportViewTransfer", - lhsBase->copy(), rhsBase->copy()); - CallExpr* slicingExprCheck = - new CallExpr("chpl__slicingExprsSupportViewTransfer"); - - CallExpr* lhsPSCall = new CallExpr("chpl__createProtoSlice", - lhsBase->copy()); - CallExpr* rhsPSCall = new CallExpr("chpl__createProtoSlice", - rhsBase->copy()); - + CallExpr* lhsPSCall = new CallExpr("chpl__createProtoSlice", lhsBase->copy()); for_actuals(actual, lhs) { - slicingExprCheck->insertAtTail(actual->copy()); lhsPSCall->insertAtTail(actual->copy()); } + + CallExpr* rhsPSCall = new CallExpr("chpl__createProtoSlice", rhsBase->copy()); for_actuals(actual, rhs) { - 
slicingExprCheck->insertAtTail(actual->copy()); rhsPSCall->insertAtTail(actual->copy()); } - CallExpr* condExpr = new CallExpr("&&", arrCheck, slicingExprCheck); + + // arrayview elision placeholder + VarSymbol* placeholder = new VarSymbol("arrayview_elision_flag", dtBool); + placeholder->addFlag(FLAG_ARRAYVIEW_ELISION_FLAG); + + call->insertBefore(new DefExpr(placeholder, gFalse)); BlockStmt* thenBlock = new BlockStmt(); VarSymbol* lhsPS = new VarSymbol("lhs_proto_slice"); @@ -2521,15 +2630,17 @@ static void bulkViewTransfer() { thenBlock->insertAtTail(new DefExpr(lhsPS, lhsPSCall)); thenBlock->insertAtTail(new DefExpr(rhsPS, rhsPSCall)); - thenBlock->insertAtTail(new CallExpr("=", lhsPS, rhsPS)); + thenBlock->insertAtTail(new CallExpr(PRIM_PROTO_SLICE_ASSIGN, lhsPS, + rhsPS)); BlockStmt* elseBlock = new BlockStmt(); - CondStmt* arrCond = new CondStmt(condExpr, thenBlock, elseBlock); + CondStmt* cond = new CondStmt(new SymExpr(placeholder), thenBlock, + elseBlock); - call->insertBefore(arrCond); + call->insertBefore(cond); elseBlock->insertAtTail(call->remove()); - list_view(arrCond); + //list_view(cond); } } diff --git a/compiler/resolution/preFold.cpp b/compiler/resolution/preFold.cpp index cf4cbb0f285d..21c8a8906fb0 100644 --- a/compiler/resolution/preFold.cpp +++ b/compiler/resolution/preFold.cpp @@ -916,6 +916,25 @@ static Expr* preFoldPrimOp(CallExpr* call) { break; } + case PRIM_PROTO_SLICE_ASSIGN: { + ProtoSliceAssignHelper assignment(call); + + if (assignment.supported()) { + retval = assignment.getReplacement(); + call->replace(retval); + assignment.flag()->replace(new SymExpr(gTrue)); + } + else { + retval = new CallExpr(PRIM_NOOP); + assignment.condStmt()->insertBefore(retval); + assignment.flag()->replace(new SymExpr(gFalse)); + } + + assignment.condStmt()->foldConstantCondition(/*addEndOfStatement*/ false); + + break; + } + case PRIM_CALL_RESOLVES: case PRIM_CALL_AND_FN_RESOLVES: case PRIM_METHOD_CALL_RESOLVES: diff --git 
a/frontend/include/chpl/uast/PragmaList.h b/frontend/include/chpl/uast/PragmaList.h index beebfe3c067f..05491351c890 100644 --- a/frontend/include/chpl/uast/PragmaList.h +++ b/frontend/include/chpl/uast/PragmaList.h @@ -52,6 +52,10 @@ PRAGMA(ALIASING_ARRAY, ypr, "aliasing array", ncm) // can alias the same scopes as 'this' PRAGMA(ALIAS_SCOPE_FROM_THIS, ypr, "alias scope from this", ncm) +// Added to the condExpr of a static ArrayView Elision conditional. Should +// disappear after resolution +PRAGMA(ARRAYVIEW_ELISION_FLAG, npr, "static flag for arrayview elision", ncm) + // This flag is used in scalarReplace.cpp to determine if an assignment of a ref // has an allocator as the RHS. If so, then it is not creating an alias, since // the allocator function does not retain a reference to the referenced object. diff --git a/frontend/include/chpl/uast/prim-ops-list.h b/frontend/include/chpl/uast/prim-ops-list.h index 1285e1c2bae0..aaeb75586e99 100644 --- a/frontend/include/chpl/uast/prim-ops-list.h +++ b/frontend/include/chpl/uast/prim-ops-list.h @@ -230,6 +230,8 @@ PRIMITIVE_R(MAYBE_LOCAL_THIS, "may be local access") PRIMITIVE_R(MAYBE_LOCAL_ARR_ELEM, "may be local array element") PRIMITIVE_R(MAYBE_AGGREGATE_ASSIGN, "may be aggregated assignment") +PRIMITIVE_R(PROTO_SLICE_ASSIGN, "assign proto slices") + PRIMITIVE_R(ERROR, "error") PRIMITIVE_R(WARNING, "warning") PRIMITIVE_R(WHEN, "when case expressions") diff --git a/frontend/lib/resolution/prims.cpp b/frontend/lib/resolution/prims.cpp index 173c5080b276..606e4e1d0d00 100644 --- a/frontend/lib/resolution/prims.cpp +++ b/frontend/lib/resolution/prims.cpp @@ -1696,6 +1696,7 @@ CallResolutionResult resolvePrimCall(Context* context, case PRIM_MAYBE_LOCAL_THIS: case PRIM_MAYBE_LOCAL_ARR_ELEM: case PRIM_MAYBE_AGGREGATE_ASSIGN: + case PRIM_PROTO_SLICE_ASSIGN: case PRIM_BLOCK_PARAM_LOOP: case PRIM_BLOCK_WHILEDO_LOOP: case PRIM_BLOCK_DOWHILE_LOOP: diff --git a/modules/internal/ChapelArray.chpl b/modules/internal/ChapelArray.chpl 
index b96835bff03f..91f68adaba79 100644 --- a/modules/internal/ChapelArray.chpl +++ b/modules/internal/ChapelArray.chpl @@ -2510,10 +2510,14 @@ module ChapelArray { return {(...ranges)}; } - inline proc dims() { + inline proc dims() where chpl__isTupleOfRanges(this.ranges) { return ranges; } + inline proc dims() { + return (ranges,); + } + inline proc rank param { return ptrToArr.deref().rank; } inline proc eltType type { return ptrToArr.deref().eltType; } inline proc _value { return ptrToArr.deref()._value; } @@ -2570,15 +2574,30 @@ module ChapelArray { // TODO can we allow const arrs to be passed here without breaking constness // guarantees? - proc chpl__createProtoSlice(ref Arr, slicingExprs) { + // TODO we can also accept domains and ints (rank-change) + proc chpl__createProtoSlice(ref Arr, slicingExprs: range) { return new chpl__protoSlice(c_addrOf(Arr), slicingExprs); } pragma "last resort" - proc chpl__createProtoSlice(ref Arr, slicingExprs...) { + proc chpl__createProtoSlice(ref Arr, slicingExprs:range ...) { return new chpl__protoSlice(c_addrOf(Arr), slicingExprs); } + proc chpl__createProtoSlice(ref Arr, slicingExprs) { + // this is an array access. This call will be eliminated later in + // resolution, but we want it to live for a bit for easier resolution + return 0; + } + + pragma "last resort" + proc chpl__createProtoSlice(ref Arr, slicingExprs... ) { + // this is an array access. This call will be eliminated later in + // resolution, but we want it to live for a bit for easier resolution + return 0; + } + + proc chpl__exprSupportsViewTransfer(base, exprs...) param { if base.isDefaultRectangular() { @@ -2589,31 +2608,20 @@ module ChapelArray { return false; } - proc chpl__basesSupportViewTransfer(a, b) param { - /* - Want the following, but slices of slices caused some issues that I - couldn't fix on a short fuse. I don't think there's a major obstacle - there, though. 
- return chpl__isDROrDRView(a) && chpl__isDROrDRView(b); - */ - return a.isDefaultRectangular() && b.isDefaultRectangular(); - } - - proc chpl__slicingExprsSupportViewTransfer(x...) param { - /*compilerWarning(x.type:string);*/ - if isHomogeneousTuple(x) && isRange(x[0]) { - /*for param i in 0.. Date: Fri, 14 Jun 2024 16:40:41 -0700 Subject: [PATCH 32/88] Fix an easy mistake Signed-off-by: Engin Kayraklioglu --- modules/internal/ChapelArray.chpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/internal/ChapelArray.chpl b/modules/internal/ChapelArray.chpl index 91f68adaba79..6852f20d2c10 100644 --- a/modules/internal/ChapelArray.chpl +++ b/modules/internal/ChapelArray.chpl @@ -2459,7 +2459,7 @@ module ChapelArray { private proc allBounded(ranges) param { if chpl__isTupleOfRanges(ranges) { - for i in 0.. Date: Tue, 25 Jun 2024 21:25:43 -0700 Subject: [PATCH 33/88] Stricter control for helper functions Signed-off-by: Engin Kayraklioglu --- modules/internal/ChapelArray.chpl | 34 +++++++++++++++++++------------ 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/modules/internal/ChapelArray.chpl b/modules/internal/ChapelArray.chpl index 6852f20d2c10..cd67fa5f6b4a 100644 --- a/modules/internal/ChapelArray.chpl +++ b/modules/internal/ChapelArray.chpl @@ -2575,12 +2575,14 @@ module ChapelArray { // TODO can we allow const arrs to be passed here without breaking constness // guarantees? // TODO we can also accept domains and ints (rank-change) - proc chpl__createProtoSlice(ref Arr, slicingExprs: range) { + proc chpl__createProtoSlice(ref Arr, slicingExprs: range) + where chpl__baseTypeSupportAVE(Arr.type) { return new chpl__protoSlice(c_addrOf(Arr), slicingExprs); } pragma "last resort" - proc chpl__createProtoSlice(ref Arr, slicingExprs:range ...) { + proc chpl__createProtoSlice(ref Arr, slicingExprs:range ...) 
+ where chpl__baseTypeSupportAVE(Arr.type) { return new chpl__protoSlice(c_addrOf(Arr), slicingExprs); } @@ -2608,20 +2610,26 @@ module ChapelArray { return false; } - proc chpl__typesSupportArrayViewElision(type baseType, - type indexingTypes...) param: bool { + proc chpl__baseTypeSupportAVE(type baseType) param: bool { var dummy: baseType; - if isArrayType(baseType) && - isSubtype(dummy._instance.type, DefaultRectangularArr) { - for param tid in 0.. Date: Tue, 25 Jun 2024 21:38:07 -0700 Subject: [PATCH 34/88] Fix an easy bug Signed-off-by: Engin Kayraklioglu --- modules/internal/ChapelArray.chpl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/internal/ChapelArray.chpl b/modules/internal/ChapelArray.chpl index cd67fa5f6b4a..8eba2cb989ca 100644 --- a/modules/internal/ChapelArray.chpl +++ b/modules/internal/ChapelArray.chpl @@ -2611,10 +2611,11 @@ module ChapelArray { } proc chpl__baseTypeSupportAVE(type baseType) param: bool { + import Reflection; var dummy: baseType; return isArrayType(baseType) && isSubtype(dummy._instance.type, DefaultRectangularArr) && - canResolve("c_addrOf", dummy); + Reflection.canResolve("c_addrOf", dummy); } proc chpl__indexingExprsSupportAVE(type indexingTypes...) 
param: bool { From 4945bc9b5a1afa187452b5cd221149a4bbd7738c Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Tue, 25 Jun 2024 21:38:09 -0700 Subject: [PATCH 35/88] Add test Signed-off-by: Engin Kayraklioglu --- .../optimizations/arrayViewElision/basic.chpl | 39 +++++++++++++++++++ .../arrayViewElision/basic.compopts | 1 + .../optimizations/arrayViewElision/basic.good | 28 +++++++++++++ 3 files changed, 68 insertions(+) create mode 100644 test/optimizations/arrayViewElision/basic.chpl create mode 100644 test/optimizations/arrayViewElision/basic.compopts create mode 100644 test/optimizations/arrayViewElision/basic.good diff --git a/test/optimizations/arrayViewElision/basic.chpl b/test/optimizations/arrayViewElision/basic.chpl new file mode 100644 index 000000000000..4fff7ca551ef --- /dev/null +++ b/test/optimizations/arrayViewElision/basic.chpl @@ -0,0 +1,39 @@ +var A: [1..10] int = 1; +var B: [1..10] int = 2; + +proc test(dstRange, srcRange) { + A[dstRange] = B[srcRange]; + writeln(A); + A = 1; // reset +} + +writeln("Set first two:"); +test(1..2, 1..2); +test(1..2, 9..10); +test(1..2, ..2); +test(1..2, 9..); +test(..2, 1..2); +test(..2, 8..9); +test(..2, ..2); +test(..2, 9..); + +writeln("Set last two:"); +test(9..10, 1..2); +test(9..10, 9..10); +test(9..10, ..2); +test(9..10, 9..); +test(9.., 1..2); +test(9.., 9..10); +test(9.., ..2); +test(9.., 9..); + +writeln("Set all:"); +test(1..10, 1..10); +test(1..10, 1..); +test(1..10, ..); +test(1.., 1..10); +test(1.., 1..); +test(1.., ..); +test(.., 1..10); +test(.., 1..); +test(.., ..); diff --git a/test/optimizations/arrayViewElision/basic.compopts b/test/optimizations/arrayViewElision/basic.compopts new file mode 100644 index 000000000000..037ae1ee3cae --- /dev/null +++ b/test/optimizations/arrayViewElision/basic.compopts @@ -0,0 +1 @@ +-sdebugShortArrayTransferOpt=true diff --git a/test/optimizations/arrayViewElision/basic.good b/test/optimizations/arrayViewElision/basic.good new file mode 100644 index 
000000000000..18c5da2346ae --- /dev/null +++ b/test/optimizations/arrayViewElision/basic.good @@ -0,0 +1,28 @@ +Set first two: +2 2 1 1 1 1 1 1 1 1 +2 2 1 1 1 1 1 1 1 1 +2 2 1 1 1 1 1 1 1 1 +2 2 1 1 1 1 1 1 1 1 +2 2 1 1 1 1 1 1 1 1 +2 2 1 1 1 1 1 1 1 1 +2 2 1 1 1 1 1 1 1 1 +2 2 1 1 1 1 1 1 1 1 +Set last two: +1 1 1 1 1 1 1 1 2 2 +1 1 1 1 1 1 1 1 2 2 +1 1 1 1 1 1 1 1 2 2 +1 1 1 1 1 1 1 1 2 2 +1 1 1 1 1 1 1 1 2 2 +1 1 1 1 1 1 1 1 2 2 +1 1 1 1 1 1 1 1 2 2 +1 1 1 1 1 1 1 1 2 2 +Set all: +2 2 2 2 2 2 2 2 2 2 +2 2 2 2 2 2 2 2 2 2 +2 2 2 2 2 2 2 2 2 2 +2 2 2 2 2 2 2 2 2 2 +2 2 2 2 2 2 2 2 2 2 +2 2 2 2 2 2 2 2 2 2 +2 2 2 2 2 2 2 2 2 2 +2 2 2 2 2 2 2 2 2 2 +2 2 2 2 2 2 2 2 2 2 From 68fd8f38c1bf732203b830012f3e569315f48b16 Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Tue, 25 Jun 2024 22:18:47 -0700 Subject: [PATCH 36/88] Fix some more resolution issues Signed-off-by: Engin Kayraklioglu --- modules/internal/ChapelArray.chpl | 45 ++++++++++++----- .../optimizations/arrayViewElision/basic.good | 50 +++++++++++++++++++ 2 files changed, 83 insertions(+), 12 deletions(-) diff --git a/modules/internal/ChapelArray.chpl b/modules/internal/ChapelArray.chpl index 8eba2cb989ca..f7397f0396b0 100644 --- a/modules/internal/ChapelArray.chpl +++ b/modules/internal/ChapelArray.chpl @@ -2345,6 +2345,10 @@ module ChapelArray { if debugShortArrayTransferOpt { chpl_debug_writeln(" Size: ", a.sizeAs(uint), " Threshold: ", shortArrayTransferThreshold); + if sizeOk then + chpl_debug_writeln(" size qualifies"); + else + chpl_debug_writeln(" size doesn't qualify"); } if localCompilation { return sizeOk; @@ -2352,9 +2356,14 @@ module ChapelArray { else { // No `.locale` to avoid overheads. Note that this is an optimization for // fast-running code. Small things matter. 
- return sizeOk && - __primitive("_wide_get_locale", a) == - __primitive("_wide_get_locale", b); + const sameLocale = __primitive("_wide_get_locale", a) == + __primitive("_wide_get_locale", b); + if sameLocale then + chpl_debug_writeln(" locality qualifies"); + else + chpl_debug_writeln(" locality does not qualify"); + + return sizeOk && sameLocale; } } @@ -2365,8 +2374,6 @@ module ChapelArray { } else if chpl__staticCheckShortArrayTransfer(a, b) && chpl__dynamicCheckShortArrayTransfer(a, b) { - if debugShortArrayTransferOpt then - chpl_debug_writeln("Will perform a short array transfer"); chpl__transferArray(a, b, kind, alwaysSerialize=true); } else if chpl__compatibleForBulkTransfer(a, b, kind) { @@ -2453,7 +2460,7 @@ module ChapelArray { } - private proc allBounded(ranges: range) param { + private proc allBounded(ranges: range(?)) param { return ranges.bounds == boundKind.both; } @@ -2476,6 +2483,18 @@ module ChapelArray { var ptrToArr; // I want this to be a `forwarding ref` to the array var ranges; + proc init() { + // this constructor is called to create dummy protoSlices that will never + // be used and removed from the AST. + this.rank = 1; + this.idxType = int; + + var dummyArr = [1,]; + this.ptrToArr = c_addrOf(dummyArr); + this.ranges = (1..0,); + compilerWarning("created a dummy slice"); + } + proc init(ptrToArr, slicingExprs) { this.rank = ptrToArr.deref().rank; this.idxType = ptrToArr.deref().idxType; @@ -2488,7 +2507,8 @@ module ChapelArray { } else { this.ranges = tupleOfRangesSlice(ptrToArr.deref().dims(), - (slicingExprs,)); + (slicingExprs,))[0]; + // [0] at the end makes it a range instead of tuple of ranges } } @@ -2575,28 +2595,29 @@ module ChapelArray { // TODO can we allow const arrs to be passed here without breaking constness // guarantees? 
// TODO we can also accept domains and ints (rank-change) - proc chpl__createProtoSlice(ref Arr, slicingExprs: range) + proc chpl__createProtoSlice(ref Arr, slicingExprs: range(?)) where chpl__baseTypeSupportAVE(Arr.type) { return new chpl__protoSlice(c_addrOf(Arr), slicingExprs); } pragma "last resort" - proc chpl__createProtoSlice(ref Arr, slicingExprs:range ...) + proc chpl__createProtoSlice(ref Arr, slicingExprs:range(?) ...) where chpl__baseTypeSupportAVE(Arr.type) { return new chpl__protoSlice(c_addrOf(Arr), slicingExprs); } + pragma "last resort" proc chpl__createProtoSlice(ref Arr, slicingExprs) { // this is an array access. This call will be eliminated later in // resolution, but we want it to live for a bit for easier resolution - return 0; + return new chpl__protoSlice(); } pragma "last resort" proc chpl__createProtoSlice(ref Arr, slicingExprs... ) { // this is an array access. This call will be eliminated later in // resolution, but we want it to live for a bit for easier resolution - return 0; + return new chpl__protoSlice(); } @@ -2630,7 +2651,7 @@ module ChapelArray { proc chpl__typesSupportArrayViewElision(type baseType, type indexingTypes...) 
param: bool { return chpl__baseTypeSupportAVE(baseType) && - chpl__indexingExprsSupportAVE(indexingTypes); + chpl__indexingExprsSupportAVE((...indexingTypes)); } inline proc chpl__bulkTransferArray(destClass, destView, srcClass, srcView) { diff --git a/test/optimizations/arrayViewElision/basic.good b/test/optimizations/arrayViewElision/basic.good index 18c5da2346ae..058a1b4f8a13 100644 --- a/test/optimizations/arrayViewElision/basic.good +++ b/test/optimizations/arrayViewElision/basic.good @@ -1,28 +1,78 @@ Set first two: + Size: 2 Threshold: 50 + size qualifies 2 2 1 1 1 1 1 1 1 1 + Size: 2 Threshold: 50 + size qualifies 2 2 1 1 1 1 1 1 1 1 + Size: 2 Threshold: 50 + size qualifies 2 2 1 1 1 1 1 1 1 1 + Size: 2 Threshold: 50 + size qualifies 2 2 1 1 1 1 1 1 1 1 + Size: 2 Threshold: 50 + size qualifies 2 2 1 1 1 1 1 1 1 1 + Size: 2 Threshold: 50 + size qualifies 2 2 1 1 1 1 1 1 1 1 + Size: 2 Threshold: 50 + size qualifies 2 2 1 1 1 1 1 1 1 1 + Size: 2 Threshold: 50 + size qualifies 2 2 1 1 1 1 1 1 1 1 Set last two: + Size: 2 Threshold: 50 + size qualifies 1 1 1 1 1 1 1 1 2 2 + Size: 2 Threshold: 50 + size qualifies 1 1 1 1 1 1 1 1 2 2 + Size: 2 Threshold: 50 + size qualifies 1 1 1 1 1 1 1 1 2 2 + Size: 2 Threshold: 50 + size qualifies 1 1 1 1 1 1 1 1 2 2 + Size: 2 Threshold: 50 + size qualifies 1 1 1 1 1 1 1 1 2 2 + Size: 2 Threshold: 50 + size qualifies 1 1 1 1 1 1 1 1 2 2 + Size: 2 Threshold: 50 + size qualifies 1 1 1 1 1 1 1 1 2 2 + Size: 2 Threshold: 50 + size qualifies 1 1 1 1 1 1 1 1 2 2 Set all: + Size: 10 Threshold: 50 + size qualifies 2 2 2 2 2 2 2 2 2 2 + Size: 10 Threshold: 50 + size qualifies 2 2 2 2 2 2 2 2 2 2 + Size: 10 Threshold: 50 + size qualifies 2 2 2 2 2 2 2 2 2 2 + Size: 10 Threshold: 50 + size qualifies 2 2 2 2 2 2 2 2 2 2 + Size: 10 Threshold: 50 + size qualifies 2 2 2 2 2 2 2 2 2 2 + Size: 10 Threshold: 50 + size qualifies 2 2 2 2 2 2 2 2 2 2 + Size: 10 Threshold: 50 + size qualifies 2 2 2 2 2 2 2 2 2 2 + Size: 10 Threshold: 50 + size 
qualifies 2 2 2 2 2 2 2 2 2 2 + Size: 10 Threshold: 50 + size qualifies 2 2 2 2 2 2 2 2 2 2 From 2630c7a6e353f863defd9fca93ec8d1258fb6f52 Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Tue, 25 Jun 2024 22:25:17 -0700 Subject: [PATCH 37/88] Refactor a separate module for short array optimization support Signed-off-by: Engin Kayraklioglu --- modules/internal/ChapelArray.chpl | 43 ++----------------------------- 1 file changed, 2 insertions(+), 41 deletions(-) diff --git a/modules/internal/ChapelArray.chpl b/modules/internal/ChapelArray.chpl index f7397f0396b0..6693a7de0feb 100644 --- a/modules/internal/ChapelArray.chpl +++ b/modules/internal/ChapelArray.chpl @@ -80,13 +80,6 @@ module ChapelArray { @chpldoc.nodoc config param logAllArrEltAccess = false; - @chpldoc.nodoc - config param debugShortArrayTransferOpt = false; - @chpldoc.nodoc - config param disableShortArrayTransferOpt = false; - @chpldoc.nodoc - config const shortArrayTransferThreshold = 50; - proc _isPrivatized(value) param do return (!compiledForSingleLocale() || CHPL_LOCALE_MODEL=="gpu") && ((_privatization && value!.dsiSupportsPrivatization()) || @@ -2333,42 +2326,10 @@ module ChapelArray { } } - private proc chpl__staticCheckShortArrayTransfer(a, b) param { - // this is the case I'm focusing on in the initial PR. This can definitely - // be loosened up... by a lot. - return !disableShortArrayTransferOpt && isProtoSlice(a) && isProtoSlice(b); - } - - private inline proc chpl__dynamicCheckShortArrayTransfer(a, b) { - param localCompilation = _local && CHPL_LOCALE_MODEL=="flat"; - const sizeOk = a.sizeAs(uint) < shortArrayTransferThreshold; - if debugShortArrayTransferOpt { - chpl_debug_writeln(" Size: ", a.sizeAs(uint), - " Threshold: ", shortArrayTransferThreshold); - if sizeOk then - chpl_debug_writeln(" size qualifies"); - else - chpl_debug_writeln(" size doesn't qualify"); - } - if localCompilation { - return sizeOk; - } - else { - // No `.locale` to avoid overheads. 
Note that this is an optimization for - // fast-running code. Small things matter. - const sameLocale = __primitive("_wide_get_locale", a) == - __primitive("_wide_get_locale", b); - if sameLocale then - chpl_debug_writeln(" locality qualifies"); - else - chpl_debug_writeln(" locality does not qualify"); - - return sizeOk && sameLocale; - } - } - pragma "find user line" inline proc chpl__uncheckedArrayTransfer(ref a, b, param kind) { + use ChapelShortArrayTransfer; + if chpl__serializeAssignment(a, b) { chpl__transferArray(a, b, kind); } From a7c0df3fedfac961e424b79a15c929fb36b0481b Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Tue, 25 Jun 2024 22:34:52 -0700 Subject: [PATCH 38/88] Continue the module refactor Signed-off-by: Engin Kayraklioglu --- modules/internal/ChapelArray.chpl | 188 +-------------- modules/internal/ChapelArrayViewElision.chpl | 224 ++++++++++++++++++ .../internal/ChapelShortArrayTransfer.chpl | 67 ++++++ modules/internal/ChapelStandard.chpl | 1 + 4 files changed, 293 insertions(+), 187 deletions(-) create mode 100644 modules/internal/ChapelArrayViewElision.chpl create mode 100644 modules/internal/ChapelShortArrayTransfer.chpl diff --git a/modules/internal/ChapelArray.chpl b/modules/internal/ChapelArray.chpl index 6693a7de0feb..d03651b7e593 100644 --- a/modules/internal/ChapelArray.chpl +++ b/modules/internal/ChapelArray.chpl @@ -34,6 +34,7 @@ module ChapelArray { use CTypes; use ChapelPrivatization; use ChplConfig only compiledForSingleLocale, CHPL_LOCALE_MODEL; + use ChapelArrayViewElision; public use ChapelDomain; // Explicitly use a processor atomic, as most calls to this function are @@ -2160,16 +2161,6 @@ module ChapelArray { } } - proc isProtoSlice(a) param { return isSubtype(a.type, chpl__protoSlice); } - - proc isArrayOrProtoSlice(a) param { - return isArray(a) || isProtoSlice(a); - } - - proc areBothArraysOrProtoSlices(a, b) { - return isArrayOrProtoSlice(a) && isArrayOrProtoSlice(b); - } - private inline proc 
arrayOrProtoSliceAssign(ref a, b) { if a.rank != b.rank then compilerError("rank mismatch in array assignment"); @@ -2438,183 +2429,6 @@ module ChapelArray { return false; } - record chpl__protoSlice { - param rank; - type idxType; - var ptrToArr; // I want this to be a `forwarding ref` to the array - var ranges; - - proc init() { - // this constructor is called to create dummy protoSlices that will never - // be used and removed from the AST. - this.rank = 1; - this.idxType = int; - - var dummyArr = [1,]; - this.ptrToArr = c_addrOf(dummyArr); - this.ranges = (1..0,); - compilerWarning("created a dummy slice"); - } - - proc init(ptrToArr, slicingExprs) { - this.rank = ptrToArr.deref().rank; - this.idxType = ptrToArr.deref().idxType; - this.ptrToArr = ptrToArr; - if allBounded(slicingExprs) { - this.ranges = slicingExprs; - } - else if chpl__isTupleOfRanges(slicingExprs) { - this.ranges = tupleOfRangesSlice(ptrToArr.deref().dims(), slicingExprs); - } - else { - this.ranges = tupleOfRangesSlice(ptrToArr.deref().dims(), - (slicingExprs,))[0]; - // [0] at the end makes it a range instead of tuple of ranges - } - } - - proc init=(other: chpl__protoSlice) { - this.rank = other.rank; - this.idxType = other.idxType; - this.ptrToArr = other.ptrToArr; - this.ranges = other.ranges; - init this; - extern proc printf(s...); - printf("this is probably not what you want\n"); - } - - inline proc domOrRange where rank==1 { - return ranges; - } - - inline proc domOrRange where rank>1 { - return {(...ranges)}; - } - - inline proc dims() where chpl__isTupleOfRanges(this.ranges) { - return ranges; - } - - inline proc dims() { - return (ranges,); - } - - inline proc rank param { return ptrToArr.deref().rank; } - inline proc eltType type { return ptrToArr.deref().eltType; } - inline proc _value { return ptrToArr.deref()._value; } - - inline proc sizeAs(type t) where rank==1 { - return ranges.sizeAs(t); - } - - inline proc sizeAs(type t) { - var size = 1:t; - for param r in 0..1 { + 
return {(...ranges)}; + } + + inline proc dims() where chpl__isTupleOfRanges(this.ranges) { + return ranges; + } + + inline proc dims() { + return (ranges,); + } + + inline proc rank param { return ptrToArr.deref().rank; } + inline proc eltType type { return ptrToArr.deref().eltType; } + inline proc _value { return ptrToArr.deref()._value; } + + inline proc sizeAs(type t) where rank==1 { + return ranges.sizeAs(t); + } + + inline proc sizeAs(type t) { + var size = 1:t; + for param r in 0.. Size: ", a.sizeAs(uint), + " Threshold: ", shortArrayTransferThreshold); + if sizeOk then + chpl_debug_writeln(" size qualifies"); + else + chpl_debug_writeln(" size doesn't qualify"); + } + if localCompilation { + return sizeOk; + } + else { + // No `.locale` to avoid overheads. Note that this is an optimization for + // fast-running code. Small things matter. + const sameLocale = __primitive("_wide_get_locale", a) == + __primitive("_wide_get_locale", b); + if sameLocale then + chpl_debug_writeln(" locality qualifies"); + else + chpl_debug_writeln(" locality does not qualify"); + + return sizeOk && sameLocale; + } + } +} diff --git a/modules/internal/ChapelStandard.chpl b/modules/internal/ChapelStandard.chpl index 3e10bd091846..662acb8dd374 100644 --- a/modules/internal/ChapelStandard.chpl +++ b/modules/internal/ChapelStandard.chpl @@ -76,6 +76,7 @@ module ChapelStandard { public use ChapelContext; public use ChapelStaticVars; public use ChapelRemoteVars; + public use ChapelArrayViewElision; // Standard modules. 
public use Types as Types; From e559bb285040718f324efef1cd3ba982341de8b8 Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Tue, 25 Jun 2024 22:35:25 -0700 Subject: [PATCH 39/88] Remove a forgotten output Signed-off-by: Engin Kayraklioglu --- modules/internal/ChapelArray.chpl | 17 ----------------- modules/internal/ChapelArrayViewElision.chpl | 1 - 2 files changed, 18 deletions(-) diff --git a/modules/internal/ChapelArray.chpl b/modules/internal/ChapelArray.chpl index d03651b7e593..74ce0994f003 100644 --- a/modules/internal/ChapelArray.chpl +++ b/modules/internal/ChapelArray.chpl @@ -2412,23 +2412,6 @@ module ChapelArray { } - private proc allBounded(ranges: range(?)) param { - return ranges.bounds == boundKind.both; - } - - private proc allBounded(ranges) param { - if chpl__isTupleOfRanges(ranges) { - for param i in 0.. Date: Tue, 25 Jun 2024 22:37:13 -0700 Subject: [PATCH 40/88] Remove whitespace Signed-off-by: Engin Kayraklioglu --- modules/internal/ChapelArrayViewElision.chpl | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/internal/ChapelArrayViewElision.chpl b/modules/internal/ChapelArrayViewElision.chpl index 8347b7e07aba..c0690172df9c 100644 --- a/modules/internal/ChapelArrayViewElision.chpl +++ b/modules/internal/ChapelArrayViewElision.chpl @@ -1,4 +1,3 @@ - /* * Copyright 2020-2024 Hewlett Packard Enterprise Development LP * Copyright 2004-2019 Cray Inc. 
From a680fa8919f6acb8da3c9c73f0349a4f1ab0401f Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Tue, 25 Jun 2024 22:53:15 -0700 Subject: [PATCH 41/88] Refactor the compiler, too Signed-off-by: Engin Kayraklioglu --- compiler/include/arrayViewElision.h | 55 +++++ compiler/include/forallOptimizations.h | 28 --- compiler/optimizations/CMakeLists.txt | 1 + compiler/optimizations/Makefile.share | 1 + compiler/optimizations/arrayViewElision.cpp | 213 ++++++++++++++++++ .../optimizations/forallOptimizations.cpp | 191 ---------------- compiler/passes/normalize.cpp | 3 + compiler/resolution/preFold.cpp | 1 + 8 files changed, 274 insertions(+), 219 deletions(-) create mode 100644 compiler/include/arrayViewElision.h create mode 100644 compiler/optimizations/arrayViewElision.cpp diff --git a/compiler/include/arrayViewElision.h b/compiler/include/arrayViewElision.h new file mode 100644 index 000000000000..a200b22ed2ea --- /dev/null +++ b/compiler/include/arrayViewElision.h @@ -0,0 +1,55 @@ +/* + * Copyright 2020-2024 Hewlett Packard Enterprise Development LP + * Copyright 2004-2019 Cray Inc. + * Other additional copyright holders may be indicated within. + * + * The entirety of this work is licensed under the Apache License, + * Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. + * + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "CallExpr.h" +#include "stmt.h" +#include "symbol.h" + +// interface for normalize +void arrayViewElision(); + +// interface for prefold +class ProtoSliceAssignHelper { +public: + ProtoSliceAssignHelper() = delete; + ProtoSliceAssignHelper(CallExpr* call); + ~ProtoSliceAssignHelper(); + + inline CondStmt* condStmt() const { return condStmt_; } + inline Expr* flag() const { return condStmt_->condExpr; } + inline bool supported() const { return supported_; } + inline BlockStmt* staticCheckBlock() const { return staticCheckBlock_; } + + CallExpr* getReplacement(); + +private: + CallExpr* call_; + CallExpr* newProtoSliceLhs_; + CallExpr* newProtoSliceRhs_; + CondStmt* condStmt_; + Symbol* tmpCondFlag_; + bool supported_; + BlockStmt* staticCheckBlock_; + + void findCondStmt(); + void findProtoSlices(); + bool handleOneProtoSlice(CallExpr* call); + CallExpr* findOneProtoSliceCall(Expr* e); +}; diff --git a/compiler/include/forallOptimizations.h b/compiler/include/forallOptimizations.h index 656c3c4873d4..ced0511338cc 100644 --- a/compiler/include/forallOptimizations.h +++ b/compiler/include/forallOptimizations.h @@ -77,32 +77,4 @@ void removeAggregationFromRecursiveForall(ForallStmt *forall); void transformConditionalAggregation(CondStmt *cond); void cleanupRemainingAggCondStmts(); -class ProtoSliceAssignHelper { -public: - ProtoSliceAssignHelper() = delete; - ProtoSliceAssignHelper(CallExpr* call); - ~ProtoSliceAssignHelper(); - - inline CondStmt* condStmt() const { return condStmt_; } - inline Expr* flag() const { return condStmt_->condExpr; } - inline bool supported() const { return supported_; } - inline BlockStmt* staticCheckBlock() const { return staticCheckBlock_; } - - CallExpr* getReplacement(); - -private: - CallExpr* call_; - CallExpr* newProtoSliceLhs_; - CallExpr* newProtoSliceRhs_; - CondStmt* condStmt_; - Symbol* tmpCondFlag_; - bool supported_; - BlockStmt* staticCheckBlock_; - - void findCondStmt(); - void findProtoSlices(); - bool 
handleOneProtoSlice(CallExpr* call); - CallExpr* findOneProtoSliceCall(Expr* e); -}; - #endif diff --git a/compiler/optimizations/CMakeLists.txt b/compiler/optimizations/CMakeLists.txt index 5ba45dbeb253..0e775877729f 100644 --- a/compiler/optimizations/CMakeLists.txt +++ b/compiler/optimizations/CMakeLists.txt @@ -15,6 +15,7 @@ # See the License for the specific language governing permissions and # limitations under the License. set(SRCS + arrayViewElision.cpp bulkCopyRecords.cpp copyPropagation.cpp deadCodeElimination.cpp diff --git a/compiler/optimizations/Makefile.share b/compiler/optimizations/Makefile.share index 7d74f9f0d583..1c9b953534f0 100644 --- a/compiler/optimizations/Makefile.share +++ b/compiler/optimizations/Makefile.share @@ -17,6 +17,7 @@ # limitations under the License. OPTIMIZATIONS_SRCS = \ + arrayViewElision.cpp \ bulkCopyRecords.cpp \ copyPropagation.cpp \ deadCodeElimination.cpp \ diff --git a/compiler/optimizations/arrayViewElision.cpp b/compiler/optimizations/arrayViewElision.cpp new file mode 100644 index 000000000000..56fc463d78b1 --- /dev/null +++ b/compiler/optimizations/arrayViewElision.cpp @@ -0,0 +1,213 @@ +/* + * Copyright 2020-2024 Hewlett Packard Enterprise Development LP + * Copyright 2004-2019 Cray Inc. + * Other additional copyright holders may be indicated within. + * + * The entirety of this work is licensed under the Apache License, + * Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. + * + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include "arrayViewElision.h" +#include "global-ast-vecs.h" +#include "passes.h" +#include "resolution.h" + +static bool exprSuitableForProtoSlice(Expr* e) { + if (CallExpr* call = toCallExpr(e)) { + if (SymExpr* callBase = toSymExpr(call->baseExpr)) { + if (!isFnSymbol(callBase->symbol())) { + return true; + } + } + } + return false; +} + +ProtoSliceAssignHelper::ProtoSliceAssignHelper(CallExpr* call): + call_(call), + newProtoSliceLhs_(nullptr), + newProtoSliceRhs_(nullptr), + condStmt_(nullptr), + supported_(false), + staticCheckBlock_(nullptr) { + + findProtoSlices(); + INT_ASSERT(newProtoSliceLhs_); + INT_ASSERT(newProtoSliceRhs_); + + + // this is just a temporary block. we add some AST in it, resolve and then + // remove them. I cannot be sure if those operations could leave any AST in. + // So, when we destroy this helper, we'll remove this block just to be sure. + staticCheckBlock_ = new BlockStmt(); + + BlockStmt* parentBlock = toBlockStmt(call->parentExpr); + parentBlock->insertAtHead(staticCheckBlock_); + + supported_ = handleOneProtoSlice(newProtoSliceLhs_) && + handleOneProtoSlice(newProtoSliceRhs_); + + findCondStmt(); + INT_ASSERT(condStmt_); +} + +ProtoSliceAssignHelper::~ProtoSliceAssignHelper() { + staticCheckBlock_->remove(); + tmpCondFlag_->getSingleDef()->getStmtExpr()->remove(); + tmpCondFlag_->defPoint->remove(); +} + +CallExpr* ProtoSliceAssignHelper::getReplacement() { + return new CallExpr("=", call_->get(1)->copy(), call_->get(2)->copy()); +} + +bool ProtoSliceAssignHelper::handleOneProtoSlice(CallExpr* call) { + INT_ASSERT(call->isNamed("chpl__createProtoSlice")); + + CallExpr* typeCheck = new CallExpr("chpl__typesSupportArrayViewElision"); + for_actuals (actual, call) { + typeCheck->insertAtTail(actual->typeInfo()->symbol); + } + + VarSymbol* tmp = newTemp("call_tmp", dtBool); + DefExpr* flagDef = new DefExpr(tmp, typeCheck); + + staticCheckBlock_->insertAtTail(flagDef); + + resolveExpr(typeCheck); + 
resolveExpr(flagDef); + + bool ret = (toSymExpr(flagDef->init)->symbol() == gTrue); + + flagDef->remove(); + + return ret; +} + +// e must be the lhs or rhs of PRIM_ASSIGN_PROTO_SLICES +// returns the `chpl__createProtoSlice call +CallExpr* ProtoSliceAssignHelper::findOneProtoSliceCall(Expr* e) { + SymExpr* lhsSE = toSymExpr(call_->get(1)); + INT_ASSERT(lhsSE); + + Symbol* lhs = lhsSE->symbol(); + CallExpr* lhsTmpMove = toCallExpr(lhs->getSingleDef()->getStmtExpr()); + INT_ASSERT(lhsTmpMove && lhsTmpMove->isPrimitive(PRIM_MOVE)); + + SymExpr* lhsTmpSE = toSymExpr(lhsTmpMove->get(2)); + INT_ASSERT(lhsTmpSE); + + Symbol* lhsTmpSym = lhsTmpSE->symbol(); + CallExpr* lhsMove = toCallExpr(lhsTmpSym->getSingleDef()->getStmtExpr()); + INT_ASSERT(lhsMove && lhsMove->isPrimitive(PRIM_MOVE)); + + return toCallExpr(lhsMove->get(2)); +} + +void ProtoSliceAssignHelper::findProtoSlices() { + newProtoSliceLhs_ = findOneProtoSliceCall(call_->get(1)); + newProtoSliceRhs_ = findOneProtoSliceCall(call_->get(2)); +} + +void ProtoSliceAssignHelper::findCondStmt() { + Expr* cur = call_; + while (cur) { + if (CondStmt* condStmt = toCondStmt(cur)) { + if (SymExpr* condExpr = toSymExpr(condStmt->condExpr)) { + if (condExpr->symbol()->hasFlag(FLAG_ARRAYVIEW_ELISION_FLAG)) { + tmpCondFlag_ = condExpr->symbol(); + condStmt_ = condStmt; + break; + } + else { + // this is an unknown conditional, this shouldn't have happened + INT_FATAL(call_, + "unexpected syntax tree generated by arrayview elision"); + } + } + } + + cur = cur->parentExpr; + } + + if (condStmt_ == nullptr) { + // where is the conditional? 
+ INT_FATAL(call_, + "unexpected syntax tree generated by arrayview elision"); + } +} + +void arrayViewElision() { + std::vector candidates; + + for_alive_in_Vec (CallExpr, call, gCallExprs) { + if (call->getModule()->modTag == MOD_USER) { + if (call->isNamed("=")) { + if (exprSuitableForProtoSlice(call->get(1)) && + exprSuitableForProtoSlice(call->get(2))) { + //std::cout << call->stringLoc() << std::endl; + //nprint_view(call); + candidates.push_back(call); + } + } + } + } + + for_vector(CallExpr, call, candidates) { + SET_LINENO(call); + + CallExpr* lhs = toCallExpr(call->get(1)); + CallExpr* rhs = toCallExpr(call->get(2)); + + Expr* lhsBase = lhs->baseExpr; + Expr* rhsBase = rhs->baseExpr; + + CallExpr* lhsPSCall = new CallExpr("chpl__createProtoSlice", lhsBase->copy()); + for_actuals(actual, lhs) { + lhsPSCall->insertAtTail(actual->copy()); + } + + CallExpr* rhsPSCall = new CallExpr("chpl__createProtoSlice", rhsBase->copy()); + for_actuals(actual, rhs) { + rhsPSCall->insertAtTail(actual->copy()); + } + + // arrayview elision placeholder + VarSymbol* placeholder = new VarSymbol("arrayview_elision_flag", dtBool); + placeholder->addFlag(FLAG_ARRAYVIEW_ELISION_FLAG); + + call->insertBefore(new DefExpr(placeholder, gFalse)); + + BlockStmt* thenBlock = new BlockStmt(); + VarSymbol* lhsPS = new VarSymbol("lhs_proto_slice"); + VarSymbol* rhsPS = new VarSymbol("rhs_proto_slice"); + + thenBlock->insertAtTail(new DefExpr(lhsPS, lhsPSCall)); + thenBlock->insertAtTail(new DefExpr(rhsPS, rhsPSCall)); + thenBlock->insertAtTail(new CallExpr(PRIM_PROTO_SLICE_ASSIGN, lhsPS, + rhsPS)); + + BlockStmt* elseBlock = new BlockStmt(); + + CondStmt* cond = new CondStmt(new SymExpr(placeholder), thenBlock, + elseBlock); + + call->insertBefore(cond); + elseBlock->insertAtTail(call->remove()); + + //list_view(cond); + } +} diff --git a/compiler/optimizations/forallOptimizations.cpp b/compiler/optimizations/forallOptimizations.cpp index 7f7bb2ff1e58..10d902ccb06e 100644 --- 
a/compiler/optimizations/forallOptimizations.cpp +++ b/compiler/optimizations/forallOptimizations.cpp @@ -142,11 +142,7 @@ static void removeAggregatorFromFunction(Symbol *aggregator, FnSymbol *parent); static void removeAggregationFromRecursiveForallHelp(BlockStmt *block); static void autoAggregation(ForallStmt *forall); -static void bulkViewTransfer(); - void doPreNormalizeArrayOptimizations() { - bulkViewTransfer(); - const bool anyAnalysisNeeded = fAutoLocalAccess || fAutoAggregation || !fNoFastFollowers; @@ -2457,190 +2453,3 @@ static bool isLocalAccess(CallExpr *call) { return false; } - -static bool exprSuitableForProtoSlice(Expr* e) { - if (CallExpr* call = toCallExpr(e)) { - if (SymExpr* callBase = toSymExpr(call->baseExpr)) { - if (!isFnSymbol(callBase->symbol())) { - return true; - } - } - } - return false; -} - -ProtoSliceAssignHelper::ProtoSliceAssignHelper(CallExpr* call): - call_(call), - newProtoSliceLhs_(nullptr), - newProtoSliceRhs_(nullptr), - condStmt_(nullptr), - supported_(false), - staticCheckBlock_(nullptr) { - - findProtoSlices(); - INT_ASSERT(newProtoSliceLhs_); - INT_ASSERT(newProtoSliceRhs_); - - - // this is just a temporary block. we add some AST in it, resolve and then - // remove them. I cannot be sure if those operations could leave any AST in. - // So, when we destroy this helper, we'll remove this block just to be sure. 
- staticCheckBlock_ = new BlockStmt(); - - BlockStmt* parentBlock = toBlockStmt(call->parentExpr); - parentBlock->insertAtHead(staticCheckBlock_); - - supported_ = handleOneProtoSlice(newProtoSliceLhs_) && - handleOneProtoSlice(newProtoSliceRhs_); - - findCondStmt(); - INT_ASSERT(condStmt_); -} - -ProtoSliceAssignHelper::~ProtoSliceAssignHelper() { - staticCheckBlock_->remove(); - tmpCondFlag_->getSingleDef()->getStmtExpr()->remove(); - tmpCondFlag_->defPoint->remove(); -} - -CallExpr* ProtoSliceAssignHelper::getReplacement() { - return new CallExpr("=", call_->get(1)->copy(), call_->get(2)->copy()); -} - -bool ProtoSliceAssignHelper::handleOneProtoSlice(CallExpr* call) { - INT_ASSERT(call->isNamed("chpl__createProtoSlice")); - - CallExpr* typeCheck = new CallExpr("chpl__typesSupportArrayViewElision"); - for_actuals (actual, call) { - typeCheck->insertAtTail(actual->typeInfo()->symbol); - } - - VarSymbol* tmp = newTemp("call_tmp", dtBool); - DefExpr* flagDef = new DefExpr(tmp, typeCheck); - - staticCheckBlock_->insertAtTail(flagDef); - - resolveExpr(typeCheck); - resolveExpr(flagDef); - - bool ret = (toSymExpr(flagDef->init)->symbol() == gTrue); - - flagDef->remove(); - - return ret; -} - -// e must be the lhs or rhs of PRIM_ASSIGN_PROTO_SLICES -// returns the `chpl__createProtoSlice call -CallExpr* ProtoSliceAssignHelper::findOneProtoSliceCall(Expr* e) { - SymExpr* lhsSE = toSymExpr(call_->get(1)); - INT_ASSERT(lhsSE); - - Symbol* lhs = lhsSE->symbol(); - CallExpr* lhsTmpMove = toCallExpr(lhs->getSingleDef()->getStmtExpr()); - INT_ASSERT(lhsTmpMove && lhsTmpMove->isPrimitive(PRIM_MOVE)); - - SymExpr* lhsTmpSE = toSymExpr(lhsTmpMove->get(2)); - INT_ASSERT(lhsTmpSE); - - Symbol* lhsTmpSym = lhsTmpSE->symbol(); - CallExpr* lhsMove = toCallExpr(lhsTmpSym->getSingleDef()->getStmtExpr()); - INT_ASSERT(lhsMove && lhsMove->isPrimitive(PRIM_MOVE)); - - return toCallExpr(lhsMove->get(2)); -} - -void ProtoSliceAssignHelper::findProtoSlices() { - newProtoSliceLhs_ = 
findOneProtoSliceCall(call_->get(1)); - newProtoSliceRhs_ = findOneProtoSliceCall(call_->get(2)); -} - -void ProtoSliceAssignHelper::findCondStmt() { - Expr* cur = call_; - while (cur) { - if (CondStmt* condStmt = toCondStmt(cur)) { - if (SymExpr* condExpr = toSymExpr(condStmt->condExpr)) { - if (condExpr->symbol()->hasFlag(FLAG_ARRAYVIEW_ELISION_FLAG)) { - tmpCondFlag_ = condExpr->symbol(); - condStmt_ = condStmt; - break; - } - else { - // this is an unknown conditional, this shouldn't have happened - INT_FATAL(call_, - "unexpected syntax tree generated by arrayview elision"); - } - } - } - - cur = cur->parentExpr; - } - - if (condStmt_ == nullptr) { - // where is the conditional? - INT_FATAL(call_, - "unexpected syntax tree generated by arrayview elision"); - } -} - -static void bulkViewTransfer() { - std::vector candidates; - - for_alive_in_Vec (CallExpr, call, gCallExprs) { - if (call->getModule()->modTag == MOD_USER) { - if (call->isNamed("=")) { - if (exprSuitableForProtoSlice(call->get(1)) && - exprSuitableForProtoSlice(call->get(2))) { - //std::cout << call->stringLoc() << std::endl; - //nprint_view(call); - candidates.push_back(call); - } - } - } - } - - for_vector(CallExpr, call, candidates) { - SET_LINENO(call); - - CallExpr* lhs = toCallExpr(call->get(1)); - CallExpr* rhs = toCallExpr(call->get(2)); - - Expr* lhsBase = lhs->baseExpr; - Expr* rhsBase = rhs->baseExpr; - - CallExpr* lhsPSCall = new CallExpr("chpl__createProtoSlice", lhsBase->copy()); - for_actuals(actual, lhs) { - lhsPSCall->insertAtTail(actual->copy()); - } - - CallExpr* rhsPSCall = new CallExpr("chpl__createProtoSlice", rhsBase->copy()); - for_actuals(actual, rhs) { - rhsPSCall->insertAtTail(actual->copy()); - } - - // arrayview elision placeholder - VarSymbol* placeholder = new VarSymbol("arrayview_elision_flag", dtBool); - placeholder->addFlag(FLAG_ARRAYVIEW_ELISION_FLAG); - - call->insertBefore(new DefExpr(placeholder, gFalse)); - - BlockStmt* thenBlock = new BlockStmt(); - 
VarSymbol* lhsPS = new VarSymbol("lhs_proto_slice"); - VarSymbol* rhsPS = new VarSymbol("rhs_proto_slice"); - - thenBlock->insertAtTail(new DefExpr(lhsPS, lhsPSCall)); - thenBlock->insertAtTail(new DefExpr(rhsPS, rhsPSCall)); - thenBlock->insertAtTail(new CallExpr(PRIM_PROTO_SLICE_ASSIGN, lhsPS, - rhsPS)); - - BlockStmt* elseBlock = new BlockStmt(); - - CondStmt* cond = new CondStmt(new SymExpr(placeholder), thenBlock, - elseBlock); - - call->insertBefore(cond); - elseBlock->insertAtTail(call->remove()); - - //list_view(cond); - } -} diff --git a/compiler/passes/normalize.cpp b/compiler/passes/normalize.cpp index 031a0fd0fa92..d951d2f4fc40 100644 --- a/compiler/passes/normalize.cpp +++ b/compiler/passes/normalize.cpp @@ -26,6 +26,7 @@ #include "baseAST.h" #include "passes.h" +#include "arrayViewElision.h" #include "astutil.h" #include "build.h" #include "DecoratedClassType.h" @@ -172,6 +173,8 @@ void normalize() { insertModuleInit(); + arrayViewElision(); + doPreNormalizeArrayOptimizations(); moveAndCheckInterfaceConstraints(); diff --git a/compiler/resolution/preFold.cpp b/compiler/resolution/preFold.cpp index 21c8a8906fb0..00c034c28d44 100644 --- a/compiler/resolution/preFold.cpp +++ b/compiler/resolution/preFold.cpp @@ -20,6 +20,7 @@ #include "preFold.h" +#include "arrayViewElision.h" #include "astutil.h" #include "buildDefaultFunctions.h" #include "fcf-support.h" From 27d34e3eaf06b6bdd90b79e947e19ceb60d12ee6 Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Wed, 26 Jun 2024 10:33:34 -0700 Subject: [PATCH 42/88] Reimplement the check for negative strides in the new implementation Signed-off-by: Engin Kayraklioglu --- modules/internal/ChapelArrayViewElision.chpl | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/modules/internal/ChapelArrayViewElision.chpl b/modules/internal/ChapelArrayViewElision.chpl index c0690172df9c..55b1e7e64fac 100644 --- a/modules/internal/ChapelArrayViewElision.chpl +++ b/modules/internal/ChapelArrayViewElision.chpl @@ 
-191,6 +191,15 @@ module ChapelArrayViewElision { proc chpl__indexingExprsSupportAVE(type indexingTypes...) param: bool { for param tid in 0.. Date: Wed, 26 Jun 2024 10:44:05 -0700 Subject: [PATCH 43/88] Add a flag to control the optimization behavior and a test Signed-off-by: Engin Kayraklioglu --- compiler/include/driver.h | 3 +++ compiler/main/driver.cpp | 5 +++++ compiler/optimizations/arrayViewElision.cpp | 2 ++ test/optimizations/arrayViewElision/flags.chpl | 6 ++++++ test/optimizations/arrayViewElision/flags.compopts | 2 ++ test/optimizations/arrayViewElision/flags.no-opt.good | 11 +++++++++++ test/optimizations/arrayViewElision/flags.opt.good | 6 ++++++ 7 files changed, 35 insertions(+) create mode 100644 test/optimizations/arrayViewElision/flags.chpl create mode 100644 test/optimizations/arrayViewElision/flags.compopts create mode 100644 test/optimizations/arrayViewElision/flags.no-opt.good create mode 100644 test/optimizations/arrayViewElision/flags.opt.good diff --git a/compiler/include/driver.h b/compiler/include/driver.h index 08d1c0fe307b..b32671eede6c 100644 --- a/compiler/include/driver.h +++ b/compiler/include/driver.h @@ -68,6 +68,9 @@ extern bool fReportAutoLocalAccess; extern bool fAutoAggregation; extern bool fReportAutoAggregation; +extern bool fArrayViewElision; +extern bool fReportArrayViewElision; + extern bool fNoRemoteValueForwarding; extern bool fNoInferConstRefs; extern bool fNoRemoteSerialization; diff --git a/compiler/main/driver.cpp b/compiler/main/driver.cpp index abcb5d0213e1..e4b1f7ba88b3 100644 --- a/compiler/main/driver.cpp +++ b/compiler/main/driver.cpp @@ -215,6 +215,9 @@ bool fReportAutoLocalAccess= false; bool fAutoAggregation = false; bool fReportAutoAggregation= false; +bool fArrayViewElision = true; +bool fReportArrayViewElision = false; + bool printPasses = false; FILE* printPassesFile = NULL; @@ -1274,6 +1277,8 @@ static ArgumentDescription arg_desc[] = { {"auto-aggregation", ' ', NULL, "Enable [disable] automatically 
aggregating remote accesses in foralls", "N", &fAutoAggregation, "CHPL_AUTO_AGGREGATION", NULL}, + {"array-view-elision", ' ', NULL, "Enable [disable] array view elision", "N", &fArrayViewElision, "CHPL_DISABLE_ARRAY_VIEW_ELISION", NULL}, + {"", ' ', NULL, "Run-time Semantic Check Options", NULL, NULL, NULL, NULL}, {"checks", ' ', NULL, "Enable [disable] all following run-time checks", "n", &fNoChecks, "CHPL_CHECKS", setChecks}, {"bounds-checks", ' ', NULL, "Enable [disable] bounds checking", "n", &fNoBoundsChecks, "CHPL_BOUNDS_CHECKING", NULL}, diff --git a/compiler/optimizations/arrayViewElision.cpp b/compiler/optimizations/arrayViewElision.cpp index 56fc463d78b1..294ff31f1d05 100644 --- a/compiler/optimizations/arrayViewElision.cpp +++ b/compiler/optimizations/arrayViewElision.cpp @@ -151,6 +151,8 @@ void ProtoSliceAssignHelper::findCondStmt() { } void arrayViewElision() { + if (!fArrayViewElision) return; + std::vector candidates; for_alive_in_Vec (CallExpr, call, gCallExprs) { diff --git a/test/optimizations/arrayViewElision/flags.chpl b/test/optimizations/arrayViewElision/flags.chpl new file mode 100644 index 000000000000..3ab31c0e2e4a --- /dev/null +++ b/test/optimizations/arrayViewElision/flags.chpl @@ -0,0 +1,6 @@ +var A: [1..1000] int; +var B: [1..1000] int; + +A[1..1000] = B[1..1000]; + +writeln(A[3]); diff --git a/test/optimizations/arrayViewElision/flags.compopts b/test/optimizations/arrayViewElision/flags.compopts new file mode 100644 index 000000000000..3b5fea555689 --- /dev/null +++ b/test/optimizations/arrayViewElision/flags.compopts @@ -0,0 +1,2 @@ +--array-view-elision -sdebugBulkTransfer=true # flags.opt +--no-array-view-elision -sdebugBulkTransfer=true # flags.no-opt diff --git a/test/optimizations/arrayViewElision/flags.no-opt.good b/test/optimizations/arrayViewElision/flags.no-opt.good new file mode 100644 index 000000000000..217f7cf4fbb1 --- /dev/null +++ b/test/optimizations/arrayViewElision/flags.no-opt.good @@ -0,0 +1,11 @@ +operator 
=(a:[],b:[]): in chpl__bulkTransferArray +operator =(a:[],b:[]): attempting doiBulkTransferFromKnown +operator =(a:[],b:[]): in chpl__bulkTransferArray +operator =(a:[],b:[]): attempting doiBulkTransferToKnown +operator =(a:[],b:[]): in chpl__bulkTransferArray +operator =(a:[],b:[]): attempting doiBulkTransferFromKnown +In DefaultRectangular._simpleTransfer(): Alo=(1,), Blo=(1,), len=1000, elemSize=8 +operator =(a:[],b:[]): successfully completed bulk transfer +operator =(a:[],b:[]): successfully completed bulk transfer +operator =(a:[],b:[]): successfully completed bulk transfer +0 diff --git a/test/optimizations/arrayViewElision/flags.opt.good b/test/optimizations/arrayViewElision/flags.opt.good new file mode 100644 index 000000000000..16312e544ebc --- /dev/null +++ b/test/optimizations/arrayViewElision/flags.opt.good @@ -0,0 +1,6 @@ +Performing protoSlice bulk transfer +operator =(a:[],b:[]): in chpl__bulkTransferArray +operator =(a:[],b:[]): attempting doiBulkTransferFromKnown +In DefaultRectangular._simpleTransfer(): Alo=(1,), Blo=(1,), len=1000, elemSize=8 +operator =(a:[],b:[]): successfully completed bulk transfer +0 From 196d1fb8b018066712faa06762691ead321023dd Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Wed, 26 Jun 2024 10:46:47 -0700 Subject: [PATCH 44/88] Fix the intent in chpl__createProtoSlice Signed-off-by: Engin Kayraklioglu --- modules/internal/ChapelArrayViewElision.chpl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/internal/ChapelArrayViewElision.chpl b/modules/internal/ChapelArrayViewElision.chpl index 55b1e7e64fac..2d6911ac87f5 100644 --- a/modules/internal/ChapelArrayViewElision.chpl +++ b/modules/internal/ChapelArrayViewElision.chpl @@ -151,19 +151,19 @@ module ChapelArrayViewElision { // TODO can we allow const arrs to be passed here without breaking constness // guarantees? 
// TODO we can also accept domains and ints (rank-change) - proc chpl__createProtoSlice(ref Arr, slicingExprs: range(?)) + proc chpl__createProtoSlice(const ref Arr, slicingExprs: range(?)) where chpl__baseTypeSupportAVE(Arr.type) { return new chpl__protoSlice(c_addrOf(Arr), slicingExprs); } pragma "last resort" - proc chpl__createProtoSlice(ref Arr, slicingExprs:range(?) ...) + proc chpl__createProtoSlice(const ref Arr, slicingExprs:range(?) ...) where chpl__baseTypeSupportAVE(Arr.type) { return new chpl__protoSlice(c_addrOf(Arr), slicingExprs); } pragma "last resort" - proc chpl__createProtoSlice(ref Arr, slicingExprs... ) { + proc chpl__createProtoSlice(const ref Arr, slicingExprs... ) { // this is an array access. This call will be eliminated later in // resolution, but we want it to live for a bit for easier resolution return new chpl__protoSlice(); From 91a5678040014c729dffaf69c95563a82941ae19 Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Wed, 26 Jun 2024 11:29:17 -0700 Subject: [PATCH 45/88] Use values instead of types, add const overloads Signed-off-by: Engin Kayraklioglu --- compiler/optimizations/arrayViewElision.cpp | 3 +- modules/internal/ChapelArrayViewElision.chpl | 47 ++++++++++++-------- 2 files changed, 31 insertions(+), 19 deletions(-) diff --git a/compiler/optimizations/arrayViewElision.cpp b/compiler/optimizations/arrayViewElision.cpp index 294ff31f1d05..27bdc426d77c 100644 --- a/compiler/optimizations/arrayViewElision.cpp +++ b/compiler/optimizations/arrayViewElision.cpp @@ -79,7 +79,8 @@ bool ProtoSliceAssignHelper::handleOneProtoSlice(CallExpr* call) { CallExpr* typeCheck = new CallExpr("chpl__typesSupportArrayViewElision"); for_actuals (actual, call) { - typeCheck->insertAtTail(actual->typeInfo()->symbol); + INT_ASSERT(isSymExpr(actual)); + typeCheck->insertAtTail(actual->copy()); } VarSymbol* tmp = newTemp("call_tmp", dtBool); diff --git a/modules/internal/ChapelArrayViewElision.chpl b/modules/internal/ChapelArrayViewElision.chpl 
index 2d6911ac87f5..8ca555909735 100644 --- a/modules/internal/ChapelArrayViewElision.chpl +++ b/modules/internal/ChapelArrayViewElision.chpl @@ -151,17 +151,29 @@ module ChapelArrayViewElision { // TODO can we allow const arrs to be passed here without breaking constness // guarantees? // TODO we can also accept domains and ints (rank-change) - proc chpl__createProtoSlice(const ref Arr, slicingExprs: range(?)) - where chpl__baseTypeSupportAVE(Arr.type) { + proc chpl__createProtoSlice(ref Arr, slicingExprs: range(?)) + where chpl__baseTypeSupportAVE(Arr) { return new chpl__protoSlice(c_addrOf(Arr), slicingExprs); } pragma "last resort" - proc chpl__createProtoSlice(const ref Arr, slicingExprs:range(?) ...) - where chpl__baseTypeSupportAVE(Arr.type) { + proc chpl__createProtoSlice(ref Arr, slicingExprs:range(?) ...) + where chpl__baseTypeSupportAVE(Arr) { return new chpl__protoSlice(c_addrOf(Arr), slicingExprs); } + pragma "last resort" + proc chpl__createProtoSlice(const ref Arr, slicingExprs: range(?)) + where chpl__baseTypeSupportAVE(Arr) { + return new chpl__protoSlice(c_addrOfConst(Arr), slicingExprs); + } + + pragma "last resort" + proc chpl__createProtoSlice(const ref Arr, slicingExprs:range(?) ...) + where chpl__baseTypeSupportAVE(Arr) { + return new chpl__protoSlice(c_addrOfConst(Arr), slicingExprs); + } + pragma "last resort" proc chpl__createProtoSlice(const ref Arr, slicingExprs... ) { // this is an array access. This call will be eliminated later in @@ -180,23 +192,22 @@ module ChapelArrayViewElision { return false; } - proc chpl__baseTypeSupportAVE(type baseType) param: bool { + proc chpl__baseTypeSupportAVE(base) param: bool { import Reflection; - var dummy: baseType; - return isArrayType(baseType) && - isSubtype(dummy._instance.type, DefaultRectangularArr) && - Reflection.canResolve("c_addrOf", dummy); + return isArray(base) && // also could be a view? 
+ isSubtype(base._instance.type, DefaultRectangularArr) && + Reflection.canResolve("c_addrOf", base); } - proc chpl__indexingExprsSupportAVE(type indexingTypes...) param: bool { - for param tid in 0.. Date: Wed, 26 Jun 2024 11:46:15 -0700 Subject: [PATCH 46/88] Add support for reporting, some code juggling in the compiler Signed-off-by: Engin Kayraklioglu --- compiler/include/arrayViewElision.h | 1 + compiler/main/driver.cpp | 1 + compiler/optimizations/arrayViewElision.cpp | 141 +++++++++++--------- compiler/resolution/preFold.cpp | 2 + 4 files changed, 80 insertions(+), 65 deletions(-) diff --git a/compiler/include/arrayViewElision.h b/compiler/include/arrayViewElision.h index a200b22ed2ea..798b651bf84a 100644 --- a/compiler/include/arrayViewElision.h +++ b/compiler/include/arrayViewElision.h @@ -38,6 +38,7 @@ class ProtoSliceAssignHelper { inline BlockStmt* staticCheckBlock() const { return staticCheckBlock_; } CallExpr* getReplacement(); + void report(); private: CallExpr* call_; diff --git a/compiler/main/driver.cpp b/compiler/main/driver.cpp index e4b1f7ba88b3..9650ad65f39d 100644 --- a/compiler/main/driver.cpp +++ b/compiler/main/driver.cpp @@ -1408,6 +1408,7 @@ static ArgumentDescription arg_desc[] = { {"report-optimized-on", ' ', NULL, "Print information about on clauses that have been optimized for potential fast remote fork operation", "F", &fReportOptimizedOn, NULL, NULL}, {"report-auto-local-access", ' ', NULL, "Enable compiler logs for auto local access optimization", "N", &fReportAutoLocalAccess, "CHPL_REPORT_AUTO_LOCAL_ACCESS", NULL}, {"report-auto-aggregation", ' ', NULL, "Enable compiler logs for automatic aggregation", "N", &fReportAutoAggregation, "CHPL_REPORT_AUTO_AGGREGATION", NULL}, + {"report-array-view-elision", ' ', NULL, "Enable compiler logs for array view elision", "N", &fReportArrayViewElision, "CHPL_REPORT_ARRAY_VIEW_ELISION", NULL}, {"report-optimized-forall-unordered-ops", ' ', NULL, "Show which statements in foralls have been 
converted to unordered operations", "F", &fReportOptimizeForallUnordered, NULL, NULL}, {"report-promotion", ' ', NULL, "Print information about scalar promotion", "F", &fReportPromotion, NULL, NULL}, {"report-scalar-replace", ' ', NULL, "Print scalar replacement stats", "F", &fReportScalarReplace, NULL, NULL}, diff --git a/compiler/optimizations/arrayViewElision.cpp b/compiler/optimizations/arrayViewElision.cpp index 27bdc426d77c..59dd76b79d38 100644 --- a/compiler/optimizations/arrayViewElision.cpp +++ b/compiler/optimizations/arrayViewElision.cpp @@ -25,15 +25,70 @@ #include "passes.h" #include "resolution.h" -static bool exprSuitableForProtoSlice(Expr* e) { - if (CallExpr* call = toCallExpr(e)) { - if (SymExpr* callBase = toSymExpr(call->baseExpr)) { - if (!isFnSymbol(callBase->symbol())) { - return true; +static bool exprSuitableForProtoSlice(Expr* e); + +void arrayViewElision() { + if (!fArrayViewElision) return; + + std::vector candidates; + + for_alive_in_Vec (CallExpr, call, gCallExprs) { + if (call->getModule()->modTag == MOD_USER) { + if (call->isNamed("=")) { + if (exprSuitableForProtoSlice(call->get(1)) && + exprSuitableForProtoSlice(call->get(2))) { + //std::cout << call->stringLoc() << std::endl; + //nprint_view(call); + candidates.push_back(call); + } } } } - return false; + + for_vector(CallExpr, call, candidates) { + SET_LINENO(call); + + CallExpr* lhs = toCallExpr(call->get(1)); + CallExpr* rhs = toCallExpr(call->get(2)); + + Expr* lhsBase = lhs->baseExpr; + Expr* rhsBase = rhs->baseExpr; + + CallExpr* lhsPSCall = new CallExpr("chpl__createProtoSlice", lhsBase->copy()); + for_actuals(actual, lhs) { + lhsPSCall->insertAtTail(actual->copy()); + } + + CallExpr* rhsPSCall = new CallExpr("chpl__createProtoSlice", rhsBase->copy()); + for_actuals(actual, rhs) { + rhsPSCall->insertAtTail(actual->copy()); + } + + // arrayview elision placeholder + VarSymbol* placeholder = new VarSymbol("arrayview_elision_flag", dtBool); + 
placeholder->addFlag(FLAG_ARRAYVIEW_ELISION_FLAG); + + call->insertBefore(new DefExpr(placeholder, gFalse)); + + BlockStmt* thenBlock = new BlockStmt(); + VarSymbol* lhsPS = new VarSymbol("lhs_proto_slice"); + VarSymbol* rhsPS = new VarSymbol("rhs_proto_slice"); + + thenBlock->insertAtTail(new DefExpr(lhsPS, lhsPSCall)); + thenBlock->insertAtTail(new DefExpr(rhsPS, rhsPSCall)); + thenBlock->insertAtTail(new CallExpr(PRIM_PROTO_SLICE_ASSIGN, lhsPS, + rhsPS)); + + BlockStmt* elseBlock = new BlockStmt(); + + CondStmt* cond = new CondStmt(new SymExpr(placeholder), thenBlock, + elseBlock); + + call->insertBefore(cond); + elseBlock->insertAtTail(call->remove()); + + //list_view(cond); + } } ProtoSliceAssignHelper::ProtoSliceAssignHelper(CallExpr* call): @@ -74,6 +129,14 @@ CallExpr* ProtoSliceAssignHelper::getReplacement() { return new CallExpr("=", call_->get(1)->copy(), call_->get(2)->copy()); } +void ProtoSliceAssignHelper::report() { + if (!fReportArrayViewElision) return; + + std::string isSupported = supported() ? 
"supported" : "not supported"; + std::cout << "ArrayViewElision " << isSupported << " " << call_->stringLoc() + << std::endl; +} + bool ProtoSliceAssignHelper::handleOneProtoSlice(CallExpr* call) { INT_ASSERT(call->isNamed("chpl__createProtoSlice")); @@ -151,66 +214,14 @@ void ProtoSliceAssignHelper::findCondStmt() { } } -void arrayViewElision() { - if (!fArrayViewElision) return; - - std::vector candidates; - - for_alive_in_Vec (CallExpr, call, gCallExprs) { - if (call->getModule()->modTag == MOD_USER) { - if (call->isNamed("=")) { - if (exprSuitableForProtoSlice(call->get(1)) && - exprSuitableForProtoSlice(call->get(2))) { - //std::cout << call->stringLoc() << std::endl; - //nprint_view(call); - candidates.push_back(call); - } +static bool exprSuitableForProtoSlice(Expr* e) { + if (CallExpr* call = toCallExpr(e)) { + if (SymExpr* callBase = toSymExpr(call->baseExpr)) { + if (!isFnSymbol(callBase->symbol())) { + return true; } } } - - for_vector(CallExpr, call, candidates) { - SET_LINENO(call); - - CallExpr* lhs = toCallExpr(call->get(1)); - CallExpr* rhs = toCallExpr(call->get(2)); - - Expr* lhsBase = lhs->baseExpr; - Expr* rhsBase = rhs->baseExpr; - - CallExpr* lhsPSCall = new CallExpr("chpl__createProtoSlice", lhsBase->copy()); - for_actuals(actual, lhs) { - lhsPSCall->insertAtTail(actual->copy()); - } - - CallExpr* rhsPSCall = new CallExpr("chpl__createProtoSlice", rhsBase->copy()); - for_actuals(actual, rhs) { - rhsPSCall->insertAtTail(actual->copy()); - } - - // arrayview elision placeholder - VarSymbol* placeholder = new VarSymbol("arrayview_elision_flag", dtBool); - placeholder->addFlag(FLAG_ARRAYVIEW_ELISION_FLAG); - - call->insertBefore(new DefExpr(placeholder, gFalse)); - - BlockStmt* thenBlock = new BlockStmt(); - VarSymbol* lhsPS = new VarSymbol("lhs_proto_slice"); - VarSymbol* rhsPS = new VarSymbol("rhs_proto_slice"); - - thenBlock->insertAtTail(new DefExpr(lhsPS, lhsPSCall)); - thenBlock->insertAtTail(new DefExpr(rhsPS, rhsPSCall)); - 
thenBlock->insertAtTail(new CallExpr(PRIM_PROTO_SLICE_ASSIGN, lhsPS, - rhsPS)); - - BlockStmt* elseBlock = new BlockStmt(); - - CondStmt* cond = new CondStmt(new SymExpr(placeholder), thenBlock, - elseBlock); - - call->insertBefore(cond); - elseBlock->insertAtTail(call->remove()); - - //list_view(cond); - } + return false; } + diff --git a/compiler/resolution/preFold.cpp b/compiler/resolution/preFold.cpp index 00c034c28d44..dc9634b49d23 100644 --- a/compiler/resolution/preFold.cpp +++ b/compiler/resolution/preFold.cpp @@ -920,6 +920,8 @@ static Expr* preFoldPrimOp(CallExpr* call) { case PRIM_PROTO_SLICE_ASSIGN: { ProtoSliceAssignHelper assignment(call); + assignment.report(); + if (assignment.supported()) { retval = assignment.getReplacement(); call->replace(retval); From 45f7ddc62c3de8aee0b7e00dc454d0e904a422c6 Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Wed, 26 Jun 2024 11:48:26 -0700 Subject: [PATCH 47/88] Reduce number overloads to avoid ambiguity Signed-off-by: Engin Kayraklioglu --- modules/internal/ChapelArrayViewElision.chpl | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/modules/internal/ChapelArrayViewElision.chpl b/modules/internal/ChapelArrayViewElision.chpl index 8ca555909735..6697c62d240b 100644 --- a/modules/internal/ChapelArrayViewElision.chpl +++ b/modules/internal/ChapelArrayViewElision.chpl @@ -148,24 +148,12 @@ module ChapelArrayViewElision { } } - // TODO can we allow const arrs to be passed here without breaking constness - // guarantees? - // TODO we can also accept domains and ints (rank-change) - proc chpl__createProtoSlice(ref Arr, slicingExprs: range(?)) - where chpl__baseTypeSupportAVE(Arr) { - return new chpl__protoSlice(c_addrOf(Arr), slicingExprs); - } - - pragma "last resort" proc chpl__createProtoSlice(ref Arr, slicingExprs:range(?) ...) 
where chpl__baseTypeSupportAVE(Arr) { - return new chpl__protoSlice(c_addrOf(Arr), slicingExprs); - } - - pragma "last resort" - proc chpl__createProtoSlice(const ref Arr, slicingExprs: range(?)) - where chpl__baseTypeSupportAVE(Arr) { - return new chpl__protoSlice(c_addrOfConst(Arr), slicingExprs); + if slicingExprs.size == 1 then + return new chpl__protoSlice(c_addrOf(Arr), slicingExprs[0]); + else + return new chpl__protoSlice(c_addrOf(Arr), slicingExprs); } pragma "last resort" From f6b0be1781104375f56bd816a62053c9bd10aa52 Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Wed, 26 Jun 2024 11:49:43 -0700 Subject: [PATCH 48/88] Remove unused junk code Signed-off-by: Engin Kayraklioglu --- modules/internal/ChapelArrayViewElision.chpl | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/modules/internal/ChapelArrayViewElision.chpl b/modules/internal/ChapelArrayViewElision.chpl index 6697c62d240b..c02ab066e210 100644 --- a/modules/internal/ChapelArrayViewElision.chpl +++ b/modules/internal/ChapelArrayViewElision.chpl @@ -169,17 +169,6 @@ module ChapelArrayViewElision { return new chpl__protoSlice(); } - - proc chpl__exprSupportsViewTransfer(base, exprs...) param { - if base.isDefaultRectangular() { - - - - } - - return false; - } - proc chpl__baseTypeSupportAVE(base) param: bool { import Reflection; return isArray(base) && // also could be a view? 
From 287415e37c3e5b739f3c1300f7a668028390911b Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Wed, 26 Jun 2024 12:13:24 -0700 Subject: [PATCH 49/88] Improve the reporting, add a test Signed-off-by: Engin Kayraklioglu --- compiler/include/arrayViewElision.h | 9 +- compiler/optimizations/arrayViewElision.cpp | 39 +++++- .../arrayViewElision/basic.compopts | 2 +- .../optimizations/arrayViewElision/basic.good | 112 ++++++++++++++++++ 4 files changed, 157 insertions(+), 5 deletions(-) diff --git a/compiler/include/arrayViewElision.h b/compiler/include/arrayViewElision.h index 798b651bf84a..d5f448b24be0 100644 --- a/compiler/include/arrayViewElision.h +++ b/compiler/include/arrayViewElision.h @@ -49,8 +49,15 @@ class ProtoSliceAssignHelper { bool supported_; BlockStmt* staticCheckBlock_; + // support for report-array-view-elision + std::string lhsBaseType_; + std::string rhsBaseType_; + + std::vector lhsIndexExprTypes_; + std::vector rhsIndexExprTypes_; + void findCondStmt(); void findProtoSlices(); - bool handleOneProtoSlice(CallExpr* call); + bool handleOneProtoSlice(CallExpr* call, bool isLhs); CallExpr* findOneProtoSliceCall(Expr* e); }; diff --git a/compiler/optimizations/arrayViewElision.cpp b/compiler/optimizations/arrayViewElision.cpp index 59dd76b79d38..d47784f76daa 100644 --- a/compiler/optimizations/arrayViewElision.cpp +++ b/compiler/optimizations/arrayViewElision.cpp @@ -112,8 +112,8 @@ ProtoSliceAssignHelper::ProtoSliceAssignHelper(CallExpr* call): BlockStmt* parentBlock = toBlockStmt(call->parentExpr); parentBlock->insertAtHead(staticCheckBlock_); - supported_ = handleOneProtoSlice(newProtoSliceLhs_) && - handleOneProtoSlice(newProtoSliceRhs_); + supported_ = handleOneProtoSlice(newProtoSliceLhs_, /* isLhs */ true) && + handleOneProtoSlice(newProtoSliceRhs_, /* isLhs */ false); findCondStmt(); INT_ASSERT(condStmt_); @@ -135,11 +135,44 @@ void ProtoSliceAssignHelper::report() { std::string isSupported = supported() ? 
"supported" : "not supported"; std::cout << "ArrayViewElision " << isSupported << " " << call_->stringLoc() << std::endl; + + std::cout << "\t" << "lhsBaseType: " << lhsBaseType_ << std::endl; + std::cout << "\t" << "lhsIndexingExprs: " << std::endl; + for (auto typeName: lhsIndexExprTypes_) { + std::cout << "\t\t" << typeName << std::endl; + } + + std::cout << "\t" << "rhsBaseType: " << rhsBaseType_ << std::endl; + std::cout << "\t" << "rhsIndexingExprs: " << std::endl; + for (auto typeName: rhsIndexExprTypes_) { + std::cout << "\t\t" << typeName << std::endl; + } + + std::cout << std::endl; } -bool ProtoSliceAssignHelper::handleOneProtoSlice(CallExpr* call) { +bool ProtoSliceAssignHelper::handleOneProtoSlice(CallExpr* call, bool isLhs) { INT_ASSERT(call->isNamed("chpl__createProtoSlice")); + // stash some information while working on the call + if (fReportArrayViewElision) { + std::string& baseType = isLhs ? lhsBaseType_ : rhsBaseType_; + std::vector& indexExprTypes = isLhs ? lhsIndexExprTypes_ : + rhsIndexExprTypes_; + + bool baseRecorded = false; + for_actuals (actual, call) { + std::string typeName = std::string(actual->typeInfo()->symbol->name); + if (!baseRecorded) { + baseType = typeName; + baseRecorded = true; + } + else { + indexExprTypes.push_back(typeName); + } + } + } + CallExpr* typeCheck = new CallExpr("chpl__typesSupportArrayViewElision"); for_actuals (actual, call) { INT_ASSERT(isSymExpr(actual)); diff --git a/test/optimizations/arrayViewElision/basic.compopts b/test/optimizations/arrayViewElision/basic.compopts index 037ae1ee3cae..6090c043c732 100644 --- a/test/optimizations/arrayViewElision/basic.compopts +++ b/test/optimizations/arrayViewElision/basic.compopts @@ -1 +1 @@ --sdebugShortArrayTransferOpt=true +--report-array-view-elision -sdebugShortArrayTransferOpt=true diff --git a/test/optimizations/arrayViewElision/basic.good b/test/optimizations/arrayViewElision/basic.good index 058a1b4f8a13..2d4e73ba40b0 100644 --- 
a/test/optimizations/arrayViewElision/basic.good +++ b/test/optimizations/arrayViewElision/basic.good @@ -1,3 +1,115 @@ +ArrayViewElision supported basic.chpl:5 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),both,one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),both,one) + +ArrayViewElision supported basic.chpl:5 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),both,one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),both,one) + +ArrayViewElision supported basic.chpl:5 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),both,one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),both,one) + +ArrayViewElision supported basic.chpl:5 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),high,one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),high,one) + +ArrayViewElision supported basic.chpl:5 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),high,one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),high,one) + +ArrayViewElision supported basic.chpl:5 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),high,one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),high,one) + +ArrayViewElision supported basic.chpl:5 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),low,one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),low,one) + +ArrayViewElision supported basic.chpl:5 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),low,one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),low,one) + +ArrayViewElision supported basic.chpl:5 + 
lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),low,one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),low,one) + +ArrayViewElision supported basic.chpl:5 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),both,one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),both,one) + +ArrayViewElision supported basic.chpl:5 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),low,one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),low,one) + +ArrayViewElision supported basic.chpl:5 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),neither,one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),neither,one) + +ArrayViewElision supported basic.chpl:5 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),neither,one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),neither,one) + +ArrayViewElision supported basic.chpl:5 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),neither,one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),neither,one) + Set first two: Size: 2 Threshold: 50 size qualifies From 7db444775ac5ba7eca6ac6fcd5e4b4c081aafbad Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Wed, 26 Jun 2024 15:25:13 -0700 Subject: [PATCH 50/88] Fix an issue with multidimensional slicing Signed-off-by: Engin Kayraklioglu --- modules/internal/ChapelArrayViewElision.chpl | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/modules/internal/ChapelArrayViewElision.chpl b/modules/internal/ChapelArrayViewElision.chpl index c02ab066e210..387133d8e322 100644 --- a/modules/internal/ChapelArrayViewElision.chpl +++ b/modules/internal/ChapelArrayViewElision.chpl @@ 
-48,7 +48,7 @@ module ChapelArrayViewElision { var dummyArr = [1,]; this.ptrToArr = c_addrOf(dummyArr); - this.ranges = (1..0,); + this.ranges = 1..0; } proc init(ptrToArr, slicingExprs) { @@ -148,8 +148,9 @@ module ChapelArrayViewElision { } } - proc chpl__createProtoSlice(ref Arr, slicingExprs:range(?) ...) - where chpl__baseTypeSupportAVE(Arr) { + proc chpl__createProtoSlice(ref Arr, slicingExprs ...) + where chpl__baseTypeSupportAVE(Arr) && + chpl__isTupleOfRanges(slicingExprs) { if slicingExprs.size == 1 then return new chpl__protoSlice(c_addrOf(Arr), slicingExprs[0]); else @@ -157,13 +158,16 @@ module ChapelArrayViewElision { } pragma "last resort" - proc chpl__createProtoSlice(const ref Arr, slicingExprs:range(?) ...) - where chpl__baseTypeSupportAVE(Arr) { + proc chpl__createProtoSlice(const ref Arr, slicingExprs ...) + where chpl__baseTypeSupportAVE(Arr) && + chpl__isTupleOfRanges(slicingExprs) { return new chpl__protoSlice(c_addrOfConst(Arr), slicingExprs); } pragma "last resort" proc chpl__createProtoSlice(const ref Arr, slicingExprs... ) { + compilerWarning("uh-oh ", slicingExprs.type:string); + /*chpl_debug_writeln("creating with 30");*/ // this is an array access. 
This call will be eliminated later in // resolution, but we want it to live for a bit for easier resolution return new chpl__protoSlice(); From 4411fe2ce55159b7f7dd291bebaa2634c243ef05 Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Wed, 26 Jun 2024 15:37:03 -0700 Subject: [PATCH 51/88] Change a flag, improve a test to cover 2D arrays, too Signed-off-by: Engin Kayraklioglu --- .../internal/ChapelShortArrayTransfer.chpl | 8 +- .../optimizations/arrayViewElision/basic.chpl | 39 ---- .../arrayViewElision/basic.compopts | 1 - .../optimizations/arrayViewElision/basic.good | 190 ------------------ 4 files changed, 4 insertions(+), 234 deletions(-) delete mode 100644 test/optimizations/arrayViewElision/basic.chpl delete mode 100644 test/optimizations/arrayViewElision/basic.compopts delete mode 100644 test/optimizations/arrayViewElision/basic.good diff --git a/modules/internal/ChapelShortArrayTransfer.chpl b/modules/internal/ChapelShortArrayTransfer.chpl index 72290b9c4b40..a0fed45c611b 100644 --- a/modules/internal/ChapelShortArrayTransfer.chpl +++ b/modules/internal/ChapelShortArrayTransfer.chpl @@ -24,9 +24,9 @@ module ChapelShortArrayTransfer { use ChplConfig only CHPL_LOCALE_MODEL; @chpldoc.nodoc - config param debugShortArrayTransferOpt = false; + config param debugShortArrayTransfer = false; @chpldoc.nodoc - config param disableShortArrayTransferOpt = false; + config param disableShortArrayTransfer = false; @chpldoc.nodoc config const shortArrayTransferThreshold = 50; @@ -34,13 +34,13 @@ module ChapelShortArrayTransfer { proc chpl__staticCheckShortArrayTransfer(a, b) param { // this is the case I'm focusing on in the initial PR. This can definitely // be loosened up... by a lot. 
- return !disableShortArrayTransferOpt && isProtoSlice(a) && isProtoSlice(b); + return !disableShortArrayTransfer && isProtoSlice(a) && isProtoSlice(b); } inline proc chpl__dynamicCheckShortArrayTransfer(a, b) { param localCompilation = _local && CHPL_LOCALE_MODEL=="flat"; const sizeOk = a.sizeAs(uint) < shortArrayTransferThreshold; - if debugShortArrayTransferOpt { + if debugShortArrayTransfer { chpl_debug_writeln(" Size: ", a.sizeAs(uint), " Threshold: ", shortArrayTransferThreshold); if sizeOk then diff --git a/test/optimizations/arrayViewElision/basic.chpl b/test/optimizations/arrayViewElision/basic.chpl deleted file mode 100644 index 4fff7ca551ef..000000000000 --- a/test/optimizations/arrayViewElision/basic.chpl +++ /dev/null @@ -1,39 +0,0 @@ -var A: [1..10] int = 1; -var B: [1..10] int = 2; - -proc test(dstRange, srcRange) { - A[dstRange] = B[srcRange]; - writeln(A); - A = 1; // reset -} - -writeln("Set first two:"); -test(1..2, 1..2); -test(1..2, 9..10); -test(1..2, ..2); -test(1..2, 9..); -test(..2, 1..2); -test(..2, 8..9); -test(..2, ..2); -test(..2, 9..); - -writeln("Set last two:"); -test(9..10, 1..2); -test(9..10, 9..10); -test(9..10, ..2); -test(9..10, 9..); -test(9.., 1..2); -test(9.., 9..10); -test(9.., ..2); -test(9.., 9..); - -writeln("Set all:"); -test(1..10, 1..10); -test(1..10, 1..); -test(1..10, ..); -test(1.., 1..10); -test(1.., 1..); -test(1.., ..); -test(.., 1..10); -test(.., 1..); -test(.., ..); diff --git a/test/optimizations/arrayViewElision/basic.compopts b/test/optimizations/arrayViewElision/basic.compopts deleted file mode 100644 index 6090c043c732..000000000000 --- a/test/optimizations/arrayViewElision/basic.compopts +++ /dev/null @@ -1 +0,0 @@ ---report-array-view-elision -sdebugShortArrayTransferOpt=true diff --git a/test/optimizations/arrayViewElision/basic.good b/test/optimizations/arrayViewElision/basic.good deleted file mode 100644 index 2d4e73ba40b0..000000000000 --- a/test/optimizations/arrayViewElision/basic.good +++ 
/dev/null @@ -1,190 +0,0 @@ -ArrayViewElision supported basic.chpl:5 - lhsBaseType: [domain(1,int(64),one)] int(64) - lhsIndexingExprs: - range(int(64),both,one) - rhsBaseType: [domain(1,int(64),one)] int(64) - rhsIndexingExprs: - range(int(64),both,one) - -ArrayViewElision supported basic.chpl:5 - lhsBaseType: [domain(1,int(64),one)] int(64) - lhsIndexingExprs: - range(int(64),both,one) - rhsBaseType: [domain(1,int(64),one)] int(64) - rhsIndexingExprs: - range(int(64),both,one) - -ArrayViewElision supported basic.chpl:5 - lhsBaseType: [domain(1,int(64),one)] int(64) - lhsIndexingExprs: - range(int(64),both,one) - rhsBaseType: [domain(1,int(64),one)] int(64) - rhsIndexingExprs: - range(int(64),both,one) - -ArrayViewElision supported basic.chpl:5 - lhsBaseType: [domain(1,int(64),one)] int(64) - lhsIndexingExprs: - range(int(64),high,one) - rhsBaseType: [domain(1,int(64),one)] int(64) - rhsIndexingExprs: - range(int(64),high,one) - -ArrayViewElision supported basic.chpl:5 - lhsBaseType: [domain(1,int(64),one)] int(64) - lhsIndexingExprs: - range(int(64),high,one) - rhsBaseType: [domain(1,int(64),one)] int(64) - rhsIndexingExprs: - range(int(64),high,one) - -ArrayViewElision supported basic.chpl:5 - lhsBaseType: [domain(1,int(64),one)] int(64) - lhsIndexingExprs: - range(int(64),high,one) - rhsBaseType: [domain(1,int(64),one)] int(64) - rhsIndexingExprs: - range(int(64),high,one) - -ArrayViewElision supported basic.chpl:5 - lhsBaseType: [domain(1,int(64),one)] int(64) - lhsIndexingExprs: - range(int(64),low,one) - rhsBaseType: [domain(1,int(64),one)] int(64) - rhsIndexingExprs: - range(int(64),low,one) - -ArrayViewElision supported basic.chpl:5 - lhsBaseType: [domain(1,int(64),one)] int(64) - lhsIndexingExprs: - range(int(64),low,one) - rhsBaseType: [domain(1,int(64),one)] int(64) - rhsIndexingExprs: - range(int(64),low,one) - -ArrayViewElision supported basic.chpl:5 - lhsBaseType: [domain(1,int(64),one)] int(64) - lhsIndexingExprs: - range(int(64),low,one) - 
rhsBaseType: [domain(1,int(64),one)] int(64) - rhsIndexingExprs: - range(int(64),low,one) - -ArrayViewElision supported basic.chpl:5 - lhsBaseType: [domain(1,int(64),one)] int(64) - lhsIndexingExprs: - range(int(64),both,one) - rhsBaseType: [domain(1,int(64),one)] int(64) - rhsIndexingExprs: - range(int(64),both,one) - -ArrayViewElision supported basic.chpl:5 - lhsBaseType: [domain(1,int(64),one)] int(64) - lhsIndexingExprs: - range(int(64),low,one) - rhsBaseType: [domain(1,int(64),one)] int(64) - rhsIndexingExprs: - range(int(64),low,one) - -ArrayViewElision supported basic.chpl:5 - lhsBaseType: [domain(1,int(64),one)] int(64) - lhsIndexingExprs: - range(int(64),neither,one) - rhsBaseType: [domain(1,int(64),one)] int(64) - rhsIndexingExprs: - range(int(64),neither,one) - -ArrayViewElision supported basic.chpl:5 - lhsBaseType: [domain(1,int(64),one)] int(64) - lhsIndexingExprs: - range(int(64),neither,one) - rhsBaseType: [domain(1,int(64),one)] int(64) - rhsIndexingExprs: - range(int(64),neither,one) - -ArrayViewElision supported basic.chpl:5 - lhsBaseType: [domain(1,int(64),one)] int(64) - lhsIndexingExprs: - range(int(64),neither,one) - rhsBaseType: [domain(1,int(64),one)] int(64) - rhsIndexingExprs: - range(int(64),neither,one) - -Set first two: - Size: 2 Threshold: 50 - size qualifies -2 2 1 1 1 1 1 1 1 1 - Size: 2 Threshold: 50 - size qualifies -2 2 1 1 1 1 1 1 1 1 - Size: 2 Threshold: 50 - size qualifies -2 2 1 1 1 1 1 1 1 1 - Size: 2 Threshold: 50 - size qualifies -2 2 1 1 1 1 1 1 1 1 - Size: 2 Threshold: 50 - size qualifies -2 2 1 1 1 1 1 1 1 1 - Size: 2 Threshold: 50 - size qualifies -2 2 1 1 1 1 1 1 1 1 - Size: 2 Threshold: 50 - size qualifies -2 2 1 1 1 1 1 1 1 1 - Size: 2 Threshold: 50 - size qualifies -2 2 1 1 1 1 1 1 1 1 -Set last two: - Size: 2 Threshold: 50 - size qualifies -1 1 1 1 1 1 1 1 2 2 - Size: 2 Threshold: 50 - size qualifies -1 1 1 1 1 1 1 1 2 2 - Size: 2 Threshold: 50 - size qualifies -1 1 1 1 1 1 1 1 2 2 - Size: 2 Threshold: 50 - size 
qualifies -1 1 1 1 1 1 1 1 2 2 - Size: 2 Threshold: 50 - size qualifies -1 1 1 1 1 1 1 1 2 2 - Size: 2 Threshold: 50 - size qualifies -1 1 1 1 1 1 1 1 2 2 - Size: 2 Threshold: 50 - size qualifies -1 1 1 1 1 1 1 1 2 2 - Size: 2 Threshold: 50 - size qualifies -1 1 1 1 1 1 1 1 2 2 -Set all: - Size: 10 Threshold: 50 - size qualifies -2 2 2 2 2 2 2 2 2 2 - Size: 10 Threshold: 50 - size qualifies -2 2 2 2 2 2 2 2 2 2 - Size: 10 Threshold: 50 - size qualifies -2 2 2 2 2 2 2 2 2 2 - Size: 10 Threshold: 50 - size qualifies -2 2 2 2 2 2 2 2 2 2 - Size: 10 Threshold: 50 - size qualifies -2 2 2 2 2 2 2 2 2 2 - Size: 10 Threshold: 50 - size qualifies -2 2 2 2 2 2 2 2 2 2 - Size: 10 Threshold: 50 - size qualifies -2 2 2 2 2 2 2 2 2 2 - Size: 10 Threshold: 50 - size qualifies -2 2 2 2 2 2 2 2 2 2 - Size: 10 Threshold: 50 - size qualifies -2 2 2 2 2 2 2 2 2 2 From d2881b641a48447d0f20a5434901fbc60d4f7f8c Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Wed, 26 Jun 2024 15:37:25 -0700 Subject: [PATCH 52/88] Remove a forgotten debug output Signed-off-by: Engin Kayraklioglu --- modules/internal/ChapelArrayViewElision.chpl | 2 -- 1 file changed, 2 deletions(-) diff --git a/modules/internal/ChapelArrayViewElision.chpl b/modules/internal/ChapelArrayViewElision.chpl index 387133d8e322..ad4ecf7a5075 100644 --- a/modules/internal/ChapelArrayViewElision.chpl +++ b/modules/internal/ChapelArrayViewElision.chpl @@ -166,8 +166,6 @@ module ChapelArrayViewElision { pragma "last resort" proc chpl__createProtoSlice(const ref Arr, slicingExprs... ) { - compilerWarning("uh-oh ", slicingExprs.type:string); - /*chpl_debug_writeln("creating with 30");*/ // this is an array access. 
This call will be eliminated later in // resolution, but we want it to live for a bit for easier resolution return new chpl__protoSlice(); From 53ebd1598a44c55527a5a35e1770c9141fc65639 Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Wed, 26 Jun 2024 15:53:29 -0700 Subject: [PATCH 53/88] Refactor the compiler a bit to be able to handle constness Signed-off-by: Engin Kayraklioglu --- compiler/optimizations/arrayViewElision.cpp | 28 ++++++++++++--------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/compiler/optimizations/arrayViewElision.cpp b/compiler/optimizations/arrayViewElision.cpp index d47784f76daa..0d8f9f933ffb 100644 --- a/compiler/optimizations/arrayViewElision.cpp +++ b/compiler/optimizations/arrayViewElision.cpp @@ -27,6 +27,20 @@ static bool exprSuitableForProtoSlice(Expr* e); +static CallExpr* generateCreateProtoSlice(CallExpr* call) { + INT_ASSERT(call); + + SymExpr* base = toSymExpr(call->baseExpr); + INT_ASSERT(base); + + CallExpr* ret = new CallExpr("chpl__createProtoSlice", base->copy()); + for_actuals(actual, call) { + ret->insertAtTail(actual->copy()); + } + + return ret; +} + void arrayViewElision() { if (!fArrayViewElision) return; @@ -51,18 +65,8 @@ void arrayViewElision() { CallExpr* lhs = toCallExpr(call->get(1)); CallExpr* rhs = toCallExpr(call->get(2)); - Expr* lhsBase = lhs->baseExpr; - Expr* rhsBase = rhs->baseExpr; - - CallExpr* lhsPSCall = new CallExpr("chpl__createProtoSlice", lhsBase->copy()); - for_actuals(actual, lhs) { - lhsPSCall->insertAtTail(actual->copy()); - } - - CallExpr* rhsPSCall = new CallExpr("chpl__createProtoSlice", rhsBase->copy()); - for_actuals(actual, rhs) { - rhsPSCall->insertAtTail(actual->copy()); - } + CallExpr* lhsPSCall = generateCreateProtoSlice(lhs); + CallExpr* rhsPSCall = generateCreateProtoSlice(rhs); // arrayview elision placeholder VarSymbol* placeholder = new VarSymbol("arrayview_elision_flag", dtBool); From 1dd3d30ef41fca8acc8543978edbfbeca1af2a3b Mon Sep 17 00:00:00 2001 
From: Engin Kayraklioglu Date: Wed, 26 Jun 2024 16:40:48 -0700 Subject: [PATCH 54/88] Add a test that was removed by mistake Signed-off-by: Engin Kayraklioglu --- .../arrayViewElision/slices-1d.good | 243 ++++++++++++ .../arrayViewElision/slices-2d.good | 371 ++++++++++++++++++ .../arrayViewElision/slices.chpl | 63 +++ .../arrayViewElision/slices.compopts | 2 + 4 files changed, 679 insertions(+) create mode 100644 test/optimizations/arrayViewElision/slices-1d.good create mode 100644 test/optimizations/arrayViewElision/slices-2d.good create mode 100644 test/optimizations/arrayViewElision/slices.chpl create mode 100644 test/optimizations/arrayViewElision/slices.compopts diff --git a/test/optimizations/arrayViewElision/slices-1d.good b/test/optimizations/arrayViewElision/slices-1d.good new file mode 100644 index 000000000000..9215b5c29cc3 --- /dev/null +++ b/test/optimizations/arrayViewElision/slices-1d.good @@ -0,0 +1,243 @@ +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),both,one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),both,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),both,one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),both,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),both,one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),both,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),high,one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),high,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),high,one) + 
rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),high,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),high,one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),high,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),low,one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),low,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),low,one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),low,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),low,one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),low,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),both,one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),both,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),low,one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),low,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),neither,one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),neither,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),neither,one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),neither,one) + +ArrayViewElision supported slices.chpl:21 + 
lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),neither,one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),neither,one) + +Set first two: + Size: 2 Threshold: 50 + size qualifies +Test 1 +2 2 1 1 1 + + Size: 2 Threshold: 50 + size qualifies +Test 2 +2 2 1 1 1 + + Size: 2 Threshold: 50 + size qualifies +Test 3 +2 2 1 1 1 + + Size: 2 Threshold: 50 + size qualifies +Test 4 +2 2 1 1 1 + + Size: 2 Threshold: 50 + size qualifies +Test 5 +2 2 1 1 1 + + Size: 2 Threshold: 50 + size qualifies +Test 6 +2 2 1 1 1 + + Size: 2 Threshold: 50 + size qualifies +Test 7 +2 2 1 1 1 + + Size: 2 Threshold: 50 + size qualifies +Test 8 +2 2 1 1 1 + +----------------- +Set last two: + Size: 2 Threshold: 50 + size qualifies +Test 9 +1 1 1 2 2 + + Size: 2 Threshold: 50 + size qualifies +Test 10 +1 1 1 2 2 + + Size: 2 Threshold: 50 + size qualifies +Test 11 +1 1 1 2 2 + + Size: 2 Threshold: 50 + size qualifies +Test 12 +1 1 1 2 2 + + Size: 2 Threshold: 50 + size qualifies +Test 13 +1 1 1 2 2 + + Size: 2 Threshold: 50 + size qualifies +Test 14 +1 1 1 2 2 + + Size: 2 Threshold: 50 + size qualifies +Test 15 +1 1 1 2 2 + + Size: 2 Threshold: 50 + size qualifies +Test 16 +1 1 1 2 2 + +----------------- +Set all: + Size: 5 Threshold: 50 + size qualifies +Test 17 +2 2 2 2 2 + + Size: 5 Threshold: 50 + size qualifies +Test 18 +2 2 2 2 2 + + Size: 5 Threshold: 50 + size qualifies +Test 19 +2 2 2 2 2 + + Size: 5 Threshold: 50 + size qualifies +Test 20 +2 2 2 2 2 + + Size: 5 Threshold: 50 + size qualifies +Test 21 +2 2 2 2 2 + + Size: 5 Threshold: 50 + size qualifies +Test 22 +2 2 2 2 2 + + Size: 5 Threshold: 50 + size qualifies +Test 23 +2 2 2 2 2 + + Size: 5 Threshold: 50 + size qualifies +Test 24 +2 2 2 2 2 + + Size: 5 Threshold: 50 + size qualifies +Test 25 +2 2 2 2 2 + +----------------- diff --git a/test/optimizations/arrayViewElision/slices-2d.good b/test/optimizations/arrayViewElision/slices-2d.good new file mode 100644 index 
000000000000..e2ce44ad301d --- /dev/null +++ b/test/optimizations/arrayViewElision/slices-2d.good @@ -0,0 +1,371 @@ +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(2,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),both,one) + range(int(64),both,one) + rhsBaseType: [domain(2,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),both,one) + range(int(64),both,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(2,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),both,one) + range(int(64),both,one) + rhsBaseType: [domain(2,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),both,one) + range(int(64),both,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(2,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),both,one) + range(int(64),both,one) + rhsBaseType: [domain(2,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),both,one) + range(int(64),both,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(2,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),high,one) + range(int(64),high,one) + rhsBaseType: [domain(2,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),high,one) + range(int(64),high,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(2,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),high,one) + range(int(64),high,one) + rhsBaseType: [domain(2,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),high,one) + range(int(64),high,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(2,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),high,one) + range(int(64),high,one) + rhsBaseType: [domain(2,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),high,one) + range(int(64),high,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(2,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),low,one) + range(int(64),low,one) + rhsBaseType: 
[domain(2,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),low,one) + range(int(64),low,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(2,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),low,one) + range(int(64),low,one) + rhsBaseType: [domain(2,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),low,one) + range(int(64),low,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(2,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),low,one) + range(int(64),low,one) + rhsBaseType: [domain(2,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),low,one) + range(int(64),low,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(2,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),both,one) + range(int(64),both,one) + rhsBaseType: [domain(2,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),both,one) + range(int(64),both,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(2,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),low,one) + range(int(64),low,one) + rhsBaseType: [domain(2,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),low,one) + range(int(64),low,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(2,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),neither,one) + range(int(64),neither,one) + rhsBaseType: [domain(2,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),neither,one) + range(int(64),neither,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(2,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),neither,one) + range(int(64),neither,one) + rhsBaseType: [domain(2,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),neither,one) + range(int(64),neither,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(2,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),neither,one) + range(int(64),neither,one) + rhsBaseType: 
[domain(2,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),neither,one) + range(int(64),neither,one) + +Set first two: + Size: 4 Threshold: 50 + size qualifies +Test 1 +2 2 1 1 1 +2 2 1 1 1 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 1 1 + + Size: 4 Threshold: 50 + size qualifies +Test 2 +2 2 1 1 1 +2 2 1 1 1 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 1 1 + + Size: 4 Threshold: 50 + size qualifies +Test 3 +2 2 1 1 1 +2 2 1 1 1 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 1 1 + + Size: 4 Threshold: 50 + size qualifies +Test 4 +2 2 1 1 1 +2 2 1 1 1 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 1 1 + + Size: 4 Threshold: 50 + size qualifies +Test 5 +2 2 1 1 1 +2 2 1 1 1 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 1 1 + + Size: 4 Threshold: 50 + size qualifies +Test 6 +2 2 1 1 1 +2 2 1 1 1 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 1 1 + + Size: 4 Threshold: 50 + size qualifies +Test 7 +2 2 1 1 1 +2 2 1 1 1 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 1 1 + + Size: 4 Threshold: 50 + size qualifies +Test 8 +2 2 1 1 1 +2 2 1 1 1 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 1 1 + +----------------- +Set last two: + Size: 4 Threshold: 50 + size qualifies +Test 9 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 2 2 +1 1 1 2 2 + + Size: 4 Threshold: 50 + size qualifies +Test 10 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 2 2 +1 1 1 2 2 + + Size: 4 Threshold: 50 + size qualifies +Test 11 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 2 2 +1 1 1 2 2 + + Size: 4 Threshold: 50 + size qualifies +Test 12 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 2 2 +1 1 1 2 2 + + Size: 4 Threshold: 50 + size qualifies +Test 13 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 2 2 +1 1 1 2 2 + + Size: 4 Threshold: 50 + size qualifies +Test 14 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 2 2 +1 1 1 2 2 + + Size: 4 Threshold: 50 + size qualifies +Test 15 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 2 2 +1 1 1 2 2 + + Size: 4 Threshold: 50 + size qualifies +Test 16 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 2 2 +1 1 1 2 2 + +----------------- +Set all: + Size: 25 Threshold: 50 + size qualifies +Test 17 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 
+ + Size: 25 Threshold: 50 + size qualifies +Test 18 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 + + Size: 25 Threshold: 50 + size qualifies +Test 19 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 + + Size: 25 Threshold: 50 + size qualifies +Test 20 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 + + Size: 25 Threshold: 50 + size qualifies +Test 21 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 + + Size: 25 Threshold: 50 + size qualifies +Test 22 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 + + Size: 25 Threshold: 50 + size qualifies +Test 23 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 + + Size: 25 Threshold: 50 + size qualifies +Test 24 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 + + Size: 25 Threshold: 50 + size qualifies +Test 25 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 + +----------------- diff --git a/test/optimizations/arrayViewElision/slices.chpl b/test/optimizations/arrayViewElision/slices.chpl new file mode 100644 index 000000000000..17911a8de87c --- /dev/null +++ b/test/optimizations/arrayViewElision/slices.chpl @@ -0,0 +1,63 @@ +config param rank = 1; + +proc multuplify(param rank, x) { + var ret: rank*x.type; + for param i in 0.. 
Date: Wed, 26 Jun 2024 16:42:38 -0700 Subject: [PATCH 55/88] Pay attention to constness, add test Signed-off-by: Engin Kayraklioglu --- compiler/optimizations/arrayViewElision.cpp | 38 ++++++---- modules/internal/ChapelArrayViewElision.chpl | 75 +++++++++++++++++-- .../arrayViewElision/constChecking-error.good | 1 + .../constChecking-no-error.good | 1 + .../arrayViewElision/constChecking.chpl | 16 ++++ .../arrayViewElision/constChecking.compopts | 2 + 6 files changed, 109 insertions(+), 24 deletions(-) create mode 100644 test/optimizations/arrayViewElision/constChecking-error.good create mode 100644 test/optimizations/arrayViewElision/constChecking-no-error.good create mode 100644 test/optimizations/arrayViewElision/constChecking.chpl create mode 100644 test/optimizations/arrayViewElision/constChecking.compopts diff --git a/compiler/optimizations/arrayViewElision.cpp b/compiler/optimizations/arrayViewElision.cpp index 0d8f9f933ffb..f022218159cf 100644 --- a/compiler/optimizations/arrayViewElision.cpp +++ b/compiler/optimizations/arrayViewElision.cpp @@ -25,7 +25,18 @@ #include "passes.h" #include "resolution.h" -static bool exprSuitableForProtoSlice(Expr* e); +static bool exprSuitableForProtoSlice(Expr* e, bool isLhs) { + if (CallExpr* call = toCallExpr(e)) { + if (SymExpr* callBase = toSymExpr(call->baseExpr)) { + if (!isFnSymbol(callBase->symbol()) && + (!isLhs || !callBase->symbol()->isConstant())) { + return true; + } + } + } + return false; +} + static CallExpr* generateCreateProtoSlice(CallExpr* call) { INT_ASSERT(call); @@ -33,7 +44,12 @@ static CallExpr* generateCreateProtoSlice(CallExpr* call) { SymExpr* base = toSymExpr(call->baseExpr); INT_ASSERT(base); - CallExpr* ret = new CallExpr("chpl__createProtoSlice", base->copy()); + const bool isConst = base->symbol()->isConstant(); + + const char* factory = isConst ? 
"chpl__createConstProtoSlice" : + "chpl__createProtoSlice"; + + CallExpr* ret = new CallExpr(factory, base->copy()); for_actuals(actual, call) { ret->insertAtTail(actual->copy()); } @@ -49,8 +65,8 @@ void arrayViewElision() { for_alive_in_Vec (CallExpr, call, gCallExprs) { if (call->getModule()->modTag == MOD_USER) { if (call->isNamed("=")) { - if (exprSuitableForProtoSlice(call->get(1)) && - exprSuitableForProtoSlice(call->get(2))) { + if (exprSuitableForProtoSlice(call->get(1), /* isLhs */ true) && + exprSuitableForProtoSlice(call->get(2), /* isLhs */ false)) { //std::cout << call->stringLoc() << std::endl; //nprint_view(call); candidates.push_back(call); @@ -156,7 +172,8 @@ void ProtoSliceAssignHelper::report() { } bool ProtoSliceAssignHelper::handleOneProtoSlice(CallExpr* call, bool isLhs) { - INT_ASSERT(call->isNamed("chpl__createProtoSlice")); + INT_ASSERT(call->isNamed("chpl__createProtoSlice") || + call->isNamed("chpl__createConstProtoSlice")); // stash some information while working on the call if (fReportArrayViewElision) { @@ -251,14 +268,3 @@ void ProtoSliceAssignHelper::findCondStmt() { } } -static bool exprSuitableForProtoSlice(Expr* e) { - if (CallExpr* call = toCallExpr(e)) { - if (SymExpr* callBase = toSymExpr(call->baseExpr)) { - if (!isFnSymbol(callBase->symbol())) { - return true; - } - } - } - return false; -} - diff --git a/modules/internal/ChapelArrayViewElision.chpl b/modules/internal/ChapelArrayViewElision.chpl index ad4ecf7a5075..1bf8d2b2ad2a 100644 --- a/modules/internal/ChapelArrayViewElision.chpl +++ b/modules/internal/ChapelArrayViewElision.chpl @@ -36,6 +36,7 @@ module ChapelArrayViewElision { record chpl__protoSlice { param rank; + param isConst; type idxType; var ptrToArr; // I want this to be a `forwarding ref` to the array var ranges; @@ -44,6 +45,7 @@ module ChapelArrayViewElision { // this constructor is called to create dummy protoSlices that will never // be used and removed from the AST. 
this.rank = 1; + this.isConst = true; this.idxType = int; var dummyArr = [1,]; @@ -51,8 +53,9 @@ module ChapelArrayViewElision { this.ranges = 1..0; } - proc init(ptrToArr, slicingExprs) { + proc init(param isConst, ptrToArr, slicingExprs) { this.rank = ptrToArr.deref().rank; + this.isConst = isConst; this.idxType = ptrToArr.deref().idxType; this.ptrToArr = ptrToArr; if allBounded(slicingExprs) { @@ -70,6 +73,7 @@ module ChapelArrayViewElision { proc init=(other: chpl__protoSlice) { this.rank = other.rank; + this.isConst = other.isConst; this.idxType = other.idxType; this.ptrToArr = other.ptrToArr; this.ranges = other.ranges; @@ -111,7 +115,30 @@ module ChapelArrayViewElision { } inline proc isRectangular() param { return ptrToArr.deref().isRectangular(); } - iter these() ref { + iter these() ref where !isConst { + if rank == 1 then { + foreach elem in chpl__serialViewIter1D(ptrToArr.deref()._instance, + domOrRange) { + yield elem; + } + } + else { + + /* + Storing `inst` here and iterating over `inst` doesn't seem to work. + Check the arrays primer for how that causes issues. Potentially an + iterator inlining issue, or memory cleanup going sideways. 
+ + const inst = domOrRange._instance; + */ + foreach elem in chpl__serialViewIter(ptrToArr.deref()._instance, + domOrRange._instance) { + yield elem; + } + } + } + + iter these() const ref where isConst { if rank == 1 then { foreach elem in chpl__serialViewIter1D(ptrToArr.deref()._instance, domOrRange) { @@ -140,28 +167,55 @@ module ChapelArrayViewElision { } } - iter these(param tag: iterKind, followThis) ref where tag==iterKind.follower { + iter these(param tag: iterKind, followThis) ref + where tag==iterKind.follower && !isConst { ref arr = ptrToArr.deref(); foreach i in domOrRange.these(iterKind.follower, followThis) { yield arr[i]; } } + iter these(param tag: iterKind, followThis) const ref + where tag==iterKind.follower && isConst{ + const ref arr = ptrToArr.deref(); + foreach i in domOrRange.these(iterKind.follower, followThis) { + yield arr[i]; + } + } + } + + operator ==(const ref lhs: chpl__protoSlice(?), + const ref rhs: chpl__protoSlice(?)) { + return lhs.rank == rhs.rank && + lhs.ptrToArr == rhs.ptrToArr && + lhs.ranges == rhs.ranges; } proc chpl__createProtoSlice(ref Arr, slicingExprs ...) where chpl__baseTypeSupportAVE(Arr) && chpl__isTupleOfRanges(slicingExprs) { if slicingExprs.size == 1 then - return new chpl__protoSlice(c_addrOf(Arr), slicingExprs[0]); + return new chpl__protoSlice(isConst=false, c_addrOf(Arr), + slicingExprs[0]); else - return new chpl__protoSlice(c_addrOf(Arr), slicingExprs); + return new chpl__protoSlice(isConst=false, c_addrOf(Arr), slicingExprs); } - pragma "last resort" - proc chpl__createProtoSlice(const ref Arr, slicingExprs ...) + proc chpl__createConstProtoSlice(const ref Arr, slicingExprs ...) 
where chpl__baseTypeSupportAVE(Arr) && chpl__isTupleOfRanges(slicingExprs) { - return new chpl__protoSlice(c_addrOfConst(Arr), slicingExprs); + if slicingExprs.size == 1 { + return new chpl__protoSlice(isConst=true, c_addrOfConst(Arr), + slicingExprs[0]); + } + else + return new chpl__protoSlice(isConst=true, c_addrOfConst(Arr), slicingExprs); + } + + pragma "last resort" + proc chpl__createProtoSlice(ref Arr, slicingExprs... ) { + // this is an array access. This call will be eliminated later in + // resolution, but we want it to live for a bit for easier resolution + return new chpl__protoSlice(); } pragma "last resort" @@ -171,6 +225,7 @@ module ChapelArrayViewElision { return new chpl__protoSlice(); } + proc chpl__baseTypeSupportAVE(base) param: bool { import Reflection; return isArray(base) && // also could be a view? @@ -218,4 +273,8 @@ module ChapelArrayViewElision { compilerError("Unexpected type to allBounded"); return false; } + + inline operator :(ref a: chpl__protoSlice, type b: chpl__protoSlice) { + compilerError("Should never cast proto slices"); + } } diff --git a/test/optimizations/arrayViewElision/constChecking-error.good b/test/optimizations/arrayViewElision/constChecking-error.good new file mode 100644 index 000000000000..389fe0924fc2 --- /dev/null +++ b/test/optimizations/arrayViewElision/constChecking-error.good @@ -0,0 +1 @@ +constChecking.chpl:14: error: cannot assign to const variable diff --git a/test/optimizations/arrayViewElision/constChecking-no-error.good b/test/optimizations/arrayViewElision/constChecking-no-error.good new file mode 100644 index 000000000000..020ad234dda5 --- /dev/null +++ b/test/optimizations/arrayViewElision/constChecking-no-error.good @@ -0,0 +1 @@ +0 1 1 0 0 diff --git a/test/optimizations/arrayViewElision/constChecking.chpl b/test/optimizations/arrayViewElision/constChecking.chpl new file mode 100644 index 000000000000..b043d6c66ed4 --- /dev/null +++ b/test/optimizations/arrayViewElision/constChecking.chpl @@ -0,0 
+1,16 @@ +config param shouldError = false; + +if !shouldError { + var A: [1..5] int; + const B: [1..5] int = 1; + + A[2..3] = B[2..3]; + writeln(A); +} +else { + const A: [1..5] int; + const B: [1..5] int = 1; + + A[2..3] = B[2..3]; + writeln(A); +} diff --git a/test/optimizations/arrayViewElision/constChecking.compopts b/test/optimizations/arrayViewElision/constChecking.compopts new file mode 100644 index 000000000000..04b76300be9d --- /dev/null +++ b/test/optimizations/arrayViewElision/constChecking.compopts @@ -0,0 +1,2 @@ +-sshouldError=false # constChecking-no-error +-sshouldError=true # constChecking-error From 4999774b8fae786586fba4e640921cfae3dab041 Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Wed, 26 Jun 2024 17:16:28 -0700 Subject: [PATCH 56/88] Fix a last-resort function Signed-off-by: Engin Kayraklioglu --- modules/internal/ChapelArrayViewElision.chpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/internal/ChapelArrayViewElision.chpl b/modules/internal/ChapelArrayViewElision.chpl index 1bf8d2b2ad2a..c095e4d42ce3 100644 --- a/modules/internal/ChapelArrayViewElision.chpl +++ b/modules/internal/ChapelArrayViewElision.chpl @@ -219,7 +219,7 @@ module ChapelArrayViewElision { } pragma "last resort" - proc chpl__createProtoSlice(const ref Arr, slicingExprs... ) { + proc chpl__createConstProtoSlice(const ref Arr, slicingExprs... ) { // this is an array access. 
This call will be eliminated later in // resolution, but we want it to live for a bit for easier resolution return new chpl__protoSlice(); From d7eaa66e480d08c5b461050af85297a5f55765ec Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Wed, 26 Jun 2024 21:15:50 -0700 Subject: [PATCH 57/88] Fix a bug exposed by negative-stride slice warning Signed-off-by: Engin Kayraklioglu --- compiler/optimizations/arrayViewElision.cpp | 29 ++++++++++++------- modules/internal/ChapelArrayViewElision.chpl | 2 ++ .../arrayViewElision/negativeStrideSlice.chpl | 11 +++++++ .../negativeStrideSlice.compopts | 1 + .../arrayViewElision/negativeStrideSlice.good | 19 ++++++++++++ 5 files changed, 51 insertions(+), 11 deletions(-) create mode 100644 test/optimizations/arrayViewElision/negativeStrideSlice.chpl create mode 100644 test/optimizations/arrayViewElision/negativeStrideSlice.compopts create mode 100644 test/optimizations/arrayViewElision/negativeStrideSlice.good diff --git a/compiler/optimizations/arrayViewElision.cpp b/compiler/optimizations/arrayViewElision.cpp index f022218159cf..5d05fbf3ee0e 100644 --- a/compiler/optimizations/arrayViewElision.cpp +++ b/compiler/optimizations/arrayViewElision.cpp @@ -24,6 +24,7 @@ #include "global-ast-vecs.h" #include "passes.h" #include "resolution.h" +#include "view.h" static bool exprSuitableForProtoSlice(Expr* e, bool isLhs) { if (CallExpr* call = toCallExpr(e)) { @@ -196,6 +197,7 @@ bool ProtoSliceAssignHelper::handleOneProtoSlice(CallExpr* call, bool isLhs) { CallExpr* typeCheck = new CallExpr("chpl__typesSupportArrayViewElision"); for_actuals (actual, call) { + //nprint_view(actual); INT_ASSERT(isSymExpr(actual)); typeCheck->insertAtTail(actual->copy()); } @@ -218,24 +220,29 @@ bool ProtoSliceAssignHelper::handleOneProtoSlice(CallExpr* call, bool isLhs) { // e must be the lhs or rhs of PRIM_ASSIGN_PROTO_SLICES // returns the `chpl__createProtoSlice call CallExpr* ProtoSliceAssignHelper::findOneProtoSliceCall(Expr* e) { - SymExpr* 
lhsSE = toSymExpr(call_->get(1)); - INT_ASSERT(lhsSE); + SymExpr* symExpr = toSymExpr(e); + INT_ASSERT(symExpr); - Symbol* lhs = lhsSE->symbol(); - CallExpr* lhsTmpMove = toCallExpr(lhs->getSingleDef()->getStmtExpr()); - INT_ASSERT(lhsTmpMove && lhsTmpMove->isPrimitive(PRIM_MOVE)); + Symbol* sym = symExpr->symbol(); + CallExpr* tmpMove = toCallExpr(sym->getSingleDef()->getStmtExpr()); + INT_ASSERT(tmpMove && tmpMove->isPrimitive(PRIM_MOVE)); - SymExpr* lhsTmpSE = toSymExpr(lhsTmpMove->get(2)); - INT_ASSERT(lhsTmpSE); + //nprint_view(lhsTmpMove); - Symbol* lhsTmpSym = lhsTmpSE->symbol(); - CallExpr* lhsMove = toCallExpr(lhsTmpSym->getSingleDef()->getStmtExpr()); - INT_ASSERT(lhsMove && lhsMove->isPrimitive(PRIM_MOVE)); + SymExpr* tmpSymExpr = toSymExpr(tmpMove->get(2)); + INT_ASSERT(tmpSymExpr); - return toCallExpr(lhsMove->get(2)); + Symbol* tmpSym = tmpSymExpr->symbol(); + CallExpr* move = toCallExpr(tmpSym->getSingleDef()->getStmtExpr()); + INT_ASSERT(move && move->isPrimitive(PRIM_MOVE)); + + //nprint_view(lhsMove); + + return toCallExpr(move->get(2)); } void ProtoSliceAssignHelper::findProtoSlices() { + //nprint_view(call_); newProtoSliceLhs_ = findOneProtoSliceCall(call_->get(1)); newProtoSliceRhs_ = findOneProtoSliceCall(call_->get(2)); } diff --git a/modules/internal/ChapelArrayViewElision.chpl b/modules/internal/ChapelArrayViewElision.chpl index c095e4d42ce3..486621429160 100644 --- a/modules/internal/ChapelArrayViewElision.chpl +++ b/modules/internal/ChapelArrayViewElision.chpl @@ -233,6 +233,8 @@ module ChapelArrayViewElision { Reflection.canResolve("c_addrOf", base); } + extern proc printf(s...); + proc chpl__indexingExprsSupportAVE(indexingExprs...) param: bool { for param tid in 0.. 
Date: Wed, 26 Jun 2024 21:26:15 -0700 Subject: [PATCH 58/88] Adjust createProtoSlice functions to avoid handling string/bytes Signed-off-by: Engin Kayraklioglu --- modules/internal/ChapelArrayViewElision.chpl | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/modules/internal/ChapelArrayViewElision.chpl b/modules/internal/ChapelArrayViewElision.chpl index 486621429160..c76276abcff0 100644 --- a/modules/internal/ChapelArrayViewElision.chpl +++ b/modules/internal/ChapelArrayViewElision.chpl @@ -211,17 +211,17 @@ module ChapelArrayViewElision { return new chpl__protoSlice(isConst=true, c_addrOfConst(Arr), slicingExprs); } - pragma "last resort" - proc chpl__createProtoSlice(ref Arr, slicingExprs... ) { - // this is an array access. This call will be eliminated later in - // resolution, but we want it to live for a bit for easier resolution + // catch-all: nothing here is supported, just pretend creating a proto slice. + // The branch where this call is will be dropped during resolution. We just + // want to avoid resolution errors before that happens + proc chpl__createProtoSlice(x, slicingExprs... ) { return new chpl__protoSlice(); } - pragma "last resort" - proc chpl__createConstProtoSlice(const ref Arr, slicingExprs... ) { - // this is an array access. This call will be eliminated later in - // resolution, but we want it to live for a bit for easier resolution + // catch-all: nothing here is supported, just pretend creating a proto slice. + // The branch where this call is will be dropped during resolution. We just + // want to avoid resolution errors before that happens + proc chpl__createConstProtoSlice(x, slicingExprs... 
) { return new chpl__protoSlice(); } From a5391b1ada3f7fd00709b31e5eeea6a7aa5700d9 Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Wed, 26 Jun 2024 21:28:49 -0700 Subject: [PATCH 59/88] Drop an unused field Signed-off-by: Engin Kayraklioglu --- modules/internal/ChapelArrayViewElision.chpl | 4 ---- 1 file changed, 4 deletions(-) diff --git a/modules/internal/ChapelArrayViewElision.chpl b/modules/internal/ChapelArrayViewElision.chpl index c76276abcff0..344757d05f27 100644 --- a/modules/internal/ChapelArrayViewElision.chpl +++ b/modules/internal/ChapelArrayViewElision.chpl @@ -37,7 +37,6 @@ module ChapelArrayViewElision { record chpl__protoSlice { param rank; param isConst; - type idxType; var ptrToArr; // I want this to be a `forwarding ref` to the array var ranges; @@ -46,7 +45,6 @@ module ChapelArrayViewElision { // be used and removed from the AST. this.rank = 1; this.isConst = true; - this.idxType = int; var dummyArr = [1,]; this.ptrToArr = c_addrOf(dummyArr); @@ -56,7 +54,6 @@ module ChapelArrayViewElision { proc init(param isConst, ptrToArr, slicingExprs) { this.rank = ptrToArr.deref().rank; this.isConst = isConst; - this.idxType = ptrToArr.deref().idxType; this.ptrToArr = ptrToArr; if allBounded(slicingExprs) { this.ranges = slicingExprs; @@ -74,7 +71,6 @@ module ChapelArrayViewElision { proc init=(other: chpl__protoSlice) { this.rank = other.rank; this.isConst = other.isConst; - this.idxType = other.idxType; this.ptrToArr = other.ptrToArr; this.ranges = other.ranges; init this; From f2821e953b4732d17c271996b9456bc6a791de1a Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Wed, 26 Jun 2024 22:29:22 -0700 Subject: [PATCH 60/88] Properly handle remote arrays, add test Signed-off-by: Engin Kayraklioglu --- compiler/include/arrayViewElision.h | 6 ++- compiler/optimizations/arrayViewElision.cpp | 41 ++++++++++++++++++- compiler/resolution/preFold.cpp | 7 +--- modules/internal/ChapelArrayViewElision.chpl | 11 +++++ 
.../internal/ChapelShortArrayTransfer.chpl | 1 + .../localeModels/flat/chpl-locale-model.h | 5 +++ .../localeModels/gpu/chpl-locale-model.h | 7 ++++ .../arrayViewElision/remoteDR.chpl | 9 ++++ .../arrayViewElision/remoteDR.compopts | 1 + .../arrayViewElision/remoteDR.good | 19 +++++++++ .../arrayViewElision/remoteDR.hcpl | 1 + .../arrayViewElision/remoteDR.numlocales | 1 + 12 files changed, 101 insertions(+), 8 deletions(-) create mode 100644 test/optimizations/arrayViewElision/remoteDR.chpl create mode 100644 test/optimizations/arrayViewElision/remoteDR.compopts create mode 100644 test/optimizations/arrayViewElision/remoteDR.good create mode 100644 test/optimizations/arrayViewElision/remoteDR.hcpl create mode 100644 test/optimizations/arrayViewElision/remoteDR.numlocales diff --git a/compiler/include/arrayViewElision.h b/compiler/include/arrayViewElision.h index d5f448b24be0..2606b0d34050 100644 --- a/compiler/include/arrayViewElision.h +++ b/compiler/include/arrayViewElision.h @@ -33,15 +33,17 @@ class ProtoSliceAssignHelper { ~ProtoSliceAssignHelper(); inline CondStmt* condStmt() const { return condStmt_; } - inline Expr* flag() const { return condStmt_->condExpr; } inline bool supported() const { return supported_; } inline BlockStmt* staticCheckBlock() const { return staticCheckBlock_; } CallExpr* getReplacement(); void report(); + void updateAndFoldConditional(); private: CallExpr* call_; + Symbol* lhsBase_; + Symbol* rhsBase_; CallExpr* newProtoSliceLhs_; CallExpr* newProtoSliceRhs_; CondStmt* condStmt_; @@ -60,4 +62,6 @@ class ProtoSliceAssignHelper { void findProtoSlices(); bool handleOneProtoSlice(CallExpr* call, bool isLhs); CallExpr* findOneProtoSliceCall(Expr* e); + Symbol* getFlagReplacement(); }; + diff --git a/compiler/optimizations/arrayViewElision.cpp b/compiler/optimizations/arrayViewElision.cpp index 5d05fbf3ee0e..d7e99a19a0f2 100644 --- a/compiler/optimizations/arrayViewElision.cpp +++ b/compiler/optimizations/arrayViewElision.cpp @@ -150,12 
+150,45 @@ CallExpr* ProtoSliceAssignHelper::getReplacement() { return new CallExpr("=", call_->get(1)->copy(), call_->get(2)->copy()); } +Symbol* ProtoSliceAssignHelper::getFlagReplacement() { + if (!supported_) { + // we can't optimize + return gFalse; + } + else if (fLocal) { + // no need to check anything else, we can determine this statically + return gTrue; + } + else { + INT_ASSERT(lhsBase_); + INT_ASSERT(rhsBase_); + // we need to check if the arrays are on the same locale at run time + CallExpr* localeCheck = new CallExpr("chpl__bothLocal", lhsBase_, rhsBase_); + + VarSymbol* flagTmp = newTemp(dtBool); + DefExpr* flagDef = new DefExpr(flagTmp); + + condStmt_->insertBefore(flagDef); + condStmt_->insertBefore(new CallExpr(PRIM_MOVE, flagTmp, localeCheck)); + + resolveExpr(localeCheck); + resolveExpr(flagDef); + return flagTmp; + } +} + +void ProtoSliceAssignHelper::updateAndFoldConditional() { + condStmt_->condExpr->replace(new SymExpr(getFlagReplacement())); + condStmt_->foldConstantCondition(/*addEndOfStatement*/ false); +} + void ProtoSliceAssignHelper::report() { if (!fReportArrayViewElision) return; std::string isSupported = supported() ? "supported" : "not supported"; - std::cout << "ArrayViewElision " << isSupported << " " << call_->stringLoc() - << std::endl; + std::string isDynamic = !fLocal ? "(dynamic locality check required)" : ""; + std::cout << "ArrayViewElision " << isSupported << " " << isDynamic << " " << + call_->stringLoc() << std::endl; std::cout << "\t" << "lhsBaseType: " << lhsBaseType_ << std::endl; std::cout << "\t" << "lhsIndexingExprs: " << std::endl; @@ -214,6 +247,10 @@ bool ProtoSliceAssignHelper::handleOneProtoSlice(CallExpr* call, bool isLhs) { flagDef->remove(); + // record the base symbol here for further checks + Symbol*& baseToCapture = isLhs ? 
lhsBase_ : rhsBase_; + baseToCapture = toSymExpr(call->get(1))->symbol(); + return ret; } diff --git a/compiler/resolution/preFold.cpp b/compiler/resolution/preFold.cpp index dc9634b49d23..9be68ec22aef 100644 --- a/compiler/resolution/preFold.cpp +++ b/compiler/resolution/preFold.cpp @@ -920,20 +920,17 @@ static Expr* preFoldPrimOp(CallExpr* call) { case PRIM_PROTO_SLICE_ASSIGN: { ProtoSliceAssignHelper assignment(call); - assignment.report(); - if (assignment.supported()) { retval = assignment.getReplacement(); call->replace(retval); - assignment.flag()->replace(new SymExpr(gTrue)); } else { retval = new CallExpr(PRIM_NOOP); assignment.condStmt()->insertBefore(retval); - assignment.flag()->replace(new SymExpr(gFalse)); } - assignment.condStmt()->foldConstantCondition(/*addEndOfStatement*/ false); + assignment.report(); + assignment.updateAndFoldConditional(); break; } diff --git a/modules/internal/ChapelArrayViewElision.chpl b/modules/internal/ChapelArrayViewElision.chpl index 344757d05f27..487dc037a9a8 100644 --- a/modules/internal/ChapelArrayViewElision.chpl +++ b/modules/internal/ChapelArrayViewElision.chpl @@ -24,6 +24,7 @@ module ChapelArrayViewElision { use DefaultRectangular; use CTypes; + // TODO the following can be removed? proc isProtoSlice(a) param { return isSubtype(a.type, chpl__protoSlice); } proc isArrayOrProtoSlice(a) param { @@ -249,6 +250,16 @@ module ChapelArrayViewElision { return true; } + inline proc chpl__bothLocal(const ref a, const ref b) { + extern proc chpl_equals_localeID(const ref x, const ref y): bool; + + const aLoc = __primitive("_wide_get_locale", a._value); + const bLoc = __primitive("_wide_get_locale", b._value); + + return chpl_equals_localeID(aLoc, bLoc) && + chpl_equals_localeID(aLoc, here_id); + } + proc chpl__typesSupportArrayViewElision(base, indexingExprs...) 
param: bool { return chpl__baseTypeSupportAVE(base) && diff --git a/modules/internal/ChapelShortArrayTransfer.chpl b/modules/internal/ChapelShortArrayTransfer.chpl index a0fed45c611b..812178b6e326 100644 --- a/modules/internal/ChapelShortArrayTransfer.chpl +++ b/modules/internal/ChapelShortArrayTransfer.chpl @@ -54,6 +54,7 @@ module ChapelShortArrayTransfer { else { // No `.locale` to avoid overheads. Note that this is an optimization for // fast-running code. Small things matter. + // TODO, use chpl__bothLocal const sameLocale = __primitive("_wide_get_locale", a) == __primitive("_wide_get_locale", b); if sameLocale then diff --git a/runtime/include/localeModels/flat/chpl-locale-model.h b/runtime/include/localeModels/flat/chpl-locale-model.h index 779ba55f0bc6..3636eb82a188 100644 --- a/runtime/include/localeModels/flat/chpl-locale-model.h +++ b/runtime/include/localeModels/flat/chpl-locale-model.h @@ -71,6 +71,11 @@ c_sublocid_t chpl_rt_sublocFromLocaleID(chpl_localeID_t loc) { return c_sublocid_any; } +static inline +int chpl_equals_localeID(chpl_localeID_t* loc1, chpl_localeID_t* loc2) { + return loc1->node == loc2->node; +} + // // These functions are exported from the locale model for use by // the tasking layer to convert between a full sublocale and an diff --git a/runtime/include/localeModels/gpu/chpl-locale-model.h b/runtime/include/localeModels/gpu/chpl-locale-model.h index 0a4cf6a92b66..12d68cbd97c5 100644 --- a/runtime/include/localeModels/gpu/chpl-locale-model.h +++ b/runtime/include/localeModels/gpu/chpl-locale-model.h @@ -71,6 +71,13 @@ c_sublocid_t chpl_rt_sublocFromLocaleID(chpl_localeID_t loc) { return loc.subloc; } +static inline +bool chpl_equals_localeID(chpl_localeID_t* loc1, (chpl_localeID_t* loc2) { + return loc1->node == loc2->node && + loc1->subloc == loc2->subloc; +} + + // // These functions are exported from the locale model for use by // the tasking layer to convert between a full sublocale and an diff --git 
a/test/optimizations/arrayViewElision/remoteDR.chpl b/test/optimizations/arrayViewElision/remoteDR.chpl new file mode 100644 index 000000000000..fa0ddfe8a688 --- /dev/null +++ b/test/optimizations/arrayViewElision/remoteDR.chpl @@ -0,0 +1,9 @@ +var A: [1..10] int; + +on Locales[1] { + var B: [1..10] int = 1; + + A[1..5] = B[1..5]; + + writeln(A); +} diff --git a/test/optimizations/arrayViewElision/remoteDR.compopts b/test/optimizations/arrayViewElision/remoteDR.compopts new file mode 100644 index 000000000000..6db3d3c1e9d2 --- /dev/null +++ b/test/optimizations/arrayViewElision/remoteDR.compopts @@ -0,0 +1 @@ +--report-array-view-elision -sdebugBulkTransfer diff --git a/test/optimizations/arrayViewElision/remoteDR.good b/test/optimizations/arrayViewElision/remoteDR.good new file mode 100644 index 000000000000..cb6a7321d8d6 --- /dev/null +++ b/test/optimizations/arrayViewElision/remoteDR.good @@ -0,0 +1,19 @@ +ArrayViewElision supported (dynamic locality check required) remoteDR.chpl:6 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),both,one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),both,one) + +operator =(a:[],b:[]): in chpl__bulkTransferArray +operator =(a:[],b:[]): attempting doiBulkTransferFromKnown +operator =(a:[],b:[]): in chpl__bulkTransferArray +operator =(a:[],b:[]): attempting doiBulkTransferToKnown +operator =(a:[],b:[]): in chpl__bulkTransferArray +operator =(a:[],b:[]): attempting doiBulkTransferFromKnown +In DefaultRectangular._simpleTransfer(): Alo=(1,), Blo=(1,), len=5, elemSize=8 +operator =(a:[],b:[]): successfully completed bulk transfer +operator =(a:[],b:[]): successfully completed bulk transfer +operator =(a:[],b:[]): successfully completed bulk transfer +1 1 1 1 1 0 0 0 0 0 diff --git a/test/optimizations/arrayViewElision/remoteDR.hcpl b/test/optimizations/arrayViewElision/remoteDR.hcpl new file mode 100644 index 000000000000..90661451f784 --- /dev/null +++ 
b/test/optimizations/arrayViewElision/remoteDR.hcpl @@ -0,0 +1 @@ +var A: diff --git a/test/optimizations/arrayViewElision/remoteDR.numlocales b/test/optimizations/arrayViewElision/remoteDR.numlocales new file mode 100644 index 000000000000..0cfbf08886fc --- /dev/null +++ b/test/optimizations/arrayViewElision/remoteDR.numlocales @@ -0,0 +1 @@ +2 From 73b1cd55c750514be40a19b9f562111a448ab6c4 Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Thu, 27 Jun 2024 10:04:31 -0700 Subject: [PATCH 61/88] Remove some stray printfs Signed-off-by: Engin Kayraklioglu --- modules/internal/ChapelArrayViewElision.chpl | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/modules/internal/ChapelArrayViewElision.chpl b/modules/internal/ChapelArrayViewElision.chpl index 487dc037a9a8..0770244519b3 100644 --- a/modules/internal/ChapelArrayViewElision.chpl +++ b/modules/internal/ChapelArrayViewElision.chpl @@ -75,8 +75,7 @@ module ChapelArrayViewElision { this.ptrToArr = other.ptrToArr; this.ranges = other.ranges; init this; - extern proc printf(s...); - printf("this is probably not what you want\n"); + halt("protoSlice copy initializer should never be called"); } inline proc domOrRange where rank==1 { @@ -230,8 +229,6 @@ module ChapelArrayViewElision { Reflection.canResolve("c_addrOf", base); } - extern proc printf(s...); - proc chpl__indexingExprsSupportAVE(indexingExprs...) param: bool { for param tid in 0.. 
Date: Thu, 27 Jun 2024 10:07:20 -0700 Subject: [PATCH 62/88] Add a missing line for line numbers Signed-off-by: Engin Kayraklioglu --- modules/internal/ChapelArray.chpl | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/internal/ChapelArray.chpl b/modules/internal/ChapelArray.chpl index 74ce0994f003..0d586bcb1814 100644 --- a/modules/internal/ChapelArray.chpl +++ b/modules/internal/ChapelArray.chpl @@ -2161,6 +2161,7 @@ module ChapelArray { } } + pragma "find user line" private inline proc arrayOrProtoSliceAssign(ref a, b) { if a.rank != b.rank then compilerError("rank mismatch in array assignment"); From 202df0739be3dfa35cad05dc2b8910c2c7faa569 Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Thu, 27 Jun 2024 15:36:31 -0700 Subject: [PATCH 63/88] Some trivial test fixes Signed-off-by: Engin Kayraklioglu --- compiler/optimizations/arrayViewElision.cpp | 4 +- .../arrayViewElision/remoteDR.chpl | 2 +- .../arrayViewElision/remoteDR.comm-none.good | 9 + .../arrayViewElision/slices-1d.comm-none.good | 243 ++++++++++++ .../arrayViewElision/slices-1d.good | 53 ++- .../arrayViewElision/slices-2d.comm-none.good | 371 ++++++++++++++++++ .../arrayViewElision/slices-2d.good | 53 ++- 7 files changed, 704 insertions(+), 31 deletions(-) create mode 100644 test/optimizations/arrayViewElision/remoteDR.comm-none.good create mode 100644 test/optimizations/arrayViewElision/slices-1d.comm-none.good create mode 100644 test/optimizations/arrayViewElision/slices-2d.comm-none.good diff --git a/compiler/optimizations/arrayViewElision.cpp b/compiler/optimizations/arrayViewElision.cpp index d7e99a19a0f2..8bc72b0c7fad 100644 --- a/compiler/optimizations/arrayViewElision.cpp +++ b/compiler/optimizations/arrayViewElision.cpp @@ -186,8 +186,8 @@ void ProtoSliceAssignHelper::report() { if (!fReportArrayViewElision) return; std::string isSupported = supported() ? "supported" : "not supported"; - std::string isDynamic = !fLocal ? 
"(dynamic locality check required)" : ""; - std::cout << "ArrayViewElision " << isSupported << " " << isDynamic << " " << + std::string isDynamic = !fLocal ? " (dynamic locality check required)" : ""; + std::cout << "ArrayViewElision " << isSupported << isDynamic << " " << call_->stringLoc() << std::endl; std::cout << "\t" << "lhsBaseType: " << lhsBaseType_ << std::endl; diff --git a/test/optimizations/arrayViewElision/remoteDR.chpl b/test/optimizations/arrayViewElision/remoteDR.chpl index fa0ddfe8a688..a21ef15c6712 100644 --- a/test/optimizations/arrayViewElision/remoteDR.chpl +++ b/test/optimizations/arrayViewElision/remoteDR.chpl @@ -1,6 +1,6 @@ var A: [1..10] int; -on Locales[1] { +on Locales.last { var B: [1..10] int = 1; A[1..5] = B[1..5]; diff --git a/test/optimizations/arrayViewElision/remoteDR.comm-none.good b/test/optimizations/arrayViewElision/remoteDR.comm-none.good new file mode 100644 index 000000000000..4160b8636bee --- /dev/null +++ b/test/optimizations/arrayViewElision/remoteDR.comm-none.good @@ -0,0 +1,9 @@ +ArrayViewElision supported remoteDR.chpl:6 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),both,one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),both,one) + +1 1 1 1 1 0 0 0 0 0 diff --git a/test/optimizations/arrayViewElision/slices-1d.comm-none.good b/test/optimizations/arrayViewElision/slices-1d.comm-none.good new file mode 100644 index 000000000000..9215b5c29cc3 --- /dev/null +++ b/test/optimizations/arrayViewElision/slices-1d.comm-none.good @@ -0,0 +1,243 @@ +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),both,one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),both,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),both,one) + rhsBaseType: [domain(1,int(64),one)] int(64) + 
rhsIndexingExprs: + range(int(64),both,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),both,one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),both,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),high,one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),high,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),high,one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),high,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),high,one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),high,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),low,one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),low,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),low,one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),low,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),low,one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),low,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),both,one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),both,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + 
range(int(64),low,one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),low,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),neither,one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),neither,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),neither,one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),neither,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),neither,one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),neither,one) + +Set first two: + Size: 2 Threshold: 50 + size qualifies +Test 1 +2 2 1 1 1 + + Size: 2 Threshold: 50 + size qualifies +Test 2 +2 2 1 1 1 + + Size: 2 Threshold: 50 + size qualifies +Test 3 +2 2 1 1 1 + + Size: 2 Threshold: 50 + size qualifies +Test 4 +2 2 1 1 1 + + Size: 2 Threshold: 50 + size qualifies +Test 5 +2 2 1 1 1 + + Size: 2 Threshold: 50 + size qualifies +Test 6 +2 2 1 1 1 + + Size: 2 Threshold: 50 + size qualifies +Test 7 +2 2 1 1 1 + + Size: 2 Threshold: 50 + size qualifies +Test 8 +2 2 1 1 1 + +----------------- +Set last two: + Size: 2 Threshold: 50 + size qualifies +Test 9 +1 1 1 2 2 + + Size: 2 Threshold: 50 + size qualifies +Test 10 +1 1 1 2 2 + + Size: 2 Threshold: 50 + size qualifies +Test 11 +1 1 1 2 2 + + Size: 2 Threshold: 50 + size qualifies +Test 12 +1 1 1 2 2 + + Size: 2 Threshold: 50 + size qualifies +Test 13 +1 1 1 2 2 + + Size: 2 Threshold: 50 + size qualifies +Test 14 +1 1 1 2 2 + + Size: 2 Threshold: 50 + size qualifies +Test 15 +1 1 1 2 2 + + Size: 2 Threshold: 50 + size qualifies +Test 16 +1 1 1 2 2 + +----------------- +Set all: + Size: 5 Threshold: 50 + size qualifies +Test 17 +2 2 2 2 2 + + Size: 5 Threshold: 50 + size 
qualifies +Test 18 +2 2 2 2 2 + + Size: 5 Threshold: 50 + size qualifies +Test 19 +2 2 2 2 2 + + Size: 5 Threshold: 50 + size qualifies +Test 20 +2 2 2 2 2 + + Size: 5 Threshold: 50 + size qualifies +Test 21 +2 2 2 2 2 + + Size: 5 Threshold: 50 + size qualifies +Test 22 +2 2 2 2 2 + + Size: 5 Threshold: 50 + size qualifies +Test 23 +2 2 2 2 2 + + Size: 5 Threshold: 50 + size qualifies +Test 24 +2 2 2 2 2 + + Size: 5 Threshold: 50 + size qualifies +Test 25 +2 2 2 2 2 + +----------------- diff --git a/test/optimizations/arrayViewElision/slices-1d.good b/test/optimizations/arrayViewElision/slices-1d.good index 9215b5c29cc3..53b2be34c324 100644 --- a/test/optimizations/arrayViewElision/slices-1d.good +++ b/test/optimizations/arrayViewElision/slices-1d.good @@ -1,4 +1,4 @@ -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:21 lhsBaseType: [domain(1,int(64),one)] int(64) lhsIndexingExprs: range(int(64),both,one) @@ -6,7 +6,7 @@ ArrayViewElision supported slices.chpl:21 rhsIndexingExprs: range(int(64),both,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:21 lhsBaseType: [domain(1,int(64),one)] int(64) lhsIndexingExprs: range(int(64),both,one) @@ -14,7 +14,7 @@ ArrayViewElision supported slices.chpl:21 rhsIndexingExprs: range(int(64),both,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:21 lhsBaseType: [domain(1,int(64),one)] int(64) lhsIndexingExprs: range(int(64),both,one) @@ -22,7 +22,7 @@ ArrayViewElision supported slices.chpl:21 rhsIndexingExprs: range(int(64),both,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:21 lhsBaseType: [domain(1,int(64),one)] int(64) lhsIndexingExprs: range(int(64),high,one) @@ -30,7 +30,7 @@ ArrayViewElision supported slices.chpl:21 rhsIndexingExprs: 
range(int(64),high,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:21 lhsBaseType: [domain(1,int(64),one)] int(64) lhsIndexingExprs: range(int(64),high,one) @@ -38,7 +38,7 @@ ArrayViewElision supported slices.chpl:21 rhsIndexingExprs: range(int(64),high,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:21 lhsBaseType: [domain(1,int(64),one)] int(64) lhsIndexingExprs: range(int(64),high,one) @@ -46,7 +46,7 @@ ArrayViewElision supported slices.chpl:21 rhsIndexingExprs: range(int(64),high,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:21 lhsBaseType: [domain(1,int(64),one)] int(64) lhsIndexingExprs: range(int(64),low,one) @@ -54,7 +54,7 @@ ArrayViewElision supported slices.chpl:21 rhsIndexingExprs: range(int(64),low,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:21 lhsBaseType: [domain(1,int(64),one)] int(64) lhsIndexingExprs: range(int(64),low,one) @@ -62,7 +62,7 @@ ArrayViewElision supported slices.chpl:21 rhsIndexingExprs: range(int(64),low,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:21 lhsBaseType: [domain(1,int(64),one)] int(64) lhsIndexingExprs: range(int(64),low,one) @@ -70,7 +70,7 @@ ArrayViewElision supported slices.chpl:21 rhsIndexingExprs: range(int(64),low,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:21 lhsBaseType: [domain(1,int(64),one)] int(64) lhsIndexingExprs: range(int(64),both,one) @@ -78,7 +78,7 @@ ArrayViewElision supported slices.chpl:21 rhsIndexingExprs: range(int(64),both,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:21 
lhsBaseType: [domain(1,int(64),one)] int(64) lhsIndexingExprs: range(int(64),low,one) @@ -86,7 +86,7 @@ ArrayViewElision supported slices.chpl:21 rhsIndexingExprs: range(int(64),low,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:21 lhsBaseType: [domain(1,int(64),one)] int(64) lhsIndexingExprs: range(int(64),neither,one) @@ -94,7 +94,7 @@ ArrayViewElision supported slices.chpl:21 rhsIndexingExprs: range(int(64),neither,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:21 lhsBaseType: [domain(1,int(64),one)] int(64) lhsIndexingExprs: range(int(64),neither,one) @@ -102,7 +102,7 @@ ArrayViewElision supported slices.chpl:21 rhsIndexingExprs: range(int(64),neither,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:21 lhsBaseType: [domain(1,int(64),one)] int(64) lhsIndexingExprs: range(int(64),neither,one) @@ -113,41 +113,49 @@ ArrayViewElision supported slices.chpl:21 Set first two: Size: 2 Threshold: 50 size qualifies + locality qualifies Test 1 2 2 1 1 1 Size: 2 Threshold: 50 size qualifies + locality qualifies Test 2 2 2 1 1 1 Size: 2 Threshold: 50 size qualifies + locality qualifies Test 3 2 2 1 1 1 Size: 2 Threshold: 50 size qualifies + locality qualifies Test 4 2 2 1 1 1 Size: 2 Threshold: 50 size qualifies + locality qualifies Test 5 2 2 1 1 1 Size: 2 Threshold: 50 size qualifies + locality qualifies Test 6 2 2 1 1 1 Size: 2 Threshold: 50 size qualifies + locality qualifies Test 7 2 2 1 1 1 Size: 2 Threshold: 50 size qualifies + locality qualifies Test 8 2 2 1 1 1 @@ -155,41 +163,49 @@ Test 8 Set last two: Size: 2 Threshold: 50 size qualifies + locality qualifies Test 9 1 1 1 2 2 Size: 2 Threshold: 50 size qualifies + locality qualifies Test 10 1 1 1 2 2 Size: 2 Threshold: 50 size qualifies + locality qualifies Test 11 1 1 1 2 2 Size: 2 Threshold: 50 
size qualifies + locality qualifies Test 12 1 1 1 2 2 Size: 2 Threshold: 50 size qualifies + locality qualifies Test 13 1 1 1 2 2 Size: 2 Threshold: 50 size qualifies + locality qualifies Test 14 1 1 1 2 2 Size: 2 Threshold: 50 size qualifies + locality qualifies Test 15 1 1 1 2 2 Size: 2 Threshold: 50 size qualifies + locality qualifies Test 16 1 1 1 2 2 @@ -197,46 +213,55 @@ Test 16 Set all: Size: 5 Threshold: 50 size qualifies + locality qualifies Test 17 2 2 2 2 2 Size: 5 Threshold: 50 size qualifies + locality qualifies Test 18 2 2 2 2 2 Size: 5 Threshold: 50 size qualifies + locality qualifies Test 19 2 2 2 2 2 Size: 5 Threshold: 50 size qualifies + locality qualifies Test 20 2 2 2 2 2 Size: 5 Threshold: 50 size qualifies + locality qualifies Test 21 2 2 2 2 2 Size: 5 Threshold: 50 size qualifies + locality qualifies Test 22 2 2 2 2 2 Size: 5 Threshold: 50 size qualifies + locality qualifies Test 23 2 2 2 2 2 Size: 5 Threshold: 50 size qualifies + locality qualifies Test 24 2 2 2 2 2 Size: 5 Threshold: 50 size qualifies + locality qualifies Test 25 2 2 2 2 2 diff --git a/test/optimizations/arrayViewElision/slices-2d.comm-none.good b/test/optimizations/arrayViewElision/slices-2d.comm-none.good new file mode 100644 index 000000000000..e2ce44ad301d --- /dev/null +++ b/test/optimizations/arrayViewElision/slices-2d.comm-none.good @@ -0,0 +1,371 @@ +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(2,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),both,one) + range(int(64),both,one) + rhsBaseType: [domain(2,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),both,one) + range(int(64),both,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(2,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),both,one) + range(int(64),both,one) + rhsBaseType: [domain(2,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),both,one) + range(int(64),both,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: 
[domain(2,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),both,one) + range(int(64),both,one) + rhsBaseType: [domain(2,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),both,one) + range(int(64),both,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(2,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),high,one) + range(int(64),high,one) + rhsBaseType: [domain(2,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),high,one) + range(int(64),high,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(2,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),high,one) + range(int(64),high,one) + rhsBaseType: [domain(2,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),high,one) + range(int(64),high,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(2,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),high,one) + range(int(64),high,one) + rhsBaseType: [domain(2,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),high,one) + range(int(64),high,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(2,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),low,one) + range(int(64),low,one) + rhsBaseType: [domain(2,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),low,one) + range(int(64),low,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(2,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),low,one) + range(int(64),low,one) + rhsBaseType: [domain(2,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),low,one) + range(int(64),low,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(2,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),low,one) + range(int(64),low,one) + rhsBaseType: [domain(2,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),low,one) + range(int(64),low,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(2,int(64),one)] 
int(64) + lhsIndexingExprs: + range(int(64),both,one) + range(int(64),both,one) + rhsBaseType: [domain(2,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),both,one) + range(int(64),both,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(2,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),low,one) + range(int(64),low,one) + rhsBaseType: [domain(2,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),low,one) + range(int(64),low,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(2,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),neither,one) + range(int(64),neither,one) + rhsBaseType: [domain(2,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),neither,one) + range(int(64),neither,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(2,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),neither,one) + range(int(64),neither,one) + rhsBaseType: [domain(2,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),neither,one) + range(int(64),neither,one) + +ArrayViewElision supported slices.chpl:21 + lhsBaseType: [domain(2,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),neither,one) + range(int(64),neither,one) + rhsBaseType: [domain(2,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),neither,one) + range(int(64),neither,one) + +Set first two: + Size: 4 Threshold: 50 + size qualifies +Test 1 +2 2 1 1 1 +2 2 1 1 1 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 1 1 + + Size: 4 Threshold: 50 + size qualifies +Test 2 +2 2 1 1 1 +2 2 1 1 1 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 1 1 + + Size: 4 Threshold: 50 + size qualifies +Test 3 +2 2 1 1 1 +2 2 1 1 1 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 1 1 + + Size: 4 Threshold: 50 + size qualifies +Test 4 +2 2 1 1 1 +2 2 1 1 1 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 1 1 + + Size: 4 Threshold: 50 + size qualifies +Test 5 +2 2 1 1 1 +2 2 1 1 1 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 1 1 + + Size: 4 Threshold: 50 + size qualifies +Test 6 +2 2 1 1 1 +2 2 1 1 1 +1 1 1 1 1 +1 1 1 
1 1 +1 1 1 1 1 + + Size: 4 Threshold: 50 + size qualifies +Test 7 +2 2 1 1 1 +2 2 1 1 1 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 1 1 + + Size: 4 Threshold: 50 + size qualifies +Test 8 +2 2 1 1 1 +2 2 1 1 1 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 1 1 + +----------------- +Set last two: + Size: 4 Threshold: 50 + size qualifies +Test 9 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 2 2 +1 1 1 2 2 + + Size: 4 Threshold: 50 + size qualifies +Test 10 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 2 2 +1 1 1 2 2 + + Size: 4 Threshold: 50 + size qualifies +Test 11 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 2 2 +1 1 1 2 2 + + Size: 4 Threshold: 50 + size qualifies +Test 12 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 2 2 +1 1 1 2 2 + + Size: 4 Threshold: 50 + size qualifies +Test 13 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 2 2 +1 1 1 2 2 + + Size: 4 Threshold: 50 + size qualifies +Test 14 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 2 2 +1 1 1 2 2 + + Size: 4 Threshold: 50 + size qualifies +Test 15 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 2 2 +1 1 1 2 2 + + Size: 4 Threshold: 50 + size qualifies +Test 16 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 2 2 +1 1 1 2 2 + +----------------- +Set all: + Size: 25 Threshold: 50 + size qualifies +Test 17 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 + + Size: 25 Threshold: 50 + size qualifies +Test 18 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 + + Size: 25 Threshold: 50 + size qualifies +Test 19 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 + + Size: 25 Threshold: 50 + size qualifies +Test 20 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 + + Size: 25 Threshold: 50 + size qualifies +Test 21 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 + + Size: 25 Threshold: 50 + size qualifies +Test 22 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 + + Size: 25 Threshold: 50 + size qualifies +Test 23 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 + + Size: 25 Threshold: 50 + size qualifies +Test 24 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 
+2 2 2 2 2 + + Size: 25 Threshold: 50 + size qualifies +Test 25 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 + +----------------- diff --git a/test/optimizations/arrayViewElision/slices-2d.good b/test/optimizations/arrayViewElision/slices-2d.good index e2ce44ad301d..48e5c3d2113b 100644 --- a/test/optimizations/arrayViewElision/slices-2d.good +++ b/test/optimizations/arrayViewElision/slices-2d.good @@ -1,4 +1,4 @@ -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:21 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: range(int(64),both,one) @@ -8,7 +8,7 @@ ArrayViewElision supported slices.chpl:21 range(int(64),both,one) range(int(64),both,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:21 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: range(int(64),both,one) @@ -18,7 +18,7 @@ ArrayViewElision supported slices.chpl:21 range(int(64),both,one) range(int(64),both,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:21 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: range(int(64),both,one) @@ -28,7 +28,7 @@ ArrayViewElision supported slices.chpl:21 range(int(64),both,one) range(int(64),both,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:21 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: range(int(64),high,one) @@ -38,7 +38,7 @@ ArrayViewElision supported slices.chpl:21 range(int(64),high,one) range(int(64),high,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:21 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: range(int(64),high,one) @@ -48,7 +48,7 @@ ArrayViewElision supported slices.chpl:21 range(int(64),high,one) range(int(64),high,one) 
-ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:21 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: range(int(64),high,one) @@ -58,7 +58,7 @@ ArrayViewElision supported slices.chpl:21 range(int(64),high,one) range(int(64),high,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:21 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: range(int(64),low,one) @@ -68,7 +68,7 @@ ArrayViewElision supported slices.chpl:21 range(int(64),low,one) range(int(64),low,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:21 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: range(int(64),low,one) @@ -78,7 +78,7 @@ ArrayViewElision supported slices.chpl:21 range(int(64),low,one) range(int(64),low,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:21 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: range(int(64),low,one) @@ -88,7 +88,7 @@ ArrayViewElision supported slices.chpl:21 range(int(64),low,one) range(int(64),low,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:21 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: range(int(64),both,one) @@ -98,7 +98,7 @@ ArrayViewElision supported slices.chpl:21 range(int(64),both,one) range(int(64),both,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:21 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: range(int(64),low,one) @@ -108,7 +108,7 @@ ArrayViewElision supported slices.chpl:21 range(int(64),low,one) range(int(64),low,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:21 
lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: range(int(64),neither,one) @@ -118,7 +118,7 @@ ArrayViewElision supported slices.chpl:21 range(int(64),neither,one) range(int(64),neither,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:21 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: range(int(64),neither,one) @@ -128,7 +128,7 @@ ArrayViewElision supported slices.chpl:21 range(int(64),neither,one) range(int(64),neither,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:21 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: range(int(64),neither,one) @@ -141,6 +141,7 @@ ArrayViewElision supported slices.chpl:21 Set first two: Size: 4 Threshold: 50 size qualifies + locality qualifies Test 1 2 2 1 1 1 2 2 1 1 1 @@ -150,6 +151,7 @@ Test 1 Size: 4 Threshold: 50 size qualifies + locality qualifies Test 2 2 2 1 1 1 2 2 1 1 1 @@ -159,6 +161,7 @@ Test 2 Size: 4 Threshold: 50 size qualifies + locality qualifies Test 3 2 2 1 1 1 2 2 1 1 1 @@ -168,6 +171,7 @@ Test 3 Size: 4 Threshold: 50 size qualifies + locality qualifies Test 4 2 2 1 1 1 2 2 1 1 1 @@ -177,6 +181,7 @@ Test 4 Size: 4 Threshold: 50 size qualifies + locality qualifies Test 5 2 2 1 1 1 2 2 1 1 1 @@ -186,6 +191,7 @@ Test 5 Size: 4 Threshold: 50 size qualifies + locality qualifies Test 6 2 2 1 1 1 2 2 1 1 1 @@ -195,6 +201,7 @@ Test 6 Size: 4 Threshold: 50 size qualifies + locality qualifies Test 7 2 2 1 1 1 2 2 1 1 1 @@ -204,6 +211,7 @@ Test 7 Size: 4 Threshold: 50 size qualifies + locality qualifies Test 8 2 2 1 1 1 2 2 1 1 1 @@ -215,6 +223,7 @@ Test 8 Set last two: Size: 4 Threshold: 50 size qualifies + locality qualifies Test 9 1 1 1 1 1 1 1 1 1 1 @@ -224,6 +233,7 @@ Test 9 Size: 4 Threshold: 50 size qualifies + locality qualifies Test 10 1 1 1 1 1 1 1 1 1 1 @@ -233,6 +243,7 @@ Test 10 Size: 4 Threshold: 50 size qualifies + 
locality qualifies Test 11 1 1 1 1 1 1 1 1 1 1 @@ -242,6 +253,7 @@ Test 11 Size: 4 Threshold: 50 size qualifies + locality qualifies Test 12 1 1 1 1 1 1 1 1 1 1 @@ -251,6 +263,7 @@ Test 12 Size: 4 Threshold: 50 size qualifies + locality qualifies Test 13 1 1 1 1 1 1 1 1 1 1 @@ -260,6 +273,7 @@ Test 13 Size: 4 Threshold: 50 size qualifies + locality qualifies Test 14 1 1 1 1 1 1 1 1 1 1 @@ -269,6 +283,7 @@ Test 14 Size: 4 Threshold: 50 size qualifies + locality qualifies Test 15 1 1 1 1 1 1 1 1 1 1 @@ -278,6 +293,7 @@ Test 15 Size: 4 Threshold: 50 size qualifies + locality qualifies Test 16 1 1 1 1 1 1 1 1 1 1 @@ -289,6 +305,7 @@ Test 16 Set all: Size: 25 Threshold: 50 size qualifies + locality qualifies Test 17 2 2 2 2 2 2 2 2 2 2 @@ -298,6 +315,7 @@ Test 17 Size: 25 Threshold: 50 size qualifies + locality qualifies Test 18 2 2 2 2 2 2 2 2 2 2 @@ -307,6 +325,7 @@ Test 18 Size: 25 Threshold: 50 size qualifies + locality qualifies Test 19 2 2 2 2 2 2 2 2 2 2 @@ -316,6 +335,7 @@ Test 19 Size: 25 Threshold: 50 size qualifies + locality qualifies Test 20 2 2 2 2 2 2 2 2 2 2 @@ -325,6 +345,7 @@ Test 20 Size: 25 Threshold: 50 size qualifies + locality qualifies Test 21 2 2 2 2 2 2 2 2 2 2 @@ -334,6 +355,7 @@ Test 21 Size: 25 Threshold: 50 size qualifies + locality qualifies Test 22 2 2 2 2 2 2 2 2 2 2 @@ -343,6 +365,7 @@ Test 22 Size: 25 Threshold: 50 size qualifies + locality qualifies Test 23 2 2 2 2 2 2 2 2 2 2 @@ -352,6 +375,7 @@ Test 23 Size: 25 Threshold: 50 size qualifies + locality qualifies Test 24 2 2 2 2 2 2 2 2 2 2 @@ -361,6 +385,7 @@ Test 24 Size: 25 Threshold: 50 size qualifies + locality qualifies Test 25 2 2 2 2 2 2 2 2 2 2 From e44ef6ad280b37662e33b7cdf3dcf0a0cbf929b1 Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Thu, 27 Jun 2024 16:27:22 -0700 Subject: [PATCH 64/88] Add no array view elision pragma Signed-off-by: Engin Kayraklioglu --- compiler/optimizations/arrayViewElision.cpp | 31 +++++++++++++-------- frontend/include/chpl/uast/PragmaList.h 
| 3 +- modules/internal/ChapelTuple.chpl | 1 + 3 files changed, 23 insertions(+), 12 deletions(-) diff --git a/compiler/optimizations/arrayViewElision.cpp b/compiler/optimizations/arrayViewElision.cpp index 8bc72b0c7fad..9cec50973039 100644 --- a/compiler/optimizations/arrayViewElision.cpp +++ b/compiler/optimizations/arrayViewElision.cpp @@ -64,18 +64,23 @@ void arrayViewElision() { std::vector candidates; for_alive_in_Vec (CallExpr, call, gCallExprs) { - if (call->getModule()->modTag == MOD_USER) { - if (call->isNamed("=")) { - if (exprSuitableForProtoSlice(call->get(1), /* isLhs */ true) && - exprSuitableForProtoSlice(call->get(2), /* isLhs */ false)) { - //std::cout << call->stringLoc() << std::endl; - //nprint_view(call); - candidates.push_back(call); - } + if (FnSymbol* parentFn = toFnSymbol(call->parentSymbol)) { + if (parentFn->hasFlag(FLAG_NO_ARRAY_VIEW_ELISION)) { + continue; + } + } + + if (call->isNamed("=")) { + if (exprSuitableForProtoSlice(call->get(1), /* isLhs */ true) && + exprSuitableForProtoSlice(call->get(2), /* isLhs */ false)) { + //std::cout << call->stringLoc() << std::endl; + //nprint_view(call); + candidates.push_back(call); } } } + for_vector(CallExpr, call, candidates) { SET_LINENO(call); @@ -86,8 +91,8 @@ void arrayViewElision() { CallExpr* rhsPSCall = generateCreateProtoSlice(rhs); // arrayview elision placeholder - VarSymbol* placeholder = new VarSymbol("arrayview_elision_flag", dtBool); - placeholder->addFlag(FLAG_ARRAYVIEW_ELISION_FLAG); + VarSymbol* placeholder = new VarSymbol("array_view_elision_flag", dtBool); + placeholder->addFlag(FLAG_ARRAY_VIEW_ELISION_FLAG); call->insertBefore(new DefExpr(placeholder, gFalse)); @@ -184,6 +189,10 @@ void ProtoSliceAssignHelper::updateAndFoldConditional() { void ProtoSliceAssignHelper::report() { if (!fReportArrayViewElision) return; + if (ModuleSymbol* mod = call_->getModule()) { + // if there's no user module, getModule could return null + if (mod->modTag != MOD_USER) return; + } 
std::string isSupported = supported() ? "supported" : "not supported"; std::string isDynamic = !fLocal ? " (dynamic locality check required)" : ""; @@ -289,7 +298,7 @@ void ProtoSliceAssignHelper::findCondStmt() { while (cur) { if (CondStmt* condStmt = toCondStmt(cur)) { if (SymExpr* condExpr = toSymExpr(condStmt->condExpr)) { - if (condExpr->symbol()->hasFlag(FLAG_ARRAYVIEW_ELISION_FLAG)) { + if (condExpr->symbol()->hasFlag(FLAG_ARRAY_VIEW_ELISION_FLAG)) { tmpCondFlag_ = condExpr->symbol(); condStmt_ = condStmt; break; diff --git a/frontend/include/chpl/uast/PragmaList.h b/frontend/include/chpl/uast/PragmaList.h index 05491351c890..40bc535af7b6 100644 --- a/frontend/include/chpl/uast/PragmaList.h +++ b/frontend/include/chpl/uast/PragmaList.h @@ -54,7 +54,8 @@ PRAGMA(ALIAS_SCOPE_FROM_THIS, ypr, "alias scope from this", ncm) // Added to the condExpr of a static ArrayView Elision conditional. Should // disappear after resolution -PRAGMA(ARRAYVIEW_ELISION_FLAG, npr, "static flag for arrayview elision", ncm) +PRAGMA(ARRAY_VIEW_ELISION_FLAG, npr, "static flag for array view elision", ncm) +PRAGMA(NO_ARRAY_VIEW_ELISION, ypr, "no array view elision", ncm) // This flag is used in scalarReplace.cpp to determine if an assignment of a ref // has an allocator as the RHS. 
If so, then it is not creating an alias, since diff --git a/modules/internal/ChapelTuple.chpl b/modules/internal/ChapelTuple.chpl index 9d5ad4e6cb95..f9180ecf8d8f 100644 --- a/modules/internal/ChapelTuple.chpl +++ b/modules/internal/ChapelTuple.chpl @@ -148,6 +148,7 @@ module ChapelTuple { // pragma "compiler generated" pragma "last resort" + pragma "no array view elision" @chpldoc.nodoc inline operator =(ref x: _tuple, pragma "intent ref maybe const formal" y: _tuple) From 7c5c4a8fc468436e34f73cb85335598688c0317a Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Thu, 27 Jun 2024 16:37:32 -0700 Subject: [PATCH 65/88] Add a test to lock behavior with function calls Signed-off-by: Engin Kayraklioglu --- .../arrayViewElision/avoidExtraCalls.chpl | 42 +++++++++++++ .../avoidExtraCalls.comm-none.good | 58 ++++++++++++++++++ .../arrayViewElision/avoidExtraCalls.compopts | 1 + .../arrayViewElision/avoidExtraCalls.good | 61 +++++++++++++++++++ 4 files changed, 162 insertions(+) create mode 100644 test/optimizations/arrayViewElision/avoidExtraCalls.chpl create mode 100644 test/optimizations/arrayViewElision/avoidExtraCalls.comm-none.good create mode 100644 test/optimizations/arrayViewElision/avoidExtraCalls.compopts create mode 100644 test/optimizations/arrayViewElision/avoidExtraCalls.good diff --git a/test/optimizations/arrayViewElision/avoidExtraCalls.chpl b/test/optimizations/arrayViewElision/avoidExtraCalls.chpl new file mode 100644 index 000000000000..cfd25405b587 --- /dev/null +++ b/test/optimizations/arrayViewElision/avoidExtraCalls.chpl @@ -0,0 +1,42 @@ +{ + writeln("Test 1"); + var A: [1..10] int; + var B: [1..10] int = 1; + + proc foo() { + writeln("foo called"); + return 1..3; + } + A[foo()] = B[foo()]; + writeln(A); + writeln(); +} + +{ + writeln("Test 2"); + var A: [1..10] int; + const B: [1..10] int = 1; + + proc foo() { + writeln("foo called"); + return 1..3; + } + A[foo()] = B[foo()]; + writeln(A); + writeln(); +} + +{ + writeln("Test 3"); + var A: 
[1..10, 1..10] int; + var B: [1..10, 1..10] int = 1; + + proc foo() { + writeln("foo called"); + return 1..3; + } + + A[foo(), foo()] = B[foo(), foo()]; + writeln(A); + writeln(); +} diff --git a/test/optimizations/arrayViewElision/avoidExtraCalls.comm-none.good b/test/optimizations/arrayViewElision/avoidExtraCalls.comm-none.good new file mode 100644 index 000000000000..f63d929e32a1 --- /dev/null +++ b/test/optimizations/arrayViewElision/avoidExtraCalls.comm-none.good @@ -0,0 +1,58 @@ +ArrayViewElision supported avoidExtraCalls.chpl:10 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),both,one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),both,one) + +ArrayViewElision supported avoidExtraCalls.chpl:24 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),both,one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),both,one) + +ArrayViewElision supported avoidExtraCalls.chpl:39 + lhsBaseType: [domain(2,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),both,one) + range(int(64),both,one) + rhsBaseType: [domain(2,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),both,one) + range(int(64),both,one) + +Test 1 +foo called +foo called + Size: 3 Threshold: 50 + size qualifies +1 1 1 0 0 0 0 0 0 0 + +Test 2 +foo called +foo called + Size: 3 Threshold: 50 + size qualifies +1 1 1 0 0 0 0 0 0 0 + +Test 3 +foo called +foo called +foo called +foo called + Size: 9 Threshold: 50 + size qualifies +1 1 1 0 0 0 0 0 0 0 +1 1 1 0 0 0 0 0 0 0 +1 1 1 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 + diff --git a/test/optimizations/arrayViewElision/avoidExtraCalls.compopts b/test/optimizations/arrayViewElision/avoidExtraCalls.compopts new file mode 100644 index 000000000000..1facc70badbb --- /dev/null +++ 
b/test/optimizations/arrayViewElision/avoidExtraCalls.compopts @@ -0,0 +1 @@ +--report-array-view-elision -sdebugShortArrayTransfer diff --git a/test/optimizations/arrayViewElision/avoidExtraCalls.good b/test/optimizations/arrayViewElision/avoidExtraCalls.good new file mode 100644 index 000000000000..bf94bf5f6d6c --- /dev/null +++ b/test/optimizations/arrayViewElision/avoidExtraCalls.good @@ -0,0 +1,61 @@ +ArrayViewElision supported (dynamic locality check required) avoidExtraCalls.chpl:10 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),both,one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),both,one) + +ArrayViewElision supported (dynamic locality check required) avoidExtraCalls.chpl:24 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),both,one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),both,one) + +ArrayViewElision supported (dynamic locality check required) avoidExtraCalls.chpl:39 + lhsBaseType: [domain(2,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),both,one) + range(int(64),both,one) + rhsBaseType: [domain(2,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),both,one) + range(int(64),both,one) + +Test 1 +foo called +foo called + Size: 3 Threshold: 50 + size qualifies + locality qualifies +1 1 1 0 0 0 0 0 0 0 + +Test 2 +foo called +foo called + Size: 3 Threshold: 50 + size qualifies + locality qualifies +1 1 1 0 0 0 0 0 0 0 + +Test 3 +foo called +foo called +foo called +foo called + Size: 9 Threshold: 50 + size qualifies + locality qualifies +1 1 1 0 0 0 0 0 0 0 +1 1 1 0 0 0 0 0 0 0 +1 1 1 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 + From 536607499e586a7d75af559b3f656501b7a1babd Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Thu, 27 Jun 2024 17:06:25 -0700 Subject: [PATCH 
66/88] Start supporting domains as slicing expressions, too Signed-off-by: Engin Kayraklioglu --- modules/internal/ChapelArrayViewElision.chpl | 73 +++++++++++++---- .../slices-1d-domain.comm-none.good | 52 ++++++++++++ .../arrayViewElision/slices-1d-domain.good | 58 +++++++++++++ ...ne.good => slices-1d-range.comm-none.good} | 28 +++---- .../{slices-1d.good => slices-1d-range.good} | 28 +++---- .../slices-2d-domain.comm-none.good | 76 +++++++++++++++++ .../arrayViewElision/slices-2d-domain.good | 82 +++++++++++++++++++ ...ne.good => slices-2d-range.comm-none.good} | 28 +++---- .../{slices-2d.good => slices-2d-range.good} | 28 +++---- .../arrayViewElision/slices.chpl | 52 +++++++----- .../arrayViewElision/slices.compopts | 6 +- 11 files changed, 417 insertions(+), 94 deletions(-) create mode 100644 test/optimizations/arrayViewElision/slices-1d-domain.comm-none.good create mode 100644 test/optimizations/arrayViewElision/slices-1d-domain.good rename test/optimizations/arrayViewElision/{slices-1d.comm-none.good => slices-1d-range.comm-none.good} (89%) rename test/optimizations/arrayViewElision/{slices-1d.good => slices-1d-range.good} (97%) create mode 100644 test/optimizations/arrayViewElision/slices-2d-domain.comm-none.good create mode 100644 test/optimizations/arrayViewElision/slices-2d-domain.good rename test/optimizations/arrayViewElision/{slices-2d.comm-none.good => slices-2d-range.comm-none.good} (92%) rename test/optimizations/arrayViewElision/{slices-2d.good => slices-2d-range.good} (98%) diff --git a/modules/internal/ChapelArrayViewElision.chpl b/modules/internal/ChapelArrayViewElision.chpl index 0770244519b3..50529f6f6b3b 100644 --- a/modules/internal/ChapelArrayViewElision.chpl +++ b/modules/internal/ChapelArrayViewElision.chpl @@ -56,7 +56,10 @@ module ChapelArrayViewElision { this.rank = ptrToArr.deref().rank; this.isConst = isConst; this.ptrToArr = ptrToArr; - if allBounded(slicingExprs) { + if isDomain(slicingExprs) { + this.ranges = slicingExprs; + } + 
else if allBounded(slicingExprs) { this.ranges = slicingExprs; } else if chpl__isTupleOfRanges(slicingExprs) { @@ -79,21 +82,35 @@ module ChapelArrayViewElision { } inline proc domOrRange where rank==1 { - return ranges; + return ranges; // doesn't matter whether it is a domain or a range } inline proc domOrRange where rank>1 { - return {(...ranges)}; - } - - inline proc dims() where chpl__isTupleOfRanges(this.ranges) { - return ranges; + if isDomain(ranges) then + return ranges; + else + return {(...ranges)}; } inline proc dims() { - return (ranges,); + if chpl__isTupleOfRanges(this.ranges) { + return ranges; + } + else if isDomain(this.ranges) { + return ranges.dims(); + } + else if isRange(this.ranges) { + return (ranges,); + } + else { + compilerError("Unhandled case in chpl__protoSlice.dims()"); + } } + /*inline proc dims() {*/ + /*return (ranges,);*/ + /*}*/ + inline proc rank param { return ptrToArr.deref().rank; } inline proc eltType type { return ptrToArr.deref().eltType; } inline proc _value { return ptrToArr.deref()._value; } @@ -103,11 +120,16 @@ module ChapelArrayViewElision { } inline proc sizeAs(type t) { - var size = 1:t; - for param r in 0.. 
Size: 2 Threshold: 50 + size qualifies +Test 1 +2 2 1 1 1 + + Size: 2 Threshold: 50 + size qualifies +Test 2 +2 2 1 1 1 + +----------------- +Set last two: + Size: 2 Threshold: 50 + size qualifies +Test 3 +1 1 1 2 2 + + Size: 2 Threshold: 50 + size qualifies +Test 4 +1 1 1 2 2 + +----------------- +Set all: + Size: 5 Threshold: 50 + size qualifies +Test 5 +2 2 2 2 2 + + Size: 5 Threshold: 50 + size qualifies +Test 6 +2 2 2 2 2 + +----------------- diff --git a/test/optimizations/arrayViewElision/slices-1d-domain.good b/test/optimizations/arrayViewElision/slices-1d-domain.good new file mode 100644 index 000000000000..e8f91f6bf314 --- /dev/null +++ b/test/optimizations/arrayViewElision/slices-1d-domain.good @@ -0,0 +1,58 @@ +ArrayViewElision supported (dynamic locality check required) slices.chpl:23 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + domain(1,int(64),one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + domain(1,int(64),one) + +ArrayViewElision supported (dynamic locality check required) slices.chpl:23 + lhsBaseType: [domain(1,int(64),one)] int(64) + lhsIndexingExprs: + domain(1,int(64),one) + rhsBaseType: [domain(1,int(64),one)] int(64) + rhsIndexingExprs: + domain(1,int(64),one) + +Set first two: + Size: 2 Threshold: 50 + size qualifies + locality qualifies +Test 1 +2 2 1 1 1 + + Size: 2 Threshold: 50 + size qualifies + locality qualifies +Test 2 +2 2 1 1 1 + +----------------- +Set last two: + Size: 2 Threshold: 50 + size qualifies + locality qualifies +Test 3 +1 1 1 2 2 + + Size: 2 Threshold: 50 + size qualifies + locality qualifies +Test 4 +1 1 1 2 2 + +----------------- +Set all: + Size: 5 Threshold: 50 + size qualifies + locality qualifies +Test 5 +2 2 2 2 2 + + Size: 5 Threshold: 50 + size qualifies + locality qualifies +Test 6 +2 2 2 2 2 + +----------------- diff --git a/test/optimizations/arrayViewElision/slices-1d.comm-none.good b/test/optimizations/arrayViewElision/slices-1d-range.comm-none.good 
similarity index 89% rename from test/optimizations/arrayViewElision/slices-1d.comm-none.good rename to test/optimizations/arrayViewElision/slices-1d-range.comm-none.good index 9215b5c29cc3..2440e2a251a9 100644 --- a/test/optimizations/arrayViewElision/slices-1d.comm-none.good +++ b/test/optimizations/arrayViewElision/slices-1d-range.comm-none.good @@ -1,4 +1,4 @@ -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported slices.chpl:26 lhsBaseType: [domain(1,int(64),one)] int(64) lhsIndexingExprs: range(int(64),both,one) @@ -6,7 +6,7 @@ ArrayViewElision supported slices.chpl:21 rhsIndexingExprs: range(int(64),both,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported slices.chpl:26 lhsBaseType: [domain(1,int(64),one)] int(64) lhsIndexingExprs: range(int(64),both,one) @@ -14,7 +14,7 @@ ArrayViewElision supported slices.chpl:21 rhsIndexingExprs: range(int(64),both,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported slices.chpl:26 lhsBaseType: [domain(1,int(64),one)] int(64) lhsIndexingExprs: range(int(64),both,one) @@ -22,7 +22,7 @@ ArrayViewElision supported slices.chpl:21 rhsIndexingExprs: range(int(64),both,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported slices.chpl:26 lhsBaseType: [domain(1,int(64),one)] int(64) lhsIndexingExprs: range(int(64),high,one) @@ -30,7 +30,7 @@ ArrayViewElision supported slices.chpl:21 rhsIndexingExprs: range(int(64),high,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported slices.chpl:26 lhsBaseType: [domain(1,int(64),one)] int(64) lhsIndexingExprs: range(int(64),high,one) @@ -38,7 +38,7 @@ ArrayViewElision supported slices.chpl:21 rhsIndexingExprs: range(int(64),high,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported slices.chpl:26 lhsBaseType: [domain(1,int(64),one)] int(64) lhsIndexingExprs: range(int(64),high,one) @@ -46,7 +46,7 @@ ArrayViewElision supported slices.chpl:21 rhsIndexingExprs: 
range(int(64),high,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported slices.chpl:26 lhsBaseType: [domain(1,int(64),one)] int(64) lhsIndexingExprs: range(int(64),low,one) @@ -54,7 +54,7 @@ ArrayViewElision supported slices.chpl:21 rhsIndexingExprs: range(int(64),low,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported slices.chpl:26 lhsBaseType: [domain(1,int(64),one)] int(64) lhsIndexingExprs: range(int(64),low,one) @@ -62,7 +62,7 @@ ArrayViewElision supported slices.chpl:21 rhsIndexingExprs: range(int(64),low,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported slices.chpl:26 lhsBaseType: [domain(1,int(64),one)] int(64) lhsIndexingExprs: range(int(64),low,one) @@ -70,7 +70,7 @@ ArrayViewElision supported slices.chpl:21 rhsIndexingExprs: range(int(64),low,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported slices.chpl:26 lhsBaseType: [domain(1,int(64),one)] int(64) lhsIndexingExprs: range(int(64),both,one) @@ -78,7 +78,7 @@ ArrayViewElision supported slices.chpl:21 rhsIndexingExprs: range(int(64),both,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported slices.chpl:26 lhsBaseType: [domain(1,int(64),one)] int(64) lhsIndexingExprs: range(int(64),low,one) @@ -86,7 +86,7 @@ ArrayViewElision supported slices.chpl:21 rhsIndexingExprs: range(int(64),low,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported slices.chpl:26 lhsBaseType: [domain(1,int(64),one)] int(64) lhsIndexingExprs: range(int(64),neither,one) @@ -94,7 +94,7 @@ ArrayViewElision supported slices.chpl:21 rhsIndexingExprs: range(int(64),neither,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported slices.chpl:26 lhsBaseType: [domain(1,int(64),one)] int(64) lhsIndexingExprs: range(int(64),neither,one) @@ -102,7 +102,7 @@ ArrayViewElision supported slices.chpl:21 rhsIndexingExprs: range(int(64),neither,one) -ArrayViewElision supported slices.chpl:21 
+ArrayViewElision supported slices.chpl:26 lhsBaseType: [domain(1,int(64),one)] int(64) lhsIndexingExprs: range(int(64),neither,one) diff --git a/test/optimizations/arrayViewElision/slices-1d.good b/test/optimizations/arrayViewElision/slices-1d-range.good similarity index 97% rename from test/optimizations/arrayViewElision/slices-1d.good rename to test/optimizations/arrayViewElision/slices-1d-range.good index 53b2be34c324..f5cb50f914a2 100644 --- a/test/optimizations/arrayViewElision/slices-1d.good +++ b/test/optimizations/arrayViewElision/slices-1d-range.good @@ -1,4 +1,4 @@ -ArrayViewElision supported (dynamic locality check required) slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:26 lhsBaseType: [domain(1,int(64),one)] int(64) lhsIndexingExprs: range(int(64),both,one) @@ -6,7 +6,7 @@ ArrayViewElision supported (dynamic locality check required) slices.chpl:21 rhsIndexingExprs: range(int(64),both,one) -ArrayViewElision supported (dynamic locality check required) slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:26 lhsBaseType: [domain(1,int(64),one)] int(64) lhsIndexingExprs: range(int(64),both,one) @@ -14,7 +14,7 @@ ArrayViewElision supported (dynamic locality check required) slices.chpl:21 rhsIndexingExprs: range(int(64),both,one) -ArrayViewElision supported (dynamic locality check required) slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:26 lhsBaseType: [domain(1,int(64),one)] int(64) lhsIndexingExprs: range(int(64),both,one) @@ -22,7 +22,7 @@ ArrayViewElision supported (dynamic locality check required) slices.chpl:21 rhsIndexingExprs: range(int(64),both,one) -ArrayViewElision supported (dynamic locality check required) slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:26 lhsBaseType: [domain(1,int(64),one)] int(64) lhsIndexingExprs: range(int(64),high,one) @@ -30,7 +30,7 @@ ArrayViewElision supported 
(dynamic locality check required) slices.chpl:21 rhsIndexingExprs: range(int(64),high,one) -ArrayViewElision supported (dynamic locality check required) slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:26 lhsBaseType: [domain(1,int(64),one)] int(64) lhsIndexingExprs: range(int(64),high,one) @@ -38,7 +38,7 @@ ArrayViewElision supported (dynamic locality check required) slices.chpl:21 rhsIndexingExprs: range(int(64),high,one) -ArrayViewElision supported (dynamic locality check required) slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:26 lhsBaseType: [domain(1,int(64),one)] int(64) lhsIndexingExprs: range(int(64),high,one) @@ -46,7 +46,7 @@ ArrayViewElision supported (dynamic locality check required) slices.chpl:21 rhsIndexingExprs: range(int(64),high,one) -ArrayViewElision supported (dynamic locality check required) slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:26 lhsBaseType: [domain(1,int(64),one)] int(64) lhsIndexingExprs: range(int(64),low,one) @@ -54,7 +54,7 @@ ArrayViewElision supported (dynamic locality check required) slices.chpl:21 rhsIndexingExprs: range(int(64),low,one) -ArrayViewElision supported (dynamic locality check required) slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:26 lhsBaseType: [domain(1,int(64),one)] int(64) lhsIndexingExprs: range(int(64),low,one) @@ -62,7 +62,7 @@ ArrayViewElision supported (dynamic locality check required) slices.chpl:21 rhsIndexingExprs: range(int(64),low,one) -ArrayViewElision supported (dynamic locality check required) slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:26 lhsBaseType: [domain(1,int(64),one)] int(64) lhsIndexingExprs: range(int(64),low,one) @@ -70,7 +70,7 @@ ArrayViewElision supported (dynamic locality check required) slices.chpl:21 rhsIndexingExprs: range(int(64),low,one) -ArrayViewElision supported 
(dynamic locality check required) slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:26 lhsBaseType: [domain(1,int(64),one)] int(64) lhsIndexingExprs: range(int(64),both,one) @@ -78,7 +78,7 @@ ArrayViewElision supported (dynamic locality check required) slices.chpl:21 rhsIndexingExprs: range(int(64),both,one) -ArrayViewElision supported (dynamic locality check required) slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:26 lhsBaseType: [domain(1,int(64),one)] int(64) lhsIndexingExprs: range(int(64),low,one) @@ -86,7 +86,7 @@ ArrayViewElision supported (dynamic locality check required) slices.chpl:21 rhsIndexingExprs: range(int(64),low,one) -ArrayViewElision supported (dynamic locality check required) slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:26 lhsBaseType: [domain(1,int(64),one)] int(64) lhsIndexingExprs: range(int(64),neither,one) @@ -94,7 +94,7 @@ ArrayViewElision supported (dynamic locality check required) slices.chpl:21 rhsIndexingExprs: range(int(64),neither,one) -ArrayViewElision supported (dynamic locality check required) slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:26 lhsBaseType: [domain(1,int(64),one)] int(64) lhsIndexingExprs: range(int(64),neither,one) @@ -102,7 +102,7 @@ ArrayViewElision supported (dynamic locality check required) slices.chpl:21 rhsIndexingExprs: range(int(64),neither,one) -ArrayViewElision supported (dynamic locality check required) slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:26 lhsBaseType: [domain(1,int(64),one)] int(64) lhsIndexingExprs: range(int(64),neither,one) diff --git a/test/optimizations/arrayViewElision/slices-2d-domain.comm-none.good b/test/optimizations/arrayViewElision/slices-2d-domain.comm-none.good new file mode 100644 index 000000000000..d5b8de10f2b6 --- /dev/null +++ 
b/test/optimizations/arrayViewElision/slices-2d-domain.comm-none.good @@ -0,0 +1,76 @@ +ArrayViewElision supported slices.chpl:23 + lhsBaseType: [domain(2,int(64),one)] int(64) + lhsIndexingExprs: + domain(2,int(64),one) + rhsBaseType: [domain(2,int(64),one)] int(64) + rhsIndexingExprs: + domain(2,int(64),one) + +ArrayViewElision supported slices.chpl:23 + lhsBaseType: [domain(2,int(64),one)] int(64) + lhsIndexingExprs: + domain(2,int(64),one) + rhsBaseType: [domain(2,int(64),one)] int(64) + rhsIndexingExprs: + domain(2,int(64),one) + +Set first two: + Size: 4 Threshold: 50 + size qualifies +Test 1 +2 2 1 1 1 +2 2 1 1 1 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 1 1 + + Size: 4 Threshold: 50 + size qualifies +Test 2 +2 2 1 1 1 +2 2 1 1 1 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 1 1 + +----------------- +Set last two: + Size: 4 Threshold: 50 + size qualifies +Test 3 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 2 2 +1 1 1 2 2 + + Size: 4 Threshold: 50 + size qualifies +Test 4 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 2 2 +1 1 1 2 2 + +----------------- +Set all: + Size: 25 Threshold: 50 + size qualifies +Test 5 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 + + Size: 25 Threshold: 50 + size qualifies +Test 6 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 + +----------------- diff --git a/test/optimizations/arrayViewElision/slices-2d-domain.good b/test/optimizations/arrayViewElision/slices-2d-domain.good new file mode 100644 index 000000000000..f5abd827ad94 --- /dev/null +++ b/test/optimizations/arrayViewElision/slices-2d-domain.good @@ -0,0 +1,82 @@ +ArrayViewElision supported (dynamic locality check required) slices.chpl:23 + lhsBaseType: [domain(2,int(64),one)] int(64) + lhsIndexingExprs: + domain(2,int(64),one) + rhsBaseType: [domain(2,int(64),one)] int(64) + rhsIndexingExprs: + domain(2,int(64),one) + +ArrayViewElision supported (dynamic locality check required) slices.chpl:23 + lhsBaseType: [domain(2,int(64),one)] int(64) + lhsIndexingExprs: + domain(2,int(64),one) + 
rhsBaseType: [domain(2,int(64),one)] int(64) + rhsIndexingExprs: + domain(2,int(64),one) + +Set first two: + Size: 4 Threshold: 50 + size qualifies + locality qualifies +Test 1 +2 2 1 1 1 +2 2 1 1 1 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 1 1 + + Size: 4 Threshold: 50 + size qualifies + locality qualifies +Test 2 +2 2 1 1 1 +2 2 1 1 1 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 1 1 + +----------------- +Set last two: + Size: 4 Threshold: 50 + size qualifies + locality qualifies +Test 3 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 2 2 +1 1 1 2 2 + + Size: 4 Threshold: 50 + size qualifies + locality qualifies +Test 4 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 2 2 +1 1 1 2 2 + +----------------- +Set all: + Size: 25 Threshold: 50 + size qualifies + locality qualifies +Test 5 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 + + Size: 25 Threshold: 50 + size qualifies + locality qualifies +Test 6 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 +2 2 2 2 2 + +----------------- diff --git a/test/optimizations/arrayViewElision/slices-2d.comm-none.good b/test/optimizations/arrayViewElision/slices-2d-range.comm-none.good similarity index 92% rename from test/optimizations/arrayViewElision/slices-2d.comm-none.good rename to test/optimizations/arrayViewElision/slices-2d-range.comm-none.good index e2ce44ad301d..d054bd11e263 100644 --- a/test/optimizations/arrayViewElision/slices-2d.comm-none.good +++ b/test/optimizations/arrayViewElision/slices-2d-range.comm-none.good @@ -1,4 +1,4 @@ -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported slices.chpl:26 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: range(int(64),both,one) @@ -8,7 +8,7 @@ ArrayViewElision supported slices.chpl:21 range(int(64),both,one) range(int(64),both,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported slices.chpl:26 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: range(int(64),both,one) @@ -18,7 +18,7 @@ ArrayViewElision supported slices.chpl:21 range(int(64),both,one) 
range(int(64),both,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported slices.chpl:26 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: range(int(64),both,one) @@ -28,7 +28,7 @@ ArrayViewElision supported slices.chpl:21 range(int(64),both,one) range(int(64),both,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported slices.chpl:26 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: range(int(64),high,one) @@ -38,7 +38,7 @@ ArrayViewElision supported slices.chpl:21 range(int(64),high,one) range(int(64),high,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported slices.chpl:26 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: range(int(64),high,one) @@ -48,7 +48,7 @@ ArrayViewElision supported slices.chpl:21 range(int(64),high,one) range(int(64),high,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported slices.chpl:26 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: range(int(64),high,one) @@ -58,7 +58,7 @@ ArrayViewElision supported slices.chpl:21 range(int(64),high,one) range(int(64),high,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported slices.chpl:26 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: range(int(64),low,one) @@ -68,7 +68,7 @@ ArrayViewElision supported slices.chpl:21 range(int(64),low,one) range(int(64),low,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported slices.chpl:26 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: range(int(64),low,one) @@ -78,7 +78,7 @@ ArrayViewElision supported slices.chpl:21 range(int(64),low,one) range(int(64),low,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported slices.chpl:26 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: range(int(64),low,one) @@ -88,7 +88,7 @@ ArrayViewElision supported slices.chpl:21 range(int(64),low,one) range(int(64),low,one) -ArrayViewElision 
supported slices.chpl:21 +ArrayViewElision supported slices.chpl:26 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: range(int(64),both,one) @@ -98,7 +98,7 @@ ArrayViewElision supported slices.chpl:21 range(int(64),both,one) range(int(64),both,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported slices.chpl:26 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: range(int(64),low,one) @@ -108,7 +108,7 @@ ArrayViewElision supported slices.chpl:21 range(int(64),low,one) range(int(64),low,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported slices.chpl:26 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: range(int(64),neither,one) @@ -118,7 +118,7 @@ ArrayViewElision supported slices.chpl:21 range(int(64),neither,one) range(int(64),neither,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported slices.chpl:26 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: range(int(64),neither,one) @@ -128,7 +128,7 @@ ArrayViewElision supported slices.chpl:21 range(int(64),neither,one) range(int(64),neither,one) -ArrayViewElision supported slices.chpl:21 +ArrayViewElision supported slices.chpl:26 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: range(int(64),neither,one) diff --git a/test/optimizations/arrayViewElision/slices-2d.good b/test/optimizations/arrayViewElision/slices-2d-range.good similarity index 98% rename from test/optimizations/arrayViewElision/slices-2d.good rename to test/optimizations/arrayViewElision/slices-2d-range.good index 48e5c3d2113b..d8fe2715ad84 100644 --- a/test/optimizations/arrayViewElision/slices-2d.good +++ b/test/optimizations/arrayViewElision/slices-2d-range.good @@ -1,4 +1,4 @@ -ArrayViewElision supported (dynamic locality check required) slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:26 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: range(int(64),both,one) @@ 
-8,7 +8,7 @@ ArrayViewElision supported (dynamic locality check required) slices.chpl:21 range(int(64),both,one) range(int(64),both,one) -ArrayViewElision supported (dynamic locality check required) slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:26 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: range(int(64),both,one) @@ -18,7 +18,7 @@ ArrayViewElision supported (dynamic locality check required) slices.chpl:21 range(int(64),both,one) range(int(64),both,one) -ArrayViewElision supported (dynamic locality check required) slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:26 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: range(int(64),both,one) @@ -28,7 +28,7 @@ ArrayViewElision supported (dynamic locality check required) slices.chpl:21 range(int(64),both,one) range(int(64),both,one) -ArrayViewElision supported (dynamic locality check required) slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:26 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: range(int(64),high,one) @@ -38,7 +38,7 @@ ArrayViewElision supported (dynamic locality check required) slices.chpl:21 range(int(64),high,one) range(int(64),high,one) -ArrayViewElision supported (dynamic locality check required) slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:26 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: range(int(64),high,one) @@ -48,7 +48,7 @@ ArrayViewElision supported (dynamic locality check required) slices.chpl:21 range(int(64),high,one) range(int(64),high,one) -ArrayViewElision supported (dynamic locality check required) slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:26 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: range(int(64),high,one) @@ -58,7 +58,7 @@ ArrayViewElision supported (dynamic locality check required) 
slices.chpl:21 range(int(64),high,one) range(int(64),high,one) -ArrayViewElision supported (dynamic locality check required) slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:26 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: range(int(64),low,one) @@ -68,7 +68,7 @@ ArrayViewElision supported (dynamic locality check required) slices.chpl:21 range(int(64),low,one) range(int(64),low,one) -ArrayViewElision supported (dynamic locality check required) slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:26 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: range(int(64),low,one) @@ -78,7 +78,7 @@ ArrayViewElision supported (dynamic locality check required) slices.chpl:21 range(int(64),low,one) range(int(64),low,one) -ArrayViewElision supported (dynamic locality check required) slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:26 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: range(int(64),low,one) @@ -88,7 +88,7 @@ ArrayViewElision supported (dynamic locality check required) slices.chpl:21 range(int(64),low,one) range(int(64),low,one) -ArrayViewElision supported (dynamic locality check required) slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:26 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: range(int(64),both,one) @@ -98,7 +98,7 @@ ArrayViewElision supported (dynamic locality check required) slices.chpl:21 range(int(64),both,one) range(int(64),both,one) -ArrayViewElision supported (dynamic locality check required) slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:26 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: range(int(64),low,one) @@ -108,7 +108,7 @@ ArrayViewElision supported (dynamic locality check required) slices.chpl:21 range(int(64),low,one) range(int(64),low,one) -ArrayViewElision supported 
(dynamic locality check required) slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:26 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: range(int(64),neither,one) @@ -118,7 +118,7 @@ ArrayViewElision supported (dynamic locality check required) slices.chpl:21 range(int(64),neither,one) range(int(64),neither,one) -ArrayViewElision supported (dynamic locality check required) slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:26 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: range(int(64),neither,one) @@ -128,7 +128,7 @@ ArrayViewElision supported (dynamic locality check required) slices.chpl:21 range(int(64),neither,one) range(int(64),neither,one) -ArrayViewElision supported (dynamic locality check required) slices.chpl:21 +ArrayViewElision supported (dynamic locality check required) slices.chpl:26 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: range(int(64),neither,one) diff --git a/test/optimizations/arrayViewElision/slices.chpl b/test/optimizations/arrayViewElision/slices.chpl index 17911a8de87c..90b20ba4ab79 100644 --- a/test/optimizations/arrayViewElision/slices.chpl +++ b/test/optimizations/arrayViewElision/slices.chpl @@ -1,3 +1,4 @@ +config param useDomain = true; config param rank = 1; proc multuplify(param rank, x) { @@ -18,7 +19,12 @@ proc test(dstRange, srcRange) { const dstSlice = multuplify(rank, dstRange); const srcSlice = multuplify(rank, dstRange); - A[(...dstSlice)] = B[(...srcSlice)]; + if useDomain { + A[{(...dstSlice)}] = B[{(...srcSlice)}]; + } + else { + A[(...dstSlice)] = B[(...srcSlice)]; + } writeln("Test ", counter); writeln(A); @@ -31,33 +37,39 @@ proc test(dstRange, srcRange) { writeln("Set first two:"); test(1..2, 1..2); test(1..2, 4..5); -test(1..2, ..2); -test(1..2, 4..); -test(..2, 1..2); -test(..2, 3..4); -test(..2, ..2); -test(..2, 4..); +if !useDomain { + test(1..2, ..2); + test(1..2, 4..); + test(..2, 1..2); + 
test(..2, 3..4); + test(..2, ..2); + test(..2, 4..); +} writeln("-----------------"); writeln("Set last two:"); test(4..5, 1..2); test(4..5, 4..5); -test(4..5, ..2); -test(4..5, 4..); -test(4.., 1..2); -test(4.., 4..5); -test(4.., ..2); -test(4.., 4..); +if !useDomain { + test(4..5, ..2); + test(4..5, 4..); + test(4.., 1..2); + test(4.., 4..5); + test(4.., ..2); + test(4.., 4..); +} writeln("-----------------"); writeln("Set all:"); test(1..5, 1..5); test(1..5, 1..); -test(1..5, ..); -test(1.., 1..5); -test(1.., 1..); -test(1.., ..); -test(.., 1..5); -test(.., 1..); -test(.., ..); +if !useDomain { + test(1..5, ..); + test(1.., 1..5); + test(1.., 1..); + test(1.., ..); + test(.., 1..5); + test(.., 1..); + test(.., ..); +} writeln("-----------------"); diff --git a/test/optimizations/arrayViewElision/slices.compopts b/test/optimizations/arrayViewElision/slices.compopts index 01ab8bce32cb..caa57727340c 100644 --- a/test/optimizations/arrayViewElision/slices.compopts +++ b/test/optimizations/arrayViewElision/slices.compopts @@ -1,2 +1,4 @@ ---report-array-view-elision -sdebugShortArrayTransfer=true -srank=1 # slices-1d ---report-array-view-elision -sdebugShortArrayTransfer=true -srank=2 # slices-2d +--report-array-view-elision -sdebugShortArrayTransfer=true -srank=1 -suseDomain=false # slices-1d-range +--report-array-view-elision -sdebugShortArrayTransfer=true -srank=2 -suseDomain=false # slices-2d-range +--report-array-view-elision -sdebugShortArrayTransfer=true -srank=1 -suseDomain=true # slices-1d-domain +--report-array-view-elision -sdebugShortArrayTransfer=true -srank=2 -suseDomain=true # slices-2d-domain From c7337148412aa10ce7ca65f68bfc835f73e4ade7 Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Thu, 27 Jun 2024 21:16:16 -0700 Subject: [PATCH 67/88] Fix a reporting output in the compiler Signed-off-by: Engin Kayraklioglu --- compiler/optimizations/arrayViewElision.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git 
a/compiler/optimizations/arrayViewElision.cpp b/compiler/optimizations/arrayViewElision.cpp index 9cec50973039..9d087ff9e3cb 100644 --- a/compiler/optimizations/arrayViewElision.cpp +++ b/compiler/optimizations/arrayViewElision.cpp @@ -195,7 +195,9 @@ void ProtoSliceAssignHelper::report() { } std::string isSupported = supported() ? "supported" : "not supported"; - std::string isDynamic = !fLocal ? " (dynamic locality check required)" : ""; + std::string isDynamic = (supported() && !fLocal) ? + " (dynamic locality check required)" : + ""; std::cout << "ArrayViewElision " << isSupported << isDynamic << " " << call_->stringLoc() << std::endl; From af2a8b3446d300a5f689f777335796fa398bacf1 Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Fri, 28 Jun 2024 14:57:37 -0700 Subject: [PATCH 68/88] Get basic rank-change views to work Signed-off-by: Engin Kayraklioglu --- compiler/include/arrayViewElision.h | 20 +++ compiler/optimizations/arrayViewElision.cpp | 130 +++++++++++------- modules/internal/ChapelArrayViewElision.chpl | 108 +++++++++++++-- .../arrayViewElision/rankChanges.chpl | 6 + .../arrayViewElision/rankChanges.good | 10 ++ 5 files changed, 214 insertions(+), 60 deletions(-) create mode 100644 test/optimizations/arrayViewElision/rankChanges.chpl create mode 100644 test/optimizations/arrayViewElision/rankChanges.good diff --git a/compiler/include/arrayViewElision.h b/compiler/include/arrayViewElision.h index 2606b0d34050..1b6005a47e7c 100644 --- a/compiler/include/arrayViewElision.h +++ b/compiler/include/arrayViewElision.h @@ -25,6 +25,26 @@ // interface for normalize void arrayViewElision(); +class ArrayViewElisionTransformer { +public: + ArrayViewElisionTransformer() = delete; + ArrayViewElisionTransformer(CallExpr* origCall); + + inline bool candidate() const { return candidate_; } + + void transform(); + +private: + CallExpr* origCall_; + CallExpr* origLhs_; + CallExpr* origRhs_; + + bool candidate_; + + bool exprSuitableForProtoSlice(CallExpr* e, bool 
isLhs); + CallExpr* genCreateProtoSlice(CallExpr* call); +}; + // interface for prefold class ProtoSliceAssignHelper { public: diff --git a/compiler/optimizations/arrayViewElision.cpp b/compiler/optimizations/arrayViewElision.cpp index 9d087ff9e3cb..9bf72b4a6e53 100644 --- a/compiler/optimizations/arrayViewElision.cpp +++ b/compiler/optimizations/arrayViewElision.cpp @@ -26,20 +26,82 @@ #include "resolution.h" #include "view.h" -static bool exprSuitableForProtoSlice(Expr* e, bool isLhs) { - if (CallExpr* call = toCallExpr(e)) { - if (SymExpr* callBase = toSymExpr(call->baseExpr)) { - if (!isFnSymbol(callBase->symbol()) && - (!isLhs || !callBase->symbol()->isConstant())) { - return true; - } +ArrayViewElisionTransformer::ArrayViewElisionTransformer(CallExpr* origCall): + origCall_(origCall) { + + origLhs_ = toCallExpr(origCall_->get(1)); + origRhs_ = toCallExpr(origCall_->get(2)); + + // one side is not a call + // TODO this should be relaxed for `array=view` or vice versa + if (origLhs_ == nullptr || origRhs_ == nullptr) { + candidate_ = false; + return; + } + + // calls have different number of actuals + // TODO this should be relaxed for `slice=rank-change` or vice versa + if (origLhs_->numActuals() != origRhs_->numActuals()) { + candidate_ = false; + return; + } + + // further analysis per call + if ( !(exprSuitableForProtoSlice(origLhs_, /*isLhs*/ true) || + exprSuitableForProtoSlice(origRhs_, /*isLhs*/ false)) ) { + candidate_ = false; + return; + } + + candidate_ = true; +} + +void ArrayViewElisionTransformer::transform() { + INT_ASSERT(candidate_); + + SET_LINENO(origCall_); + + CallExpr* lhsPSCall = genCreateProtoSlice(origLhs_); + CallExpr* rhsPSCall = genCreateProtoSlice(origRhs_); + + // arrayview elision placeholder + VarSymbol* placeholder = new VarSymbol("array_view_elision_flag", dtBool); + placeholder->addFlag(FLAG_ARRAY_VIEW_ELISION_FLAG); + + origCall_->insertBefore(new DefExpr(placeholder, gFalse)); + + BlockStmt* thenBlock = new BlockStmt(); + 
VarSymbol* lhsPS = new VarSymbol("lhs_proto_slice"); + VarSymbol* rhsPS = new VarSymbol("rhs_proto_slice"); + + thenBlock->insertAtTail(new DefExpr(lhsPS, lhsPSCall)); + thenBlock->insertAtTail(new DefExpr(rhsPS, rhsPSCall)); + thenBlock->insertAtTail(new CallExpr(PRIM_PROTO_SLICE_ASSIGN, lhsPS, + rhsPS)); + + BlockStmt* elseBlock = new BlockStmt(); + + CondStmt* cond = new CondStmt(new SymExpr(placeholder), thenBlock, + elseBlock); + + origCall_->insertBefore(cond); + elseBlock->insertAtTail(origCall_->remove()); +} + + + +bool ArrayViewElisionTransformer::exprSuitableForProtoSlice(CallExpr* call, + bool isLhs) { + if (SymExpr* callBase = toSymExpr(call->baseExpr)) { + if (!isFnSymbol(callBase->symbol()) && + (!isLhs || !callBase->symbol()->isConstant())) { + return true; } } return false; } - -static CallExpr* generateCreateProtoSlice(CallExpr* call) { +CallExpr* ArrayViewElisionTransformer::genCreateProtoSlice(CallExpr* call) { INT_ASSERT(call); SymExpr* base = toSymExpr(call->baseExpr); @@ -58,11 +120,10 @@ static CallExpr* generateCreateProtoSlice(CallExpr* call) { return ret; } + void arrayViewElision() { if (!fArrayViewElision) return; - std::vector candidates; - for_alive_in_Vec (CallExpr, call, gCallExprs) { if (FnSymbol* parentFn = toFnSymbol(call->parentSymbol)) { if (parentFn->hasFlag(FLAG_NO_ARRAY_VIEW_ELISION)) { @@ -70,50 +131,15 @@ void arrayViewElision() { } } + if (call->getModule()->modTag == MOD_USER) { if (call->isNamed("=")) { - if (exprSuitableForProtoSlice(call->get(1), /* isLhs */ true) && - exprSuitableForProtoSlice(call->get(2), /* isLhs */ false)) { - //std::cout << call->stringLoc() << std::endl; - //nprint_view(call); - candidates.push_back(call); + ArrayViewElisionTransformer transformer(call); + + if (transformer.candidate()) { + transformer.transform(); } } - } - - - for_vector(CallExpr, call, candidates) { - SET_LINENO(call); - - CallExpr* lhs = toCallExpr(call->get(1)); - CallExpr* rhs = toCallExpr(call->get(2)); - - CallExpr* 
lhsPSCall = generateCreateProtoSlice(lhs); - CallExpr* rhsPSCall = generateCreateProtoSlice(rhs); - - // arrayview elision placeholder - VarSymbol* placeholder = new VarSymbol("array_view_elision_flag", dtBool); - placeholder->addFlag(FLAG_ARRAY_VIEW_ELISION_FLAG); - - call->insertBefore(new DefExpr(placeholder, gFalse)); - - BlockStmt* thenBlock = new BlockStmt(); - VarSymbol* lhsPS = new VarSymbol("lhs_proto_slice"); - VarSymbol* rhsPS = new VarSymbol("rhs_proto_slice"); - - thenBlock->insertAtTail(new DefExpr(lhsPS, lhsPSCall)); - thenBlock->insertAtTail(new DefExpr(rhsPS, rhsPSCall)); - thenBlock->insertAtTail(new CallExpr(PRIM_PROTO_SLICE_ASSIGN, lhsPS, - rhsPS)); - - BlockStmt* elseBlock = new BlockStmt(); - - CondStmt* cond = new CondStmt(new SymExpr(placeholder), thenBlock, - elseBlock); - - call->insertBefore(cond); - elseBlock->insertAtTail(call->remove()); - - //list_view(cond); + } } } diff --git a/modules/internal/ChapelArrayViewElision.chpl b/modules/internal/ChapelArrayViewElision.chpl index 50529f6f6b3b..1f09fa90a493 100644 --- a/modules/internal/ChapelArrayViewElision.chpl +++ b/modules/internal/ChapelArrayViewElision.chpl @@ -23,6 +23,7 @@ module ChapelArrayViewElision { use ChapelRange; use DefaultRectangular; use CTypes; + use ChapelArray only _validRankChangeArgs; // TODO the following can be removed? 
proc isProtoSlice(a) param { return isSubtype(a.type, chpl__protoSlice); } @@ -35,11 +36,70 @@ module ChapelArrayViewElision { return isArrayOrProtoSlice(a) && isArrayOrProtoSlice(b); } + proc rangify(rcTup) { + compilerAssert(isTuple(rcTup)); + + proc createRangifiedType(type rcTupType) type { + proc recurse(type curType, param dim) type { + if dim == rcTupType.size { + return curType; + } + else if isRangeType(rcTupType[dim]) { + if dim == 0 { + return recurse((rcTupType[dim],), dim+1); + } + else { + return recurse(((...curType), rcTupType[dim]), dim+1); + } + } + else { + const dummy: rcTupType[dim]; + const dummyRange = dummy..dummy; + if dim == 0 { + return recurse((dummyRange.type,), dim+1); + } + else { + return recurse(((...curType), dummyRange.type)); + } + } + } + + return recurse(nothing, 0); + } + + var ret: createRangifiedType(rcTup.type); + + for param i in 0.. Date: Fri, 28 Jun 2024 16:04:36 -0700 Subject: [PATCH 69/88] Stop trying to handle unmatching rank changes Signed-off-by: Engin Kayraklioglu --- compiler/include/arrayViewElision.h | 3 +- compiler/optimizations/arrayViewElision.cpp | 33 ++++++- modules/internal/ChapelArrayViewElision.chpl | 85 ++++++++----------- .../arrayViewElision/rankChanges.chpl | 18 +++- .../arrayViewElision/rankChanges.compopts | 1 + .../arrayViewElision/rankChanges.good | 78 ++++++++++++++--- 6 files changed, 152 insertions(+), 66 deletions(-) create mode 100644 test/optimizations/arrayViewElision/rankChanges.compopts diff --git a/compiler/include/arrayViewElision.h b/compiler/include/arrayViewElision.h index 1b6005a47e7c..e71aa8e81994 100644 --- a/compiler/include/arrayViewElision.h +++ b/compiler/include/arrayViewElision.h @@ -80,7 +80,8 @@ class ProtoSliceAssignHelper { void findCondStmt(); void findProtoSlices(); - bool handleOneProtoSlice(CallExpr* call, bool isLhs); + bool handleOneProtoSlice(bool isLhs); + bool protoSliceTypesMatch() const; CallExpr* findOneProtoSliceCall(Expr* e); Symbol* getFlagReplacement(); 
}; diff --git a/compiler/optimizations/arrayViewElision.cpp b/compiler/optimizations/arrayViewElision.cpp index 9bf72b4a6e53..0be499b5f73c 100644 --- a/compiler/optimizations/arrayViewElision.cpp +++ b/compiler/optimizations/arrayViewElision.cpp @@ -164,8 +164,9 @@ ProtoSliceAssignHelper::ProtoSliceAssignHelper(CallExpr* call): BlockStmt* parentBlock = toBlockStmt(call->parentExpr); parentBlock->insertAtHead(staticCheckBlock_); - supported_ = handleOneProtoSlice(newProtoSliceLhs_, /* isLhs */ true) && - handleOneProtoSlice(newProtoSliceRhs_, /* isLhs */ false); + supported_ = handleOneProtoSlice(/* isLhs */ true) && + handleOneProtoSlice(/* isLhs */ false) && + protoSliceTypesMatch(); findCondStmt(); INT_ASSERT(condStmt_); @@ -242,7 +243,9 @@ void ProtoSliceAssignHelper::report() { std::cout << std::endl; } -bool ProtoSliceAssignHelper::handleOneProtoSlice(CallExpr* call, bool isLhs) { +bool ProtoSliceAssignHelper::handleOneProtoSlice(bool isLhs) { + CallExpr* call = isLhs ? newProtoSliceLhs_ : newProtoSliceRhs_; + INT_ASSERT(call->isNamed("chpl__createProtoSlice") || call->isNamed("chpl__createConstProtoSlice")); @@ -291,6 +294,30 @@ bool ProtoSliceAssignHelper::handleOneProtoSlice(CallExpr* call, bool isLhs) { return ret; } +// make sure we don't try to optimize A[3, 1..3] = B[1..3, 3]. +// because the current optimization works by strength-reducing that to use +// slices : A[3..3, 1..3] = B[1..3, 3..3]. That's not a valid assignment as +// opposed to what the user wrote. 
+bool ProtoSliceAssignHelper::protoSliceTypesMatch() const { + CallExpr* typeMatchCheck = new CallExpr("chpl__ave_typesMatch", + call_->get(1)->copy(), + call_->get(2)->copy()); + + VarSymbol* tmp = newTemp("call_tmp", dtBool); + DefExpr* flagDef = new DefExpr(tmp, typeMatchCheck); + + staticCheckBlock_->insertAtTail(flagDef); + + resolveExpr(typeMatchCheck); + resolveExpr(flagDef); + + bool ret = (toSymExpr(flagDef->init)->symbol() == gTrue); + + flagDef->remove(); + + return ret; +} + // e must be the lhs or rhs of PRIM_ASSIGN_PROTO_SLICES // returns the `chpl__createProtoSlice call CallExpr* ProtoSliceAssignHelper::findOneProtoSliceCall(Expr* e) { diff --git a/modules/internal/ChapelArrayViewElision.chpl b/modules/internal/ChapelArrayViewElision.chpl index 1f09fa90a493..c249efbf4f00 100644 --- a/modules/internal/ChapelArrayViewElision.chpl +++ b/modules/internal/ChapelArrayViewElision.chpl @@ -59,7 +59,7 @@ module ChapelArrayViewElision { return recurse((dummyRange.type,), dim+1); } else { - return recurse(((...curType), dummyRange.type)); + return recurse(((...curType), dummyRange.type), dim+1); } } } @@ -81,25 +81,13 @@ module ChapelArrayViewElision { return ret; } - proc numCollapsedDims(rcTup) param { - proc recurse(param curDim, param curVal) param { - if curDim == rcTup.size then - return curVal; - else if isRange(rcTup[curDim]) then - return recurse(curDim+1, curVal); - else - return recurse(curDim+1, curVal+1); - } - - return recurse(0, 0); - } record chpl__protoSlice { param rank; param isConst; var ptrToArr; // I want this to be a `forwarding ref` to the array var ranges; - param nCollapsed; + type slicingExprType; proc init() { // this constructor is called to create dummy protoSlices that will never @@ -110,50 +98,37 @@ module ChapelArrayViewElision { var dummyArr = [1,]; this.ptrToArr = c_addrOf(dummyArr); this.ranges = 1..0; - this.nCollapsed = 0; + this.slicingExprType = this.ranges.type; } - extern proc printf(s...); - proc init(param isConst, 
ptrToArr, slicingExprs) { this.rank = ptrToArr.deref().rank; this.isConst = isConst; this.ptrToArr = ptrToArr; - if isRange(slicingExprs) { - /*compilerWarning("100\n");*/ + if isDomain(slicingExprs) { this.ranges = slicingExprs; - this.nCollapsed = 0; + } + else if isRange(slicingExprs) { + if allBounded(slicingExprs) { + this.ranges = slicingExprs; + } + else { + this.ranges = tupleOfRangesSlice(ptrToArr.deref().dims(), + (slicingExprs,))[0]; + // [0] at the end makes it a range instead of tuple of ranges + } } else if chpl__isTupleOfRanges(slicingExprs) { - /*compilerWarning("200\n");*/ this.ranges = tupleOfRangesSlice(ptrToArr.deref().dims(), slicingExprs); - this.nCollapsed = 0; } else if _validRankChangeArgs(slicingExprs, ptrToArr.deref().idxType) { - /*compilerWarning(numCollapsedDims(slicingExprs));*/ - /*compilerWarning("300\n");*/ this.ranges = rangify(slicingExprs); - this.nCollapsed = numCollapsedDims(slicingExprs); - } - else if isDomain(slicingExprs) { - /*compilerWarning("400\n");*/ - this.ranges = slicingExprs; - this.nCollapsed = 0; - } - else if allBounded(slicingExprs) { - /*compilerWarning("500\n");*/ - // TODO do we need this branch? - this.ranges = slicingExprs; - this.nCollapsed = 0; } else { - /*compilerWarning("600\n");*/ - // TODO do we need this branch? 
- this.ranges = tupleOfRangesSlice(ptrToArr.deref().dims(), - (slicingExprs,))[0]; - this.nCollapsed = 0; - // [0] at the end makes it a range instead of tuple of ranges + this.ranges = 1..0; // needed to keep the compiler happy + compilerError("Unexpected slicing expr in chpl__protoSlice.init"); } + this.slicingExprType = slicingExprs.type; } proc init=(other: chpl__protoSlice) { @@ -161,7 +136,7 @@ module ChapelArrayViewElision { this.isConst = other.isConst; this.ptrToArr = other.ptrToArr; this.ranges = other.ranges; - this.nCollapsed = other.nCollapsed; + this.slicingExprType = other.slicingExprType; init this; halt("protoSlice copy initializer should never be called"); } @@ -192,10 +167,6 @@ module ChapelArrayViewElision { } } - /*inline proc dims() {*/ - /*return (ranges,);*/ - /*}*/ - inline proc rank param { return ptrToArr.deref().rank; } inline proc eltType type { return ptrToArr.deref().eltType; } inline proc _value { return ptrToArr.deref()._value; } @@ -289,7 +260,6 @@ module ChapelArrayViewElision { operator ==(const ref lhs: chpl__protoSlice(?), const ref rhs: chpl__protoSlice(?)) { return lhs.rank == rhs.rank && - lhs.nCollapsed == rhs.nCollapsed && lhs.ptrToArr == rhs.ptrToArr && lhs.ranges == rhs.ranges; } @@ -396,6 +366,25 @@ module ChapelArrayViewElision { chpl__indexingExprsSupportAVE(base.idxType, (...indexingExprs)); } + proc chpl__ave_typesMatch(a: chpl__protoSlice, b: chpl__protoSlice) param: bool { + // we want to check that if there are integrals in the original slicing + // expressions, they are at the same rank. In other words, if we are working + // with rank-changes, we want to make sure that the collapsed dims on both + // sides match + + type aType = a.slicingExprType; + type bType = b.slicingExprType; + compilerAssert(a.slicingExprType.size == b.slicingExprType.size); + for param i in 0.. 
Size: 3 Threshold: 50 + size qualifies +1 1 1 1 1 +1 1 1 1 1 +2 2 2 1 1 +1 1 1 1 1 +1 1 1 1 1 + + Size: 3 Threshold: 50 + size qualifies +1 1 2 1 1 +1 1 2 1 1 +1 1 2 1 1 +1 1 1 1 1 +1 1 1 1 1 + +1 1 1 1 1 +1 1 1 1 1 +2 2 2 1 1 +1 1 1 1 1 +1 1 1 1 1 + +1 1 2 1 1 +1 1 2 1 1 +1 1 2 1 1 +1 1 1 1 1 +1 1 1 1 1 + From 0991c81797d26919d61bdbf02ba4475830fb49d9 Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Fri, 28 Jun 2024 16:25:53 -0700 Subject: [PATCH 70/88] Further tighten checks Signed-off-by: Engin Kayraklioglu --- compiler/optimizations/arrayViewElision.cpp | 4 +++- modules/internal/ChapelArrayViewElision.chpl | 20 ++++++++++++++------ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/compiler/optimizations/arrayViewElision.cpp b/compiler/optimizations/arrayViewElision.cpp index 0be499b5f73c..7d5da474bb15 100644 --- a/compiler/optimizations/arrayViewElision.cpp +++ b/compiler/optimizations/arrayViewElision.cpp @@ -47,7 +47,7 @@ ArrayViewElisionTransformer::ArrayViewElisionTransformer(CallExpr* origCall): } // further analysis per call - if ( !(exprSuitableForProtoSlice(origLhs_, /*isLhs*/ true) || + if ( !(exprSuitableForProtoSlice(origLhs_, /*isLhs*/ true) && exprSuitableForProtoSlice(origRhs_, /*isLhs*/ false)) ) { candidate_ = false; return; @@ -95,6 +95,8 @@ bool ArrayViewElisionTransformer::exprSuitableForProtoSlice(CallExpr* call, if (SymExpr* callBase = toSymExpr(call->baseExpr)) { if (!isFnSymbol(callBase->symbol()) && (!isLhs || !callBase->symbol()->isConstant())) { + // we avoid touching const lhs, otherwise this optimization causes the + // const checking error to point at the internal code rather than users. 
return true; } } diff --git a/modules/internal/ChapelArrayViewElision.chpl b/modules/internal/ChapelArrayViewElision.chpl index c249efbf4f00..4d2108e5b68c 100644 --- a/modules/internal/ChapelArrayViewElision.chpl +++ b/modules/internal/ChapelArrayViewElision.chpl @@ -86,8 +86,9 @@ module ChapelArrayViewElision { param rank; param isConst; var ptrToArr; // I want this to be a `forwarding ref` to the array - var ranges; type slicingExprType; + var ranges; + param isRankChange = false; proc init() { // this constructor is called to create dummy protoSlices that will never @@ -97,14 +98,17 @@ module ChapelArrayViewElision { var dummyArr = [1,]; this.ptrToArr = c_addrOf(dummyArr); - this.ranges = 1..0; - this.slicingExprType = this.ranges.type; + + var dummyRange = 1..0; + this.slicingExprType = dummyRange.type; + this.ranges = dummyRange; } proc init(param isConst, ptrToArr, slicingExprs) { this.rank = ptrToArr.deref().rank; this.isConst = isConst; this.ptrToArr = ptrToArr; + this.slicingExprType = slicingExprs.type; if isDomain(slicingExprs) { this.ranges = slicingExprs; } @@ -123,20 +127,21 @@ module ChapelArrayViewElision { } else if _validRankChangeArgs(slicingExprs, ptrToArr.deref().idxType) { this.ranges = rangify(slicingExprs); + this.isRankChange = true; } else { this.ranges = 1..0; // needed to keep the compiler happy compilerError("Unexpected slicing expr in chpl__protoSlice.init"); } - this.slicingExprType = slicingExprs.type; } proc init=(other: chpl__protoSlice) { this.rank = other.rank; this.isConst = other.isConst; this.ptrToArr = other.ptrToArr; - this.ranges = other.ranges; this.slicingExprType = other.slicingExprType; + this.ranges = other.ranges; + this.isRankChange = other.isRankChange; init this; halt("protoSlice copy initializer should never be called"); } @@ -367,11 +372,14 @@ module ChapelArrayViewElision { } proc chpl__ave_typesMatch(a: chpl__protoSlice, b: chpl__protoSlice) param: bool { + if a.isRankChange != b.isRankChange then return false; 
//or assert? + + if !a.isRankChange then return true; // nothing else to check + // we want to check that if there are integrals in the original slicing // expressions, they are at the same rank. In other words, if we are working // with rank-changes, we want to make sure that the collapsed dims on both // sides match - type aType = a.slicingExprType; type bType = b.slicingExprType; compilerAssert(a.slicingExprType.size == b.slicingExprType.size); From 584fd8a2e182c2174c6beeb4760e480a765241a1 Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Fri, 28 Jun 2024 16:33:13 -0700 Subject: [PATCH 71/88] Add a distributed array test Signed-off-by: Engin Kayraklioglu --- .../arrayViewElision/distributed.chpl | 24 +++++++++++++++++++ .../arrayViewElision/distributed.compopts | 1 + .../arrayViewElision/distributed.good | 16 +++++++++++++ 3 files changed, 41 insertions(+) create mode 100644 test/optimizations/arrayViewElision/distributed.chpl create mode 100644 test/optimizations/arrayViewElision/distributed.compopts create mode 100644 test/optimizations/arrayViewElision/distributed.good diff --git a/test/optimizations/arrayViewElision/distributed.chpl b/test/optimizations/arrayViewElision/distributed.chpl new file mode 100644 index 000000000000..f6ecf3378b2e --- /dev/null +++ b/test/optimizations/arrayViewElision/distributed.chpl @@ -0,0 +1,24 @@ +const Space = {1..10}; + +// not exhaustive. 
+{ + use BlockDist; + + const D = Space dmapped new blockDist(Space); + var A: [D] int = 1; + var B: [D] int = 2; + + A[3..5] = B[3..5]; + writeln(A); +} + +{ + use CyclicDist; + + const D = Space dmapped new cyclicDist(Space.first); + var A: [D] int = 1; + var B: [D] int = 2; + + A[3..5] = B[3..5]; + writeln(A); +} diff --git a/test/optimizations/arrayViewElision/distributed.compopts b/test/optimizations/arrayViewElision/distributed.compopts new file mode 100644 index 000000000000..4e1569775cbb --- /dev/null +++ b/test/optimizations/arrayViewElision/distributed.compopts @@ -0,0 +1 @@ +--report-array-view-elision diff --git a/test/optimizations/arrayViewElision/distributed.good b/test/optimizations/arrayViewElision/distributed.good new file mode 100644 index 000000000000..c7a73524b514 --- /dev/null +++ b/test/optimizations/arrayViewElision/distributed.good @@ -0,0 +1,16 @@ +ArrayViewElision not supported distributed.chpl:11 + lhsBaseType: [BlockDom(1,int(64),one,unmanaged DefaultDist)] int(64) + lhsIndexingExprs: + range(int(64),both,one) + rhsBaseType: + rhsIndexingExprs: + +ArrayViewElision not supported distributed.chpl:22 + lhsBaseType: [CyclicDom(1,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),both,one) + rhsBaseType: + rhsIndexingExprs: + +1 1 2 2 2 1 1 1 1 1 +1 1 2 2 2 1 1 1 1 1 From 3900a3ee06c63cfdc2daf0f9d0e7195d89d90c9b Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Fri, 28 Jun 2024 17:08:34 -0700 Subject: [PATCH 72/88] Code cleanup Signed-off-by: Engin Kayraklioglu --- compiler/include/arrayViewElision.h | 10 +- compiler/optimizations/arrayViewElision.cpp | 36 +-- compiler/resolution/preFold.cpp | 2 +- modules/internal/ChapelArrayViewElision.chpl | 263 +++++++++--------- modules/internal/ChapelBase.chpl | 11 + .../internal/ChapelShortArrayTransfer.chpl | 18 +- 6 files changed, 176 insertions(+), 164 deletions(-) diff --git a/compiler/include/arrayViewElision.h b/compiler/include/arrayViewElision.h index e71aa8e81994..cae12de7da60 
100644 --- a/compiler/include/arrayViewElision.h +++ b/compiler/include/arrayViewElision.h @@ -46,11 +46,11 @@ class ArrayViewElisionTransformer { }; // interface for prefold -class ProtoSliceAssignHelper { +class ArrayViewElisionPrefolder { public: - ProtoSliceAssignHelper() = delete; - ProtoSliceAssignHelper(CallExpr* call); - ~ProtoSliceAssignHelper(); + ArrayViewElisionPrefolder() = delete; + ArrayViewElisionPrefolder(CallExpr* call); + ~ArrayViewElisionPrefolder(); inline CondStmt* condStmt() const { return condStmt_; } inline bool supported() const { return supported_; } @@ -81,7 +81,7 @@ class ProtoSliceAssignHelper { void findCondStmt(); void findProtoSlices(); bool handleOneProtoSlice(bool isLhs); - bool protoSliceTypesMatch() const; + bool canAssign() const; CallExpr* findOneProtoSliceCall(Expr* e); Symbol* getFlagReplacement(); }; diff --git a/compiler/optimizations/arrayViewElision.cpp b/compiler/optimizations/arrayViewElision.cpp index 7d5da474bb15..1e4cd8faeddd 100644 --- a/compiler/optimizations/arrayViewElision.cpp +++ b/compiler/optimizations/arrayViewElision.cpp @@ -145,7 +145,7 @@ void arrayViewElision() { } } -ProtoSliceAssignHelper::ProtoSliceAssignHelper(CallExpr* call): +ArrayViewElisionPrefolder::ArrayViewElisionPrefolder(CallExpr* call): call_(call), newProtoSliceLhs_(nullptr), newProtoSliceRhs_(nullptr), @@ -168,23 +168,23 @@ ProtoSliceAssignHelper::ProtoSliceAssignHelper(CallExpr* call): supported_ = handleOneProtoSlice(/* isLhs */ true) && handleOneProtoSlice(/* isLhs */ false) && - protoSliceTypesMatch(); + canAssign(); findCondStmt(); INT_ASSERT(condStmt_); } -ProtoSliceAssignHelper::~ProtoSliceAssignHelper() { +ArrayViewElisionPrefolder::~ArrayViewElisionPrefolder() { staticCheckBlock_->remove(); tmpCondFlag_->getSingleDef()->getStmtExpr()->remove(); tmpCondFlag_->defPoint->remove(); } -CallExpr* ProtoSliceAssignHelper::getReplacement() { +CallExpr* ArrayViewElisionPrefolder::getReplacement() { return new CallExpr("=", 
call_->get(1)->copy(), call_->get(2)->copy()); } -Symbol* ProtoSliceAssignHelper::getFlagReplacement() { +Symbol* ArrayViewElisionPrefolder::getFlagReplacement() { if (!supported_) { // we can't optimize return gFalse; @@ -211,12 +211,12 @@ Symbol* ProtoSliceAssignHelper::getFlagReplacement() { } } -void ProtoSliceAssignHelper::updateAndFoldConditional() { +void ArrayViewElisionPrefolder::updateAndFoldConditional() { condStmt_->condExpr->replace(new SymExpr(getFlagReplacement())); condStmt_->foldConstantCondition(/*addEndOfStatement*/ false); } -void ProtoSliceAssignHelper::report() { +void ArrayViewElisionPrefolder::report() { if (!fReportArrayViewElision) return; if (ModuleSymbol* mod = call_->getModule()) { // if there's no user module, getModule could return null @@ -245,7 +245,7 @@ void ProtoSliceAssignHelper::report() { std::cout << std::endl; } -bool ProtoSliceAssignHelper::handleOneProtoSlice(bool isLhs) { +bool ArrayViewElisionPrefolder::handleOneProtoSlice(bool isLhs) { CallExpr* call = isLhs ? newProtoSliceLhs_ : newProtoSliceRhs_; INT_ASSERT(call->isNamed("chpl__createProtoSlice") || @@ -270,7 +270,7 @@ bool ProtoSliceAssignHelper::handleOneProtoSlice(bool isLhs) { } } - CallExpr* typeCheck = new CallExpr("chpl__typesSupportArrayViewElision"); + CallExpr* typeCheck = new CallExpr("chpl__ave_exprCanBeProtoSlice"); for_actuals (actual, call) { //nprint_view(actual); INT_ASSERT(isSymExpr(actual)); @@ -300,17 +300,17 @@ bool ProtoSliceAssignHelper::handleOneProtoSlice(bool isLhs) { // because the current optimization works by strength-reducing that to use // slices : A[3..3, 1..3] = B[1..3, 3..3]. That's not a valid assignment as // opposed to what the user wrote. 
-bool ProtoSliceAssignHelper::protoSliceTypesMatch() const { - CallExpr* typeMatchCheck = new CallExpr("chpl__ave_typesMatch", - call_->get(1)->copy(), - call_->get(2)->copy()); +bool ArrayViewElisionPrefolder::canAssign() const { + CallExpr* canAssign = new CallExpr("chpl__ave_protoSlicesSupportAssignment", + call_->get(1)->copy(), + call_->get(2)->copy()); VarSymbol* tmp = newTemp("call_tmp", dtBool); - DefExpr* flagDef = new DefExpr(tmp, typeMatchCheck); + DefExpr* flagDef = new DefExpr(tmp, canAssign); staticCheckBlock_->insertAtTail(flagDef); - resolveExpr(typeMatchCheck); + resolveExpr(canAssign); resolveExpr(flagDef); bool ret = (toSymExpr(flagDef->init)->symbol() == gTrue); @@ -322,7 +322,7 @@ bool ProtoSliceAssignHelper::protoSliceTypesMatch() const { // e must be the lhs or rhs of PRIM_ASSIGN_PROTO_SLICES // returns the `chpl__createProtoSlice call -CallExpr* ProtoSliceAssignHelper::findOneProtoSliceCall(Expr* e) { +CallExpr* ArrayViewElisionPrefolder::findOneProtoSliceCall(Expr* e) { SymExpr* symExpr = toSymExpr(e); INT_ASSERT(symExpr); @@ -344,13 +344,13 @@ CallExpr* ProtoSliceAssignHelper::findOneProtoSliceCall(Expr* e) { return toCallExpr(move->get(2)); } -void ProtoSliceAssignHelper::findProtoSlices() { +void ArrayViewElisionPrefolder::findProtoSlices() { //nprint_view(call_); newProtoSliceLhs_ = findOneProtoSliceCall(call_->get(1)); newProtoSliceRhs_ = findOneProtoSliceCall(call_->get(2)); } -void ProtoSliceAssignHelper::findCondStmt() { +void ArrayViewElisionPrefolder::findCondStmt() { Expr* cur = call_; while (cur) { if (CondStmt* condStmt = toCondStmt(cur)) { diff --git a/compiler/resolution/preFold.cpp b/compiler/resolution/preFold.cpp index 9be68ec22aef..3f89ef8c27c6 100644 --- a/compiler/resolution/preFold.cpp +++ b/compiler/resolution/preFold.cpp @@ -918,7 +918,7 @@ static Expr* preFoldPrimOp(CallExpr* call) { } case PRIM_PROTO_SLICE_ASSIGN: { - ProtoSliceAssignHelper assignment(call); + ArrayViewElisionPrefolder assignment(call); if 
(assignment.supported()) { retval = assignment.getReplacement(); diff --git a/modules/internal/ChapelArrayViewElision.chpl b/modules/internal/ChapelArrayViewElision.chpl index 4d2108e5b68c..8474f5425c31 100644 --- a/modules/internal/ChapelArrayViewElision.chpl +++ b/modules/internal/ChapelArrayViewElision.chpl @@ -25,62 +25,77 @@ module ChapelArrayViewElision { use CTypes; use ChapelArray only _validRankChangeArgs; - // TODO the following can be removed? - proc isProtoSlice(a) param { return isSubtype(a.type, chpl__protoSlice); } + // + // compiler interface + // + + proc chpl__createProtoSlice(ref Arr, slicingExprs ...) + where chpl__createProtoSliceArgCheck(Arr, slicingExprs) { - proc isArrayOrProtoSlice(a) param { - return isArray(a) || isProtoSlice(a); + if slicingExprs.size == 1 then + return new chpl__protoSlice(isConst=false, c_addrOf(Arr), + slicingExprs[0]); + else + return new chpl__protoSlice(isConst=false, c_addrOf(Arr), slicingExprs); } - proc areBothArraysOrProtoSlices(a, b) { - return isArrayOrProtoSlice(a) && isArrayOrProtoSlice(b); + proc chpl__createConstProtoSlice(const ref Arr, slicingExprs ...) + where chpl__createProtoSliceArgCheck(Arr, slicingExprs) { + + if slicingExprs.size == 1 { + return new chpl__protoSlice(isConst=true, c_addrOfConst(Arr), + slicingExprs[0]); + } + else + return new chpl__protoSlice(isConst=true, c_addrOfConst(Arr), + slicingExprs); } - proc rangify(rcTup) { - compilerAssert(isTuple(rcTup)); + // catch-all: nothing here is supported, just pretend creating a proto slice. + // The branch where this call is will be dropped during resolution. We just + // want to avoid resolution errors before that happens + proc chpl__createProtoSlice(x, slicingExprs... 
) { + return new chpl__protoSlice(); + } - proc createRangifiedType(type rcTupType) type { - proc recurse(type curType, param dim) type { - if dim == rcTupType.size { - return curType; - } - else if isRangeType(rcTupType[dim]) { - if dim == 0 { - return recurse((rcTupType[dim],), dim+1); - } - else { - return recurse(((...curType), rcTupType[dim]), dim+1); - } - } - else { - const dummy: rcTupType[dim]; - const dummyRange = dummy..dummy; - if dim == 0 { - return recurse((dummyRange.type,), dim+1); - } - else { - return recurse(((...curType), dummyRange.type), dim+1); - } - } - } + // catch-all: nothing here is supported, just pretend creating a proto slice. + // The branch where this call is will be dropped during resolution. We just + // want to avoid resolution errors before that happens + proc chpl__createConstProtoSlice(x, slicingExprs... ) { + return new chpl__protoSlice(); + } - return recurse(nothing, 0); - } + proc chpl__ave_exprCanBeProtoSlice(base, idxExprs...) param: bool { + return chpl__ave_baseTypeSupports(base) && + chpl__ave_idxExprsSupport(base.idxType, (...idxExprs)); + } - var ret: createRangifiedType(rcTup.type); + proc chpl__ave_protoSlicesSupportAssignment(a: chpl__protoSlice, + b: chpl__protoSlice) param: bool { + if a.isRankChange != b.isRankChange then return false; //or assert? - for param i in 0.. 
b; } + + // check if both arguments are local without `.locale` or `here` + proc chpl__bothLocal(const ref a, const ref b) { + extern proc chpl_equals_localeID(const ref x, const ref y): bool; + + const aLoc = __primitive("_wide_get_locale", a._value); + const bLoc = __primitive("_wide_get_locale", b._value); + + return chpl_equals_localeID(aLoc, bLoc) && + chpl_equals_localeID(aLoc, here_id); + } } diff --git a/modules/internal/ChapelShortArrayTransfer.chpl b/modules/internal/ChapelShortArrayTransfer.chpl index 812178b6e326..6a41f63eef2c 100644 --- a/modules/internal/ChapelShortArrayTransfer.chpl +++ b/modules/internal/ChapelShortArrayTransfer.chpl @@ -40,6 +40,7 @@ module ChapelShortArrayTransfer { inline proc chpl__dynamicCheckShortArrayTransfer(a, b) { param localCompilation = _local && CHPL_LOCALE_MODEL=="flat"; const sizeOk = a.sizeAs(uint) < shortArrayTransferThreshold; + if debugShortArrayTransfer { chpl_debug_writeln(" Size: ", a.sizeAs(uint), " Threshold: ", shortArrayTransferThreshold); @@ -48,21 +49,22 @@ module ChapelShortArrayTransfer { else chpl_debug_writeln(" size doesn't qualify"); } + if localCompilation { return sizeOk; } else { // No `.locale` to avoid overheads. Note that this is an optimization for // fast-running code. Small things matter. 
- // TODO, use chpl__bothLocal - const sameLocale = __primitive("_wide_get_locale", a) == - __primitive("_wide_get_locale", b); - if sameLocale then - chpl_debug_writeln(" locality qualifies"); - else - chpl_debug_writeln(" locality does not qualify"); + const bothLocal = chpl__bothLocal(a, b); + if debugShortArrayTransfer { + if bothLocal then + chpl_debug_writeln(" locality qualifies"); + else + chpl_debug_writeln(" locality does not qualify"); + } - return sizeOk && sameLocale; + return sizeOk && bothLocal; } } } From 52ac4b3ab896eb25f650f68efc8d4eeed9a6342a Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Fri, 28 Jun 2024 17:27:38 -0700 Subject: [PATCH 73/88] Put the cast back Signed-off-by: Engin Kayraklioglu --- modules/internal/ChapelArrayViewElision.chpl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/modules/internal/ChapelArrayViewElision.chpl b/modules/internal/ChapelArrayViewElision.chpl index 8474f5425c31..b7dc3fd209de 100644 --- a/modules/internal/ChapelArrayViewElision.chpl +++ b/modules/internal/ChapelArrayViewElision.chpl @@ -292,6 +292,12 @@ module ChapelArrayViewElision { lhs.ranges == rhs.ranges; } + + // we need this because we support assignment between two proto slices + inline operator :(ref a: chpl__protoSlice, type b: chpl__protoSlice) { + compilerError("Should never cast proto slices"); + } + proc isProtoSlice(a) param { return isSubtype(a.type, chpl__protoSlice); } // From e4dd6243b7e3a16a1032b4e8f9a7d6fd3440ab62 Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Mon, 1 Jul 2024 10:46:44 -0700 Subject: [PATCH 74/88] Cover one more case pointed out by Jeremiah Signed-off-by: Engin Kayraklioglu --- modules/internal/ChapelArrayViewElision.chpl | 3 +- .../arrayViewElision/rankChanges.chpl | 4 ++ .../arrayViewElision/rankChanges.good | 40 ++++++++++++++++++- 3 files changed, 44 insertions(+), 3 deletions(-) diff --git a/modules/internal/ChapelArrayViewElision.chpl b/modules/internal/ChapelArrayViewElision.chpl index 
b7dc3fd209de..4597da7c218e 100644 --- a/modules/internal/ChapelArrayViewElision.chpl +++ b/modules/internal/ChapelArrayViewElision.chpl @@ -141,7 +141,8 @@ module ChapelArrayViewElision { this.ranges = tupleOfRangesSlice(ptrToArr.deref().dims(), slicingExprs); } else if _validRankChangeArgs(slicingExprs, ptrToArr.deref().idxType) { - this.ranges = rangify(slicingExprs); + this.ranges = tupleOfRangesSlice(ptrToArr.deref().dims(), + rangify(slicingExprs)); this.isRankChange = true; } else { diff --git a/test/optimizations/arrayViewElision/rankChanges.chpl b/test/optimizations/arrayViewElision/rankChanges.chpl index 7a55569426b7..6dede159716e 100644 --- a/test/optimizations/arrayViewElision/rankChanges.chpl +++ b/test/optimizations/arrayViewElision/rankChanges.chpl @@ -11,6 +11,10 @@ proc testAndReset() { A[3, 1..3] = B[3, 1..3]; testAndReset(); A[1..3, 3] = B[1..3, 3]; testAndReset(); +// supported: +A[3, ..] = B[3, ..]; testAndReset(); +A[.., 3] = B[.., 3]; testAndReset(); + // unsupported: A[3, 1..3] = B[1..3, 3]; testAndReset(); A[1..3, 3] = B[3, 1..3]; testAndReset(); diff --git a/test/optimizations/arrayViewElision/rankChanges.good b/test/optimizations/arrayViewElision/rankChanges.good index 3780853972f7..d23ae64ca555 100644 --- a/test/optimizations/arrayViewElision/rankChanges.good +++ b/test/optimizations/arrayViewElision/rankChanges.good @@ -18,7 +18,27 @@ ArrayViewElision supported rankChanges.chpl:12 range(int(64),both,one) int(64) -ArrayViewElision not supported rankChanges.chpl:15 +ArrayViewElision supported rankChanges.chpl:15 + lhsBaseType: [domain(2,int(64),one)] int(64) + lhsIndexingExprs: + int(64) + range(int(64),neither,one) + rhsBaseType: [domain(2,int(64),one)] int(64) + rhsIndexingExprs: + int(64) + range(int(64),neither,one) + +ArrayViewElision supported rankChanges.chpl:16 + lhsBaseType: [domain(2,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),neither,one) + int(64) + rhsBaseType: [domain(2,int(64),one)] int(64) + rhsIndexingExprs: 
+ range(int(64),neither,one) + int(64) + +ArrayViewElision not supported rankChanges.chpl:19 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: int(64) @@ -28,7 +48,7 @@ ArrayViewElision not supported rankChanges.chpl:15 range(int(64),both,one) int(64) -ArrayViewElision not supported rankChanges.chpl:16 +ArrayViewElision not supported rankChanges.chpl:20 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: range(int(64),both,one) @@ -54,6 +74,22 @@ ArrayViewElision not supported rankChanges.chpl:16 1 1 1 1 1 1 1 1 1 1 + Size: 5 Threshold: 50 + size qualifies +1 1 1 1 1 +1 1 1 1 1 +2 2 2 2 2 +1 1 1 1 1 +1 1 1 1 1 + + Size: 5 Threshold: 50 + size qualifies +1 1 2 1 1 +1 1 2 1 1 +1 1 2 1 1 +1 1 2 1 1 +1 1 2 1 1 + 1 1 1 1 1 1 1 1 1 1 2 2 2 1 1 From a75fbd908ec55ed30d05503c1d60f04bd63a3a89 Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Mon, 1 Jul 2024 15:28:26 -0700 Subject: [PATCH 75/88] Add header comments to explain optimizations Signed-off-by: Engin Kayraklioglu --- compiler/optimizations/arrayViewElision.cpp | 71 +++++++++++++++++++ modules/internal/ChapelArrayViewElision.chpl | 39 ++++++++++ .../internal/ChapelShortArrayTransfer.chpl | 6 ++ 3 files changed, 116 insertions(+) diff --git a/compiler/optimizations/arrayViewElision.cpp b/compiler/optimizations/arrayViewElision.cpp index 1e4cd8faeddd..b755e9446a45 100644 --- a/compiler/optimizations/arrayViewElision.cpp +++ b/compiler/optimizations/arrayViewElision.cpp @@ -26,6 +26,77 @@ #include "resolution.h" #include "view.h" +// Array View Elision (AVE) aims to optimize assignments involving array views. +// Currently, this is limited to: +// +// slice = slice, and +// rank-change = rank-change +// +// This is mostly out of abundance of caution and could be extended to +// assignments involving arrays, too +// +// The gist of the implementation is based on eliding array views from +// operations such as the ones above. 
Note that this implies that the following +// cannot be covered: +// +// ref slice = A[1..5]; +// slice = A[6..10]; +// +// Determining whether `slice` can be dropped is more complicated than I +// could take on at the moment. So, both sides of the assignments must be +// array-view generating expressions for this optimization to fire. +// +// There are two parts of this optimization: +// +// 1. Pre-normalize (ArrayViewElisionTransformer is the type doing this) +// +// Given a statement like +// +// A[x] = B[y]; +// +// we generate +// +// param array_view_elision: bool; // will be replaced during resolution +// if (array_view_elision) { +// var protoSlice1 = chpl__createProtoSlice(A, x); +// var protoSlice2 = chpl__createProtoSlice(B, y); +// +// __primitive(PRIM_PROTO_SLICE_ASSIGN, protoSlice1, protoSlice2); +// } +// else { +// A[x] = B[y]; +// } +// +// Here the "protoSlice" has type `chpl__protoSlice`. See +// modules/internal/ChapelArrayViewElision.chpl for the details of that type. +// The main purpose of that type is to represent the expression that would +// create an array view, without actually creating it. +// +// 2. During prefold (ArrayViewElisionPrefolder is the type doing this) +// +// Operation revolves around `PRIM_PROTO_SLICE_ASSIGN`. The +// ArrayViewElisionPrefolder is in charge of finding the other relevant AST (the +// CondStmt, the protoSlice temps etc.) and transforming the conditional. +// +// Statically, chpl__ave_exprCanBeProtoSlice is called on both protoSlices to +// make sure that the module code is OK with creating protoSlices out of those +// expressions. We also check whether two protoSlices can be assigned to one +// another. This is done by chpl__ave_protoSlicesSupportAssignment. If `fLocal`, +// that's sufficient. Calls to that function are inserted, resolved, the result +// is collected, and finally the calls are removed.
At that point, we drop the +// `array_view_elision` flag completely, and replace it with `true` or `false`, +// after which the conditional statement is constant-folded. +// +// If not `fLocal`, we also call `chpl__bothLocal` and replace the flag with the +// result of that. Note that this is a dynamic check, meaning that the +// conditional will not be removed. +// +// This optimization is on-by-default. It can be controlled with +// `--[no-]array-view-elision`. Additionally, there's also the +// `--report-array-view-elision` flag to enable some output during compilation +// to help with understanding what's optimized and what's not. + + ArrayViewElisionTransformer::ArrayViewElisionTransformer(CallExpr* origCall): origCall_(origCall) { diff --git a/modules/internal/ChapelArrayViewElision.chpl b/modules/internal/ChapelArrayViewElision.chpl index 4597da7c218e..95a2bedd7b05 100644 --- a/modules/internal/ChapelArrayViewElision.chpl +++ b/modules/internal/ChapelArrayViewElision.chpl @@ -28,6 +28,9 @@ module ChapelArrayViewElision { // // compiler interface // + // Calls to these functions are inserted by the compiler. See + // compiler/optimizations/arrayViewElision.cpp for how the compiler makes use + // of these functions. proc chpl__createProtoSlice(ref Arr, slicingExprs ...) where chpl__createProtoSliceArgCheck(Arr, slicingExprs) { @@ -96,6 +99,36 @@ module ChapelArrayViewElision { // // proto slice type // + // This is the type we create in lieu of full-blown array views. The key + // functionalities of this record are: + // + // 1. Supporting assignment with `=` + // 2. Supporting bulk transfer + // 3. Supporting serial and parallel iterations for non-bulk array transfer. + // + // Gotchas/caveats for the implementation: + // + // 1. We don't have `ref` fields yet. So, `ptrToArr` is used, and it stores + // the `c_addrOf` of the array in question. So it points to the _array + // record. + // 2. `var ranges` is tricky.
We want to avoid creating domains unless given + // by the user. In addition, I also wanted to avoid creating tuples, again, + // unless given by the user. An important context for this optimization is + // that it is for small array transfers, so small overheads like that could + // add up. In order to support that, `var ranges` could have 3 different + // types. It could be + // a. a range, + // b. a tuple of ranges, + // c. a domain (only if that's what the user gives us) + // There are some conditionals to handle that in the code, so heads up. + // 3. Rank-change views are represented by slices. Right now, this + // optimization only fires for rank-changes if: + // a. both sides of the assignment are rank changes + // b. both rank-changes have the same number of arguments + // This implies that such rank-changes can be represented by slices where + // the collapsed dimensions could be ranges of size 1. `param isRankChange` + // field tells you whether a particular protoSlice is actually a + // rank-change logically. record chpl__protoSlice { param rank; @@ -213,6 +246,12 @@ module ChapelArrayViewElision { return ptrToArr.deref().isRectangular(); } + // NOTE: the iterators below are not fun to look at because of all the + // repetition. We want to have `ref` yield intent if the protoSlice was + // based on a non-const array and `const` yield intent if it was a const + // array. The pragma `reference to const when const this` doesn't work here, + // nor do we have good support for yield intent overloading. + // https://github.com/chapel-lang/chapel/issues/7000 is related.
iter these() ref where !isConst { if rank == 1 then { foreach elem in chpl__serialViewIter1D(ptrToArr.deref()._instance, diff --git a/modules/internal/ChapelShortArrayTransfer.chpl b/modules/internal/ChapelShortArrayTransfer.chpl index 6a41f63eef2c..38ddbf69b5cf 100644 --- a/modules/internal/ChapelShortArrayTransfer.chpl +++ b/modules/internal/ChapelShortArrayTransfer.chpl @@ -18,6 +18,11 @@ * limitations under the License. */ +// This is a helper module to support the Short Array Transfer optimization. +// For array-to-array assignments where number of elements (or maybe the data +// size?) are below some threshold, just looping serially is more efficient +// than both parallel iteration and memcpy. + module ChapelShortArrayTransfer { use ChapelBase; use ChapelDebugPrint only chpl_debug_writeln; @@ -31,6 +36,7 @@ module ChapelShortArrayTransfer { config const shortArrayTransferThreshold = 50; + proc chpl__staticCheckShortArrayTransfer(a, b) param { // this is the case I'm focusing on in the initial PR. This can definitely // be loosened up... by a lot. 
From f5bee7e0c09c97930ddf54255859578f06d596db Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Mon, 1 Jul 2024 15:32:44 -0700 Subject: [PATCH 76/88] Add one more condition in comment, add test Signed-off-by: Engin Kayraklioglu --- modules/internal/ChapelArrayViewElision.chpl | 1 + .../arrayViewElision/rankChangeFunky.chpl | 6 ++++++ .../arrayViewElision/rankChangeFunky.compopts | 1 + .../arrayViewElision/rankChangeFunky.good | 13 +++++++++++++ 4 files changed, 21 insertions(+) create mode 100644 test/optimizations/arrayViewElision/rankChangeFunky.chpl create mode 100644 test/optimizations/arrayViewElision/rankChangeFunky.compopts create mode 100644 test/optimizations/arrayViewElision/rankChangeFunky.good diff --git a/modules/internal/ChapelArrayViewElision.chpl b/modules/internal/ChapelArrayViewElision.chpl index 95a2bedd7b05..7604bbc4d5ff 100644 --- a/modules/internal/ChapelArrayViewElision.chpl +++ b/modules/internal/ChapelArrayViewElision.chpl @@ -125,6 +125,7 @@ module ChapelArrayViewElision { // optimization only fires for rank-changes if: // a. both sides of the assignment are rank changes // b. both rank-changes have the same number of arguments + // c. argument types match pair-wise (int to int, range to range) // This implies that such rank-changes can be represented by slices where // the collapsed dimensions could be ranges of size 1. 
`param isRankChange` // field tells you whether a particular protoSlice is actually a diff --git a/test/optimizations/arrayViewElision/rankChangeFunky.chpl b/test/optimizations/arrayViewElision/rankChangeFunky.chpl new file mode 100644 index 000000000000..36154ef6fc71 --- /dev/null +++ b/test/optimizations/arrayViewElision/rankChangeFunky.chpl @@ -0,0 +1,6 @@ +var A: [1..5, 1..5, 1..5] int = 1; +var B: [1..5, 1..5, 1..5] int = 2; + +A[1, 2, 3..12] = B[1..5, 2, 3..7]; + +writeln(A); diff --git a/test/optimizations/arrayViewElision/rankChangeFunky.compopts b/test/optimizations/arrayViewElision/rankChangeFunky.compopts new file mode 100644 index 000000000000..4e1569775cbb --- /dev/null +++ b/test/optimizations/arrayViewElision/rankChangeFunky.compopts @@ -0,0 +1 @@ +--report-array-view-elision diff --git a/test/optimizations/arrayViewElision/rankChangeFunky.good b/test/optimizations/arrayViewElision/rankChangeFunky.good new file mode 100644 index 000000000000..8c88811a36ca --- /dev/null +++ b/test/optimizations/arrayViewElision/rankChangeFunky.good @@ -0,0 +1,13 @@ +ArrayViewElision not supported rankChangeFunky.chpl:4 + lhsBaseType: [domain(3,int(64),one)] int(64) + lhsIndexingExprs: + int(64) + int(64) + range(int(64),both,one) + rhsBaseType: [domain(3,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),both,one) + int(64) + range(int(64),both,one) + +rankChangeFunky.chpl:4: error: rank mismatch in array assignment From 935c38297e10caccfc141791495db265b59b9c31 Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Wed, 3 Jul 2024 13:41:27 -0700 Subject: [PATCH 77/88] Partially address Ben's feedback Signed-off-by: Engin Kayraklioglu --- compiler/optimizations/arrayViewElision.cpp | 6 ------ modules/internal/ChapelShortArrayTransfer.chpl | 4 ++-- test/optimizations/arrayViewElision/remoteDR.hcpl | 1 - 3 files changed, 2 insertions(+), 9 deletions(-) delete mode 100644 test/optimizations/arrayViewElision/remoteDR.hcpl diff --git 
a/compiler/optimizations/arrayViewElision.cpp b/compiler/optimizations/arrayViewElision.cpp index b755e9446a45..e7159c5d7832 100644 --- a/compiler/optimizations/arrayViewElision.cpp +++ b/compiler/optimizations/arrayViewElision.cpp @@ -24,7 +24,6 @@ #include "global-ast-vecs.h" #include "passes.h" #include "resolution.h" -#include "view.h" // Array View Elision (AVE) aims to optimize assignments involving array views. // Currently, this is limited to: @@ -343,7 +342,6 @@ bool ArrayViewElisionPrefolder::handleOneProtoSlice(bool isLhs) { CallExpr* typeCheck = new CallExpr("chpl__ave_exprCanBeProtoSlice"); for_actuals (actual, call) { - //nprint_view(actual); INT_ASSERT(isSymExpr(actual)); typeCheck->insertAtTail(actual->copy()); } @@ -401,8 +399,6 @@ CallExpr* ArrayViewElisionPrefolder::findOneProtoSliceCall(Expr* e) { CallExpr* tmpMove = toCallExpr(sym->getSingleDef()->getStmtExpr()); INT_ASSERT(tmpMove && tmpMove->isPrimitive(PRIM_MOVE)); - //nprint_view(lhsTmpMove); - SymExpr* tmpSymExpr = toSymExpr(tmpMove->get(2)); INT_ASSERT(tmpSymExpr); @@ -410,8 +406,6 @@ CallExpr* ArrayViewElisionPrefolder::findOneProtoSliceCall(Expr* e) { CallExpr* move = toCallExpr(tmpSym->getSingleDef()->getStmtExpr()); INT_ASSERT(move && move->isPrimitive(PRIM_MOVE)); - //nprint_view(lhsMove); - return toCallExpr(move->get(2)); } diff --git a/modules/internal/ChapelShortArrayTransfer.chpl b/modules/internal/ChapelShortArrayTransfer.chpl index 38ddbf69b5cf..8cdbb0e00f03 100644 --- a/modules/internal/ChapelShortArrayTransfer.chpl +++ b/modules/internal/ChapelShortArrayTransfer.chpl @@ -38,8 +38,8 @@ module ChapelShortArrayTransfer { proc chpl__staticCheckShortArrayTransfer(a, b) param { - // this is the case I'm focusing on in the initial PR. This can definitely - // be loosened up... by a lot. + // Engin: this is the case I'm focusing on in the initial PR. This can + // definitely be loosened up... by a lot. 
return !disableShortArrayTransfer && isProtoSlice(a) && isProtoSlice(b); } diff --git a/test/optimizations/arrayViewElision/remoteDR.hcpl b/test/optimizations/arrayViewElision/remoteDR.hcpl deleted file mode 100644 index 90661451f784..000000000000 --- a/test/optimizations/arrayViewElision/remoteDR.hcpl +++ /dev/null @@ -1 +0,0 @@ -var A: From 9972073fd3d56550ab7f6a5e685c8c02a399fe12 Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Wed, 3 Jul 2024 13:58:52 -0700 Subject: [PATCH 78/88] Add assertions to slices test Signed-off-by: Engin Kayraklioglu --- .../slices-1d-domain.comm-none.good | 4 ++-- .../arrayViewElision/slices-1d-domain.good | 4 ++-- .../slices-2d-domain.comm-none.good | 4 ++-- .../arrayViewElision/slices-2d-domain.good | 4 ++-- .../arrayViewElision/slices.chpl | 24 ++++++++++++------- 5 files changed, 23 insertions(+), 17 deletions(-) diff --git a/test/optimizations/arrayViewElision/slices-1d-domain.comm-none.good b/test/optimizations/arrayViewElision/slices-1d-domain.comm-none.good index ef70b2610bcd..390e6acb35c2 100644 --- a/test/optimizations/arrayViewElision/slices-1d-domain.comm-none.good +++ b/test/optimizations/arrayViewElision/slices-1d-domain.comm-none.good @@ -1,4 +1,4 @@ -ArrayViewElision supported slices.chpl:23 +ArrayViewElision supported slices.chpl:24 lhsBaseType: [domain(1,int(64),one)] int(64) lhsIndexingExprs: domain(1,int(64),one) @@ -6,7 +6,7 @@ ArrayViewElision supported slices.chpl:23 rhsIndexingExprs: domain(1,int(64),one) -ArrayViewElision supported slices.chpl:23 +ArrayViewElision supported slices.chpl:24 lhsBaseType: [domain(1,int(64),one)] int(64) lhsIndexingExprs: domain(1,int(64),one) diff --git a/test/optimizations/arrayViewElision/slices-1d-domain.good b/test/optimizations/arrayViewElision/slices-1d-domain.good index e8f91f6bf314..d20fc9b51b38 100644 --- a/test/optimizations/arrayViewElision/slices-1d-domain.good +++ b/test/optimizations/arrayViewElision/slices-1d-domain.good @@ -1,4 +1,4 @@ -ArrayViewElision 
supported (dynamic locality check required) slices.chpl:23 +ArrayViewElision supported (dynamic locality check required) slices.chpl:24 lhsBaseType: [domain(1,int(64),one)] int(64) lhsIndexingExprs: domain(1,int(64),one) @@ -6,7 +6,7 @@ ArrayViewElision supported (dynamic locality check required) slices.chpl:23 rhsIndexingExprs: domain(1,int(64),one) -ArrayViewElision supported (dynamic locality check required) slices.chpl:23 +ArrayViewElision supported (dynamic locality check required) slices.chpl:24 lhsBaseType: [domain(1,int(64),one)] int(64) lhsIndexingExprs: domain(1,int(64),one) diff --git a/test/optimizations/arrayViewElision/slices-2d-domain.comm-none.good b/test/optimizations/arrayViewElision/slices-2d-domain.comm-none.good index d5b8de10f2b6..9e85e4626b52 100644 --- a/test/optimizations/arrayViewElision/slices-2d-domain.comm-none.good +++ b/test/optimizations/arrayViewElision/slices-2d-domain.comm-none.good @@ -1,4 +1,4 @@ -ArrayViewElision supported slices.chpl:23 +ArrayViewElision supported slices.chpl:24 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: domain(2,int(64),one) @@ -6,7 +6,7 @@ ArrayViewElision supported slices.chpl:23 rhsIndexingExprs: domain(2,int(64),one) -ArrayViewElision supported slices.chpl:23 +ArrayViewElision supported slices.chpl:24 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: domain(2,int(64),one) diff --git a/test/optimizations/arrayViewElision/slices-2d-domain.good b/test/optimizations/arrayViewElision/slices-2d-domain.good index f5abd827ad94..1a9478d6684b 100644 --- a/test/optimizations/arrayViewElision/slices-2d-domain.good +++ b/test/optimizations/arrayViewElision/slices-2d-domain.good @@ -1,4 +1,4 @@ -ArrayViewElision supported (dynamic locality check required) slices.chpl:23 +ArrayViewElision supported (dynamic locality check required) slices.chpl:24 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: domain(2,int(64),one) @@ -6,7 +6,7 @@ ArrayViewElision supported (dynamic 
locality check required) slices.chpl:23 rhsIndexingExprs: domain(2,int(64),one) -ArrayViewElision supported (dynamic locality check required) slices.chpl:23 +ArrayViewElision supported (dynamic locality check required) slices.chpl:24 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: domain(2,int(64),one) diff --git a/test/optimizations/arrayViewElision/slices.chpl b/test/optimizations/arrayViewElision/slices.chpl index 90b20ba4ab79..803d941367a5 100644 --- a/test/optimizations/arrayViewElision/slices.chpl +++ b/test/optimizations/arrayViewElision/slices.chpl @@ -1,5 +1,4 @@ -config param useDomain = true; -config param rank = 1; +config param useDomain = true, rank = 1; proc multuplify(param rank, x) { var ret: rank*x.type; @@ -11,27 +10,34 @@ proc multuplify(param rank, x) { var arrRanges = multuplify(rank, 1..5); -var A: [(...arrRanges)] int = 1; -var B: [(...arrRanges)] int = 2; +param aVal = 1, bVal = 2; + +var A: [(...arrRanges)] int = aVal; +var B: [(...arrRanges)] int = bVal; var counter = 1; proc test(dstRange, srcRange) { const dstSlice = multuplify(rank, dstRange); const srcSlice = multuplify(rank, dstRange); - if useDomain { + if useDomain then A[{(...dstSlice)}] = B[{(...srcSlice)}]; - } - else { + else A[(...dstSlice)] = B[(...srcSlice)]; - } + + const checkDom = A.domain[(...dstSlice)]; writeln("Test ", counter); + for idx in A.domain { + const checkVal = if checkDom.contains(idx) then bVal else aVal; + assert(A[idx] == checkVal); + } + writeln(A); writeln(); counter += 1; - A = 1; // reset + A = aVal; // reset } writeln("Set first two:"); From 92e5f1b52ad937659fb4b350e5edd0576d27669f Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Wed, 3 Jul 2024 14:01:55 -0700 Subject: [PATCH 79/88] Fix a good file Signed-off-by: Engin Kayraklioglu --- .../rankChanges.comm-none.good | 104 ++++++++++++++++++ .../arrayViewElision/rankChanges.good | 12 +- 2 files changed, 112 insertions(+), 4 deletions(-) create mode 100644 
test/optimizations/arrayViewElision/rankChanges.comm-none.good diff --git a/test/optimizations/arrayViewElision/rankChanges.comm-none.good b/test/optimizations/arrayViewElision/rankChanges.comm-none.good new file mode 100644 index 000000000000..d23ae64ca555 --- /dev/null +++ b/test/optimizations/arrayViewElision/rankChanges.comm-none.good @@ -0,0 +1,104 @@ +ArrayViewElision supported rankChanges.chpl:11 + lhsBaseType: [domain(2,int(64),one)] int(64) + lhsIndexingExprs: + int(64) + range(int(64),both,one) + rhsBaseType: [domain(2,int(64),one)] int(64) + rhsIndexingExprs: + int(64) + range(int(64),both,one) + +ArrayViewElision supported rankChanges.chpl:12 + lhsBaseType: [domain(2,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),both,one) + int(64) + rhsBaseType: [domain(2,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),both,one) + int(64) + +ArrayViewElision supported rankChanges.chpl:15 + lhsBaseType: [domain(2,int(64),one)] int(64) + lhsIndexingExprs: + int(64) + range(int(64),neither,one) + rhsBaseType: [domain(2,int(64),one)] int(64) + rhsIndexingExprs: + int(64) + range(int(64),neither,one) + +ArrayViewElision supported rankChanges.chpl:16 + lhsBaseType: [domain(2,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),neither,one) + int(64) + rhsBaseType: [domain(2,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),neither,one) + int(64) + +ArrayViewElision not supported rankChanges.chpl:19 + lhsBaseType: [domain(2,int(64),one)] int(64) + lhsIndexingExprs: + int(64) + range(int(64),both,one) + rhsBaseType: [domain(2,int(64),one)] int(64) + rhsIndexingExprs: + range(int(64),both,one) + int(64) + +ArrayViewElision not supported rankChanges.chpl:20 + lhsBaseType: [domain(2,int(64),one)] int(64) + lhsIndexingExprs: + range(int(64),both,one) + int(64) + rhsBaseType: [domain(2,int(64),one)] int(64) + rhsIndexingExprs: + int(64) + range(int(64),both,one) + + Size: 3 Threshold: 50 + size qualifies +1 1 1 1 1 +1 1 1 1 1 +2 2 2 1 1 +1 1 1 1 1 
+1 1 1 1 1 + + Size: 3 Threshold: 50 + size qualifies +1 1 2 1 1 +1 1 2 1 1 +1 1 2 1 1 +1 1 1 1 1 +1 1 1 1 1 + + Size: 5 Threshold: 50 + size qualifies +1 1 1 1 1 +1 1 1 1 1 +2 2 2 2 2 +1 1 1 1 1 +1 1 1 1 1 + + Size: 5 Threshold: 50 + size qualifies +1 1 2 1 1 +1 1 2 1 1 +1 1 2 1 1 +1 1 2 1 1 +1 1 2 1 1 + +1 1 1 1 1 +1 1 1 1 1 +2 2 2 1 1 +1 1 1 1 1 +1 1 1 1 1 + +1 1 2 1 1 +1 1 2 1 1 +1 1 2 1 1 +1 1 1 1 1 +1 1 1 1 1 + diff --git a/test/optimizations/arrayViewElision/rankChanges.good b/test/optimizations/arrayViewElision/rankChanges.good index d23ae64ca555..53c5916135db 100644 --- a/test/optimizations/arrayViewElision/rankChanges.good +++ b/test/optimizations/arrayViewElision/rankChanges.good @@ -1,4 +1,4 @@ -ArrayViewElision supported rankChanges.chpl:11 +ArrayViewElision supported (dynamic locality check required) rankChanges.chpl:11 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: int(64) @@ -8,7 +8,7 @@ ArrayViewElision supported rankChanges.chpl:11 int(64) range(int(64),both,one) -ArrayViewElision supported rankChanges.chpl:12 +ArrayViewElision supported (dynamic locality check required) rankChanges.chpl:12 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: range(int(64),both,one) @@ -18,7 +18,7 @@ ArrayViewElision supported rankChanges.chpl:12 range(int(64),both,one) int(64) -ArrayViewElision supported rankChanges.chpl:15 +ArrayViewElision supported (dynamic locality check required) rankChanges.chpl:15 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: int(64) @@ -28,7 +28,7 @@ ArrayViewElision supported rankChanges.chpl:15 int(64) range(int(64),neither,one) -ArrayViewElision supported rankChanges.chpl:16 +ArrayViewElision supported (dynamic locality check required) rankChanges.chpl:16 lhsBaseType: [domain(2,int(64),one)] int(64) lhsIndexingExprs: range(int(64),neither,one) @@ -60,6 +60,7 @@ ArrayViewElision not supported rankChanges.chpl:20 Size: 3 Threshold: 50 size qualifies + locality qualifies 1 1 1 1 1 1 1 1 1 1 2 2 2 
1 1 @@ -68,6 +69,7 @@ ArrayViewElision not supported rankChanges.chpl:20 Size: 3 Threshold: 50 size qualifies + locality qualifies 1 1 2 1 1 1 1 2 1 1 1 1 2 1 1 @@ -76,6 +78,7 @@ ArrayViewElision not supported rankChanges.chpl:20 Size: 5 Threshold: 50 size qualifies + locality qualifies 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 @@ -84,6 +87,7 @@ ArrayViewElision not supported rankChanges.chpl:20 Size: 5 Threshold: 50 size qualifies + locality qualifies 1 1 2 1 1 1 1 2 1 1 1 1 2 1 1 From 933666d708e118030b8900f311d63cfaf4d68b0a Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Wed, 3 Jul 2024 14:09:41 -0700 Subject: [PATCH 80/88] Disable optimization in a bulkcomm test Signed-off-by: Engin Kayraklioglu --- test/optimizations/bulkcomm/bharshbarg/remote.compopts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/optimizations/bulkcomm/bharshbarg/remote.compopts b/test/optimizations/bulkcomm/bharshbarg/remote.compopts index 3cd49f6f5652..d85ae900b909 100644 --- a/test/optimizations/bulkcomm/bharshbarg/remote.compopts +++ b/test/optimizations/bulkcomm/bharshbarg/remote.compopts @@ -1 +1 @@ --sdebugBulkTransfer -sdebugDefaultDistBulkTransfer -suseBulkTransferStride -M ../common +--no-array-view-elision -sdebugBulkTransfer -sdebugDefaultDistBulkTransfer -suseBulkTransferStride -M ../common From 647764da6d5ec86921eddc9c8f5f3297d6d133ad Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Wed, 3 Jul 2024 14:40:16 -0700 Subject: [PATCH 81/88] Tighten expected get counts in a commcount test Signed-off-by: Engin Kayraklioglu --- test/performance/ferguson/array/remote-array-write.chpl | 2 +- test/performance/ferguson/record/remote-record-write-copy.chpl | 2 +- test/performance/ferguson/record/remote-record-write.chpl | 2 +- test/performance/ferguson/tuple/remote-tuple-write-copy.chpl | 2 +- .../ferguson/tuple/remote-tuple-write-small-copy.chpl | 2 +- test/performance/ferguson/tuple/remote-tuple-write-small.chpl | 2 +- 
test/performance/ferguson/tuple/remote-tuple-write.chpl | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/test/performance/ferguson/array/remote-array-write.chpl b/test/performance/ferguson/array/remote-array-write.chpl index 104e10698d9b..1b22b02fc185 100644 --- a/test/performance/ferguson/array/remote-array-write.chpl +++ b/test/performance/ferguson/array/remote-array-write.chpl @@ -17,4 +17,4 @@ stop(); writeln(A[1]); writeln(A[n]); -report(maxGets=5, maxOns=1); +report(maxGets=3, maxOns=1); diff --git a/test/performance/ferguson/record/remote-record-write-copy.chpl b/test/performance/ferguson/record/remote-record-write-copy.chpl index 9816e47c9fd8..070d8ed9de89 100644 --- a/test/performance/ferguson/record/remote-record-write-copy.chpl +++ b/test/performance/ferguson/record/remote-record-write-copy.chpl @@ -26,4 +26,4 @@ stop(); writeln(A[1]); writeln(A[n]); -report(maxGets=5, maxOns=1); +report(maxGets=3, maxOns=1); diff --git a/test/performance/ferguson/record/remote-record-write.chpl b/test/performance/ferguson/record/remote-record-write.chpl index 1de28eba1298..c3c3c763e01f 100644 --- a/test/performance/ferguson/record/remote-record-write.chpl +++ b/test/performance/ferguson/record/remote-record-write.chpl @@ -27,4 +27,4 @@ stop(); writeln(A[1]); writeln(A[n]); -report(maxGets=5, maxOns=1); +report(maxGets=3, maxOns=1); diff --git a/test/performance/ferguson/tuple/remote-tuple-write-copy.chpl b/test/performance/ferguson/tuple/remote-tuple-write-copy.chpl index a66b9f4270cc..bcc975ffc2aa 100644 --- a/test/performance/ferguson/tuple/remote-tuple-write-copy.chpl +++ b/test/performance/ferguson/tuple/remote-tuple-write-copy.chpl @@ -22,4 +22,4 @@ stop(); writeln(A[1]); writeln(A[n]); -report(maxGets=5, maxOns=1); +report(maxGets=3, maxOns=1); diff --git a/test/performance/ferguson/tuple/remote-tuple-write-small-copy.chpl b/test/performance/ferguson/tuple/remote-tuple-write-small-copy.chpl index 84361283a5e9..5a45f6cf866c 100644 --- 
a/test/performance/ferguson/tuple/remote-tuple-write-small-copy.chpl +++ b/test/performance/ferguson/tuple/remote-tuple-write-small-copy.chpl @@ -22,4 +22,4 @@ stop(); writeln(A[1]); writeln(A[n]); -report(maxGets=5, maxOns=1); +report(maxGets=3, maxOns=1); diff --git a/test/performance/ferguson/tuple/remote-tuple-write-small.chpl b/test/performance/ferguson/tuple/remote-tuple-write-small.chpl index c609f973ea76..40bbdc890af4 100644 --- a/test/performance/ferguson/tuple/remote-tuple-write-small.chpl +++ b/test/performance/ferguson/tuple/remote-tuple-write-small.chpl @@ -20,4 +20,4 @@ stop(); writeln(A[1]); writeln(A[n]); -report(maxGets=5, maxOns=1); +report(maxGets=3, maxOns=1); diff --git a/test/performance/ferguson/tuple/remote-tuple-write.chpl b/test/performance/ferguson/tuple/remote-tuple-write.chpl index 10c44addcb8a..0678deeded57 100644 --- a/test/performance/ferguson/tuple/remote-tuple-write.chpl +++ b/test/performance/ferguson/tuple/remote-tuple-write.chpl @@ -20,4 +20,4 @@ stop(); writeln(A[1]); writeln(A[n]); -report(maxGets=5, maxOns=1); +report(maxGets=3, maxOns=1); From 98b78b15a4643b3b4c0384af7fc50f5deaa6043e Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Wed, 3 Jul 2024 15:14:26 -0700 Subject: [PATCH 82/88] Trivial test changes Signed-off-by: Engin Kayraklioglu --- modules/internal/ChapelShortArrayTransfer.chpl | 2 +- test/compflags/bradc/help/userhelp.good | 1 + test/compflags/ferguson/print-module-resolution.good | 4 ++++ .../deadCodeElimination/elliot/countDeadModules.good | 2 +- util/chpl-completion.bash | 6 ++++++ 5 files changed, 13 insertions(+), 2 deletions(-) diff --git a/modules/internal/ChapelShortArrayTransfer.chpl b/modules/internal/ChapelShortArrayTransfer.chpl index 8cdbb0e00f03..0eabba90f7f3 100644 --- a/modules/internal/ChapelShortArrayTransfer.chpl +++ b/modules/internal/ChapelShortArrayTransfer.chpl @@ -33,7 +33,7 @@ module ChapelShortArrayTransfer { @chpldoc.nodoc config param disableShortArrayTransfer = false; 
@chpldoc.nodoc - config const shortArrayTransferThreshold = 50; + config param shortArrayTransferThreshold = 50; diff --git a/test/compflags/bradc/help/userhelp.good b/test/compflags/bradc/help/userhelp.good index 6397f9b1f21c..a66e13a4e5dd 100644 --- a/test/compflags/bradc/help/userhelp.good +++ b/test/compflags/bradc/help/userhelp.good @@ -111,6 +111,7 @@ Optimization Control Options: automatically (dynamic only) --[no-]auto-aggregation Enable [disable] automatically aggregating remote accesses in foralls + --[no-]array-view-elision Enable [disable] array view elision Run-time Semantic Check Options: --[no-]checks Enable [disable] all following run-time diff --git a/test/compflags/ferguson/print-module-resolution.good b/test/compflags/ferguson/print-module-resolution.good index 45fd5bc24657..e967200b5381 100644 --- a/test/compflags/ferguson/print-module-resolution.good +++ b/test/compflags/ferguson/print-module-resolution.good @@ -98,6 +98,8 @@ ArrayViewReindex from print-module-resolution.ChapelStandard.LocaleModel.LocaleModelHelpFlat.LocaleModelHelpSetup.DefaultRectangular.ChapelArray.ArrayViewReindex ChapelPrivatization from print-module-resolution.ChapelStandard.LocaleModel.LocaleModelHelpFlat.LocaleModelHelpSetup.DefaultRectangular.ChapelArray.ChapelPrivatization +ChapelArrayViewElision + from print-module-resolution.ChapelStandard.LocaleModel.LocaleModelHelpFlat.LocaleModelHelpSetup.DefaultRectangular.ChapelArray.ChapelArrayViewElision ChapelIO from print-module-resolution.ChapelStandard.LocaleModel.LocaleModelHelpFlat.LocaleModelHelpSetup.DefaultRectangular.ChapelArray.ChapelDomain.FormattedIO.IO.ChapelIO Types @@ -134,6 +136,8 @@ QuickSort from print-module-resolution.ChapelStandard.LocaleModel.LocaleModelHelpFlat.LocaleModelHelpSetup.DefaultRectangular.ChapelArray.Sort.QuickSort Sort from print-module-resolution.ChapelStandard.LocaleModel.LocaleModelHelpFlat.LocaleModelHelpSetup.DefaultRectangular.ChapelArray.Sort +ChapelShortArrayTransfer + from 
print-module-resolution.ChapelStandard.LocaleModel.LocaleModelHelpFlat.LocaleModelHelpSetup.DefaultRectangular.ChapelArray.ChapelShortArrayTransfer ChapelArray from print-module-resolution.ChapelStandard.LocaleModel.LocaleModelHelpFlat.LocaleModelHelpSetup.DefaultRectangular.ChapelArray ChapelHashtable diff --git a/test/optimizations/deadCodeElimination/elliot/countDeadModules.good b/test/optimizations/deadCodeElimination/elliot/countDeadModules.good index 0a40c1a04d76..41beedb48de2 100644 --- a/test/optimizations/deadCodeElimination/elliot/countDeadModules.good +++ b/test/optimizations/deadCodeElimination/elliot/countDeadModules.good @@ -1 +1 @@ -Removed 30 dead modules. +Removed 32 dead modules. diff --git a/util/chpl-completion.bash b/util/chpl-completion.bash index 3a8b52a47570..f6b81a713343 100644 --- a/util/chpl-completion.bash +++ b/util/chpl-completion.bash @@ -23,6 +23,7 @@ _chpl () # developer options local devel_opts="\ --allow-noinit-array-not-pod \ +--array-view-elision \ --atomics \ --auto-aggregation \ --auto-local-access \ @@ -174,6 +175,7 @@ _chpl () --network-atomics \ --nil-checks \ --no-allow-noinit-array-not-pod \ +--no-array-view-elision \ --no-auto-aggregation \ --no-auto-local-access \ --no-bounds-checks \ @@ -277,6 +279,7 @@ _chpl () --no-remove-unreachable-blocks \ --no-replace-array-accesses-with-ref-temps \ --no-report-aliases \ +--no-report-array-view-elision \ --no-report-auto-aggregation \ --no-report-auto-local-access \ --no-report-blocking \ @@ -356,6 +359,7 @@ _chpl () --remove-unreachable-blocks \ --replace-array-accesses-with-ref-temps \ --report-aliases \ +--report-array-view-elision \ --report-auto-aggregation \ --report-auto-local-access \ --report-blocking \ @@ -428,6 +432,7 @@ _chpl () # user options local nodevel_opts="\ +--array-view-elision \ --atomics \ --auto-aggregation \ --auto-local-access \ @@ -494,6 +499,7 @@ _chpl () --munge-user-idents \ --network-atomics \ --nil-checks \ +--no-array-view-elision \ 
--no-auto-aggregation \ --no-auto-local-access \ --no-bounds-checks \ From 6bceef88db464175b3fd835f5137842f8a800db1 Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Wed, 3 Jul 2024 15:29:36 -0700 Subject: [PATCH 83/88] Remove trailing whitespace Signed-off-by: Engin Kayraklioglu --- compiler/optimizations/arrayViewElision.cpp | 4 ++-- modules/internal/ChapelArrayViewElision.chpl | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/compiler/optimizations/arrayViewElision.cpp b/compiler/optimizations/arrayViewElision.cpp index e7159c5d7832..0605ba9e0e93 100644 --- a/compiler/optimizations/arrayViewElision.cpp +++ b/compiler/optimizations/arrayViewElision.cpp @@ -59,13 +59,13 @@ // if (array_view_elision) { // var protoSlice1 = chpl__createProtoSlice(A, x); // var protoSlice2 = chpl__createProtoSlice(B, y); -// +// // __primitive(PRIM_PROTO_SLICE_ASSIGN, protoSlice1, protoSlice2); // } // else { // A[x] = B[y]; // } -// +// // Here the "protoSlice" has type `chpl__protoSlice`. See // modules/internal/ChapelArrayViewElision.chpl for the details of that type. // The main purpose of that type is to represent the expression that would diff --git a/modules/internal/ChapelArrayViewElision.chpl b/modules/internal/ChapelArrayViewElision.chpl index 7604bbc4d5ff..43c596aff91a 100644 --- a/modules/internal/ChapelArrayViewElision.chpl +++ b/modules/internal/ChapelArrayViewElision.chpl @@ -101,13 +101,13 @@ module ChapelArrayViewElision { // // This is the type we create in lieu of full-blown array views. The key // functionality for this record are: - // + // // 1. Supporting assignment with `=` // 2. Supporting bulk transfer // 3. Supporting serial and parallel iterations for non-bulk array transfer. // // Gotchas/caveats for the implementation: - // + // // 1. We don't have `ref` fields yet. So, `ptrToArr` is used, and it stores // the `c_addrOf` of the array in question. So it points to the _array // record. 
@@ -141,7 +141,7 @@ module ChapelArrayViewElision { proc init() { // this constructor is called to create dummy protoSlices that will never - // be used and removed from the AST. + // be used and removed from the AST. this.rank = 1; this.isConst = true; @@ -262,7 +262,7 @@ module ChapelArrayViewElision { } else { - /* + /* Storing `inst` here and iterating over `inst` doesn't seem to work. Check the arrays primer for how that causes issues. Potentially an iterator inlining issue, or memory cleanup going sideways. @@ -285,7 +285,7 @@ module ChapelArrayViewElision { } else { - /* + /* Storing `inst` here and iterating over `inst` doesn't seem to work. Check the arrays primer for how that causes issues. Potentially an iterator inlining issue, or memory cleanup going sideways. @@ -341,7 +341,7 @@ module ChapelArrayViewElision { proc isProtoSlice(a) param { return isSubtype(a.type, chpl__protoSlice); } - // + // // private interface // From 9ece494692908eacc5b55a0fbfb628eb191eb562 Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Wed, 3 Jul 2024 15:44:22 -0700 Subject: [PATCH 84/88] Add the new flag to the man page Signed-off-by: Engin Kayraklioglu --- man/chpl.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/man/chpl.rst b/man/chpl.rst index 769e1a9a42a5..86fea582e56c 100644 --- a/man/chpl.rst +++ b/man/chpl.rst @@ -467,6 +467,12 @@ OPTIONS Enable [disable] optimization of the last statement in forall statements to use aggregated communication. This optimization is disabled by default. +.. _man-array-view-elision: + +**\--[no-]array-view-elision** + + Enable [disable] an optimization eliding array views in some statements. + *Run-time Semantic Check Options* .. 
_man-checks: From e33797a1445053ef14bd2816fba454f11b3e3a0b Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Wed, 3 Jul 2024 16:31:04 -0700 Subject: [PATCH 85/88] Fix GPU runtime Signed-off-by: Engin Kayraklioglu --- runtime/include/localeModels/gpu/chpl-locale-model.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/include/localeModels/gpu/chpl-locale-model.h b/runtime/include/localeModels/gpu/chpl-locale-model.h index 12d68cbd97c5..b7f94c7d8dfc 100644 --- a/runtime/include/localeModels/gpu/chpl-locale-model.h +++ b/runtime/include/localeModels/gpu/chpl-locale-model.h @@ -72,7 +72,7 @@ c_sublocid_t chpl_rt_sublocFromLocaleID(chpl_localeID_t loc) { } static inline -bool chpl_equals_localeID(chpl_localeID_t* loc1, (chpl_localeID_t* loc2) { +int chpl_equals_localeID(chpl_localeID_t* loc1, chpl_localeID_t* loc2) { return loc1->node == loc2->node && loc1->subloc == loc2->subloc; } From ba321eac1a7777327f3a08efedcac992291d21d1 Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Wed, 3 Jul 2024 16:48:08 -0700 Subject: [PATCH 86/88] Bump up the SAT threshold modestly Signed-off-by: Engin Kayraklioglu --- modules/internal/ChapelShortArrayTransfer.chpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/internal/ChapelShortArrayTransfer.chpl b/modules/internal/ChapelShortArrayTransfer.chpl index 0eabba90f7f3..d63c921b5b09 100644 --- a/modules/internal/ChapelShortArrayTransfer.chpl +++ b/modules/internal/ChapelShortArrayTransfer.chpl @@ -33,7 +33,7 @@ module ChapelShortArrayTransfer { @chpldoc.nodoc config param disableShortArrayTransfer = false; @chpldoc.nodoc - config param shortArrayTransferThreshold = 50; + config param shortArrayTransferThreshold = 60; // number of elements From 6ae1be5eaa04fcec16ecad18cc6ea3475f56ccae Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Fri, 5 Jul 2024 16:17:45 -0700 Subject: [PATCH 87/88] Disable SAT with GPU arrays, add test Signed-off-by: Engin Kayraklioglu --- 
.../internal/ChapelShortArrayTransfer.chpl | 41 +++++++++++-------- .../arrayViewElision/gpuViewElision.chpl | 37 +++++++++++++++++ .../arrayViewElision/gpuViewElision.compopts | 1 + .../arrayViewElision/gpuViewElision.execopts | 1 + .../arrayViewElision/gpuViewElision.good | 9 ++++ 5 files changed, 73 insertions(+), 16 deletions(-) create mode 100644 test/gpu/native/optimizations/arrayViewElision/gpuViewElision.chpl create mode 100644 test/gpu/native/optimizations/arrayViewElision/gpuViewElision.compopts create mode 100644 test/gpu/native/optimizations/arrayViewElision/gpuViewElision.execopts create mode 100644 test/gpu/native/optimizations/arrayViewElision/gpuViewElision.good diff --git a/modules/internal/ChapelShortArrayTransfer.chpl b/modules/internal/ChapelShortArrayTransfer.chpl index d63c921b5b09..1a24480b0258 100644 --- a/modules/internal/ChapelShortArrayTransfer.chpl +++ b/modules/internal/ChapelShortArrayTransfer.chpl @@ -25,7 +25,6 @@ module ChapelShortArrayTransfer { use ChapelBase; - use ChapelDebugPrint only chpl_debug_writeln; use ChplConfig only CHPL_LOCALE_MODEL; @chpldoc.nodoc @@ -36,7 +35,6 @@ module ChapelShortArrayTransfer { config param shortArrayTransferThreshold = 60; // number of elements - proc chpl__staticCheckShortArrayTransfer(a, b) param { // Engin: this is the case I'm focusing on in the initial PR. This can // definitely be loosened up... by a lot. 
@@ -44,33 +42,44 @@ module ChapelShortArrayTransfer { } inline proc chpl__dynamicCheckShortArrayTransfer(a, b) { + param msgHeader = " "; param localCompilation = _local && CHPL_LOCALE_MODEL=="flat"; const sizeOk = a.sizeAs(uint) < shortArrayTransferThreshold; - if debugShortArrayTransfer { - chpl_debug_writeln(" Size: ", a.sizeAs(uint), - " Threshold: ", shortArrayTransferThreshold); - if sizeOk then - chpl_debug_writeln(" size qualifies"); - else - chpl_debug_writeln(" size doesn't qualify"); - } + debug("Size: ", a.sizeAs(uint), " Threshold: ", + shortArrayTransferThreshold); + if sizeOk then + debug("size qualifies"); + else + debug("size doesn't qualify"); if localCompilation { return sizeOk; } else { + extern proc chpl_task_getRequestedSubloc(): int(32); // No `.locale` to avoid overheads. Note that this is an optimization for // fast-running code. Small things matter. const bothLocal = chpl__bothLocal(a, b); - if debugShortArrayTransfer { - if bothLocal then - chpl_debug_writeln(" locality qualifies"); + const notGpu = CHPL_LOCALE_MODEL=="flat" || + chpl_task_getRequestedSubloc() < 0; + + if bothLocal then + if notGpu then + debug("locality qualifies"); else - chpl_debug_writeln(" locality does not qualify"); - } + debug("GPU arrays shouldn't be SAT'ed"); + else + debug("locality does not qualify"); - return sizeOk && bothLocal; + return sizeOk && bothLocal && notGpu; + } + } + + private proc debug(s...) 
{ + use ChapelDebugPrint only chpl_debug_writeln; + if debugShortArrayTransfer { + chpl_debug_writeln(" ", (...s)); } } } diff --git a/test/gpu/native/optimizations/arrayViewElision/gpuViewElision.chpl b/test/gpu/native/optimizations/arrayViewElision/gpuViewElision.chpl new file mode 100644 index 000000000000..4bde69249911 --- /dev/null +++ b/test/gpu/native/optimizations/arrayViewElision/gpuViewElision.chpl @@ -0,0 +1,37 @@ +use Time; + +config type elemType = int; +var t: stopwatch; +config const correctness = false; +config const useLoop = false; +config const arrSize = 10; +config const totalSeconds = if correctness then 0 else 5; +config const checkGranularity = if correctness then 1 else 10000; + +var totalOps = 0; +on here.gpus[0] { + var Arr1: [1..arrSize] elemType; + var Arr2: [1..arrSize] elemType = 1; + + t.start(); + do { + for 1..checkGranularity { + Arr1[1..arrSize] = Arr2[1..arrSize]; + } + + totalOps += checkGranularity; + } while t.elapsed() < totalSeconds; + t.stop(); + + + + if correctness { + writeln(Arr1[4]); + } + else { + const memCopied: real = totalOps*arrSize*numBytes(elemType); + writef("Total time(s): %.2dr\n", t.elapsed()); + writef("Total memory copied(GB): %.5dr\n", memCopied/2**30); + writef("Sustained throughput(GB/s): %.5dr\n", memCopied/2**30/t.elapsed()); + } +} diff --git a/test/gpu/native/optimizations/arrayViewElision/gpuViewElision.compopts b/test/gpu/native/optimizations/arrayViewElision/gpuViewElision.compopts new file mode 100644 index 000000000000..3f27f7a0bd37 --- /dev/null +++ b/test/gpu/native/optimizations/arrayViewElision/gpuViewElision.compopts @@ -0,0 +1 @@ +-sdebugBulkTransfer -sdebugShortArrayTransfer diff --git a/test/gpu/native/optimizations/arrayViewElision/gpuViewElision.execopts b/test/gpu/native/optimizations/arrayViewElision/gpuViewElision.execopts new file mode 100644 index 000000000000..83996d2a7860 --- /dev/null +++ b/test/gpu/native/optimizations/arrayViewElision/gpuViewElision.execopts @@ -0,0 +1 @@ 
+--correctness=true diff --git a/test/gpu/native/optimizations/arrayViewElision/gpuViewElision.good b/test/gpu/native/optimizations/arrayViewElision/gpuViewElision.good new file mode 100644 index 000000000000..bc832740ce44 --- /dev/null +++ b/test/gpu/native/optimizations/arrayViewElision/gpuViewElision.good @@ -0,0 +1,9 @@ + Size: 10 Threshold: 60 + size qualifies + GPU arrays shouldn't be SAT'ed +Performing protoSlice bulk transfer +operator =(a:[],b:[]): in chpl__bulkTransferArray +operator =(a:[],b:[]): attempting doiBulkTransferFromKnown +In DefaultRectangular._simpleTransfer(): Alo=(1,), Blo=(1,), len=10, elemSize=8 +operator =(a:[],b:[]): successfully completed bulk transfer +1 From b70c7514b3360cecdd183802ce9bddd3f910e215 Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Fri, 5 Jul 2024 16:49:10 -0700 Subject: [PATCH 88/88] Adjust good files Signed-off-by: Engin Kayraklioglu --- .../avoidExtraCalls.comm-none.good | 6 +-- .../arrayViewElision/avoidExtraCalls.good | 6 +-- .../rankChanges.comm-none.good | 8 +-- .../arrayViewElision/rankChanges.good | 8 +-- .../slices-1d-domain.comm-none.good | 12 ++--- .../arrayViewElision/slices-1d-domain.good | 12 ++--- .../slices-1d-range.comm-none.good | 50 +++++++++---------- .../arrayViewElision/slices-1d-range.good | 50 +++++++++---------- .../slices-2d-domain.comm-none.good | 12 ++--- .../arrayViewElision/slices-2d-domain.good | 12 ++--- .../slices-2d-range.comm-none.good | 50 +++++++++---------- .../arrayViewElision/slices-2d-range.good | 50 +++++++++---------- 12 files changed, 138 insertions(+), 138 deletions(-) diff --git a/test/optimizations/arrayViewElision/avoidExtraCalls.comm-none.good b/test/optimizations/arrayViewElision/avoidExtraCalls.comm-none.good index f63d929e32a1..0b7b6aba6225 100644 --- a/test/optimizations/arrayViewElision/avoidExtraCalls.comm-none.good +++ b/test/optimizations/arrayViewElision/avoidExtraCalls.comm-none.good @@ -27,14 +27,14 @@ ArrayViewElision supported avoidExtraCalls.chpl:39 
Test 1 foo called foo called - Size: 3 Threshold: 50 + Size: 3 Threshold: 60 size qualifies 1 1 1 0 0 0 0 0 0 0 Test 2 foo called foo called - Size: 3 Threshold: 50 + Size: 3 Threshold: 60 size qualifies 1 1 1 0 0 0 0 0 0 0 @@ -43,7 +43,7 @@ foo called foo called foo called foo called - Size: 9 Threshold: 50 + Size: 9 Threshold: 60 size qualifies 1 1 1 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 diff --git a/test/optimizations/arrayViewElision/avoidExtraCalls.good b/test/optimizations/arrayViewElision/avoidExtraCalls.good index bf94bf5f6d6c..d0c5be8548a5 100644 --- a/test/optimizations/arrayViewElision/avoidExtraCalls.good +++ b/test/optimizations/arrayViewElision/avoidExtraCalls.good @@ -27,7 +27,7 @@ ArrayViewElision supported (dynamic locality check required) avoidExtraCalls.chp Test 1 foo called foo called - Size: 3 Threshold: 50 + Size: 3 Threshold: 60 size qualifies locality qualifies 1 1 1 0 0 0 0 0 0 0 @@ -35,7 +35,7 @@ foo called Test 2 foo called foo called - Size: 3 Threshold: 50 + Size: 3 Threshold: 60 size qualifies locality qualifies 1 1 1 0 0 0 0 0 0 0 @@ -45,7 +45,7 @@ foo called foo called foo called foo called - Size: 9 Threshold: 50 + Size: 9 Threshold: 60 size qualifies locality qualifies 1 1 1 0 0 0 0 0 0 0 diff --git a/test/optimizations/arrayViewElision/rankChanges.comm-none.good b/test/optimizations/arrayViewElision/rankChanges.comm-none.good index d23ae64ca555..5abf1cf73bd5 100644 --- a/test/optimizations/arrayViewElision/rankChanges.comm-none.good +++ b/test/optimizations/arrayViewElision/rankChanges.comm-none.good @@ -58,7 +58,7 @@ ArrayViewElision not supported rankChanges.chpl:20 int(64) range(int(64),both,one) - Size: 3 Threshold: 50 + Size: 3 Threshold: 60 size qualifies 1 1 1 1 1 1 1 1 1 1 @@ -66,7 +66,7 @@ ArrayViewElision not supported rankChanges.chpl:20 1 1 1 1 1 1 1 1 1 1 - Size: 3 Threshold: 50 + Size: 3 Threshold: 60 size qualifies 1 1 2 1 1 1 1 2 1 1 @@ -74,7 +74,7 @@ ArrayViewElision not supported rankChanges.chpl:20 1 1 1 1 1 1 1 1 1 
1 - Size: 5 Threshold: 50 + Size: 5 Threshold: 60 size qualifies 1 1 1 1 1 1 1 1 1 1 @@ -82,7 +82,7 @@ ArrayViewElision not supported rankChanges.chpl:20 1 1 1 1 1 1 1 1 1 1 - Size: 5 Threshold: 50 + Size: 5 Threshold: 60 size qualifies 1 1 2 1 1 1 1 2 1 1 diff --git a/test/optimizations/arrayViewElision/rankChanges.good b/test/optimizations/arrayViewElision/rankChanges.good index 53c5916135db..bc2abd136e59 100644 --- a/test/optimizations/arrayViewElision/rankChanges.good +++ b/test/optimizations/arrayViewElision/rankChanges.good @@ -58,7 +58,7 @@ ArrayViewElision not supported rankChanges.chpl:20 int(64) range(int(64),both,one) - Size: 3 Threshold: 50 + Size: 3 Threshold: 60 size qualifies locality qualifies 1 1 1 1 1 @@ -67,7 +67,7 @@ ArrayViewElision not supported rankChanges.chpl:20 1 1 1 1 1 1 1 1 1 1 - Size: 3 Threshold: 50 + Size: 3 Threshold: 60 size qualifies locality qualifies 1 1 2 1 1 @@ -76,7 +76,7 @@ ArrayViewElision not supported rankChanges.chpl:20 1 1 1 1 1 1 1 1 1 1 - Size: 5 Threshold: 50 + Size: 5 Threshold: 60 size qualifies locality qualifies 1 1 1 1 1 @@ -85,7 +85,7 @@ ArrayViewElision not supported rankChanges.chpl:20 1 1 1 1 1 1 1 1 1 1 - Size: 5 Threshold: 50 + Size: 5 Threshold: 60 size qualifies locality qualifies 1 1 2 1 1 diff --git a/test/optimizations/arrayViewElision/slices-1d-domain.comm-none.good b/test/optimizations/arrayViewElision/slices-1d-domain.comm-none.good index 390e6acb35c2..436e02fc44e4 100644 --- a/test/optimizations/arrayViewElision/slices-1d-domain.comm-none.good +++ b/test/optimizations/arrayViewElision/slices-1d-domain.comm-none.good @@ -15,36 +15,36 @@ ArrayViewElision supported slices.chpl:24 domain(1,int(64),one) Set first two: - Size: 2 Threshold: 50 + Size: 2 Threshold: 60 size qualifies Test 1 2 2 1 1 1 - Size: 2 Threshold: 50 + Size: 2 Threshold: 60 size qualifies Test 2 2 2 1 1 1 ----------------- Set last two: - Size: 2 Threshold: 50 + Size: 2 Threshold: 60 size qualifies Test 3 1 1 1 2 2 - Size: 2 
Threshold: 50 + Size: 2 Threshold: 60 size qualifies Test 4 1 1 1 2 2 ----------------- Set all: - Size: 5 Threshold: 50 + Size: 5 Threshold: 60 size qualifies Test 5 2 2 2 2 2 - Size: 5 Threshold: 50 + Size: 5 Threshold: 60 size qualifies Test 6 2 2 2 2 2 diff --git a/test/optimizations/arrayViewElision/slices-1d-domain.good b/test/optimizations/arrayViewElision/slices-1d-domain.good index d20fc9b51b38..8e4066c3f7f6 100644 --- a/test/optimizations/arrayViewElision/slices-1d-domain.good +++ b/test/optimizations/arrayViewElision/slices-1d-domain.good @@ -15,13 +15,13 @@ ArrayViewElision supported (dynamic locality check required) slices.chpl:24 domain(1,int(64),one) Set first two: - Size: 2 Threshold: 50 + Size: 2 Threshold: 60 size qualifies locality qualifies Test 1 2 2 1 1 1 - Size: 2 Threshold: 50 + Size: 2 Threshold: 60 size qualifies locality qualifies Test 2 @@ -29,13 +29,13 @@ Test 2 ----------------- Set last two: - Size: 2 Threshold: 50 + Size: 2 Threshold: 60 size qualifies locality qualifies Test 3 1 1 1 2 2 - Size: 2 Threshold: 50 + Size: 2 Threshold: 60 size qualifies locality qualifies Test 4 @@ -43,13 +43,13 @@ Test 4 ----------------- Set all: - Size: 5 Threshold: 50 + Size: 5 Threshold: 60 size qualifies locality qualifies Test 5 2 2 2 2 2 - Size: 5 Threshold: 50 + Size: 5 Threshold: 60 size qualifies locality qualifies Test 6 diff --git a/test/optimizations/arrayViewElision/slices-1d-range.comm-none.good b/test/optimizations/arrayViewElision/slices-1d-range.comm-none.good index 2440e2a251a9..1c7817657a63 100644 --- a/test/optimizations/arrayViewElision/slices-1d-range.comm-none.good +++ b/test/optimizations/arrayViewElision/slices-1d-range.comm-none.good @@ -111,131 +111,131 @@ ArrayViewElision supported slices.chpl:26 range(int(64),neither,one) Set first two: - Size: 2 Threshold: 50 + Size: 2 Threshold: 60 size qualifies Test 1 2 2 1 1 1 - Size: 2 Threshold: 50 + Size: 2 Threshold: 60 size qualifies Test 2 2 2 1 1 1 - Size: 2 Threshold: 50 + 
Size: 2 Threshold: 60 size qualifies Test 3 2 2 1 1 1 - Size: 2 Threshold: 50 + Size: 2 Threshold: 60 size qualifies Test 4 2 2 1 1 1 - Size: 2 Threshold: 50 + Size: 2 Threshold: 60 size qualifies Test 5 2 2 1 1 1 - Size: 2 Threshold: 50 + Size: 2 Threshold: 60 size qualifies Test 6 2 2 1 1 1 - Size: 2 Threshold: 50 + Size: 2 Threshold: 60 size qualifies Test 7 2 2 1 1 1 - Size: 2 Threshold: 50 + Size: 2 Threshold: 60 size qualifies Test 8 2 2 1 1 1 ----------------- Set last two: - Size: 2 Threshold: 50 + Size: 2 Threshold: 60 size qualifies Test 9 1 1 1 2 2 - Size: 2 Threshold: 50 + Size: 2 Threshold: 60 size qualifies Test 10 1 1 1 2 2 - Size: 2 Threshold: 50 + Size: 2 Threshold: 60 size qualifies Test 11 1 1 1 2 2 - Size: 2 Threshold: 50 + Size: 2 Threshold: 60 size qualifies Test 12 1 1 1 2 2 - Size: 2 Threshold: 50 + Size: 2 Threshold: 60 size qualifies Test 13 1 1 1 2 2 - Size: 2 Threshold: 50 + Size: 2 Threshold: 60 size qualifies Test 14 1 1 1 2 2 - Size: 2 Threshold: 50 + Size: 2 Threshold: 60 size qualifies Test 15 1 1 1 2 2 - Size: 2 Threshold: 50 + Size: 2 Threshold: 60 size qualifies Test 16 1 1 1 2 2 ----------------- Set all: - Size: 5 Threshold: 50 + Size: 5 Threshold: 60 size qualifies Test 17 2 2 2 2 2 - Size: 5 Threshold: 50 + Size: 5 Threshold: 60 size qualifies Test 18 2 2 2 2 2 - Size: 5 Threshold: 50 + Size: 5 Threshold: 60 size qualifies Test 19 2 2 2 2 2 - Size: 5 Threshold: 50 + Size: 5 Threshold: 60 size qualifies Test 20 2 2 2 2 2 - Size: 5 Threshold: 50 + Size: 5 Threshold: 60 size qualifies Test 21 2 2 2 2 2 - Size: 5 Threshold: 50 + Size: 5 Threshold: 60 size qualifies Test 22 2 2 2 2 2 - Size: 5 Threshold: 50 + Size: 5 Threshold: 60 size qualifies Test 23 2 2 2 2 2 - Size: 5 Threshold: 50 + Size: 5 Threshold: 60 size qualifies Test 24 2 2 2 2 2 - Size: 5 Threshold: 50 + Size: 5 Threshold: 60 size qualifies Test 25 2 2 2 2 2 diff --git a/test/optimizations/arrayViewElision/slices-1d-range.good 
b/test/optimizations/arrayViewElision/slices-1d-range.good index f5cb50f914a2..af3ebf7e7716 100644 --- a/test/optimizations/arrayViewElision/slices-1d-range.good +++ b/test/optimizations/arrayViewElision/slices-1d-range.good @@ -111,49 +111,49 @@ ArrayViewElision supported (dynamic locality check required) slices.chpl:26 range(int(64),neither,one) Set first two: - Size: 2 Threshold: 50 + Size: 2 Threshold: 60 size qualifies locality qualifies Test 1 2 2 1 1 1 - Size: 2 Threshold: 50 + Size: 2 Threshold: 60 size qualifies locality qualifies Test 2 2 2 1 1 1 - Size: 2 Threshold: 50 + Size: 2 Threshold: 60 size qualifies locality qualifies Test 3 2 2 1 1 1 - Size: 2 Threshold: 50 + Size: 2 Threshold: 60 size qualifies locality qualifies Test 4 2 2 1 1 1 - Size: 2 Threshold: 50 + Size: 2 Threshold: 60 size qualifies locality qualifies Test 5 2 2 1 1 1 - Size: 2 Threshold: 50 + Size: 2 Threshold: 60 size qualifies locality qualifies Test 6 2 2 1 1 1 - Size: 2 Threshold: 50 + Size: 2 Threshold: 60 size qualifies locality qualifies Test 7 2 2 1 1 1 - Size: 2 Threshold: 50 + Size: 2 Threshold: 60 size qualifies locality qualifies Test 8 @@ -161,49 +161,49 @@ Test 8 ----------------- Set last two: - Size: 2 Threshold: 50 + Size: 2 Threshold: 60 size qualifies locality qualifies Test 9 1 1 1 2 2 - Size: 2 Threshold: 50 + Size: 2 Threshold: 60 size qualifies locality qualifies Test 10 1 1 1 2 2 - Size: 2 Threshold: 50 + Size: 2 Threshold: 60 size qualifies locality qualifies Test 11 1 1 1 2 2 - Size: 2 Threshold: 50 + Size: 2 Threshold: 60 size qualifies locality qualifies Test 12 1 1 1 2 2 - Size: 2 Threshold: 50 + Size: 2 Threshold: 60 size qualifies locality qualifies Test 13 1 1 1 2 2 - Size: 2 Threshold: 50 + Size: 2 Threshold: 60 size qualifies locality qualifies Test 14 1 1 1 2 2 - Size: 2 Threshold: 50 + Size: 2 Threshold: 60 size qualifies locality qualifies Test 15 1 1 1 2 2 - Size: 2 Threshold: 50 + Size: 2 Threshold: 60 size qualifies locality qualifies Test 16 @@ 
-211,55 +211,55 @@ Test 16 ----------------- Set all: - Size: 5 Threshold: 50 + Size: 5 Threshold: 60 size qualifies locality qualifies Test 17 2 2 2 2 2 - Size: 5 Threshold: 50 + Size: 5 Threshold: 60 size qualifies locality qualifies Test 18 2 2 2 2 2 - Size: 5 Threshold: 50 + Size: 5 Threshold: 60 size qualifies locality qualifies Test 19 2 2 2 2 2 - Size: 5 Threshold: 50 + Size: 5 Threshold: 60 size qualifies locality qualifies Test 20 2 2 2 2 2 - Size: 5 Threshold: 50 + Size: 5 Threshold: 60 size qualifies locality qualifies Test 21 2 2 2 2 2 - Size: 5 Threshold: 50 + Size: 5 Threshold: 60 size qualifies locality qualifies Test 22 2 2 2 2 2 - Size: 5 Threshold: 50 + Size: 5 Threshold: 60 size qualifies locality qualifies Test 23 2 2 2 2 2 - Size: 5 Threshold: 50 + Size: 5 Threshold: 60 size qualifies locality qualifies Test 24 2 2 2 2 2 - Size: 5 Threshold: 50 + Size: 5 Threshold: 60 size qualifies locality qualifies Test 25 diff --git a/test/optimizations/arrayViewElision/slices-2d-domain.comm-none.good b/test/optimizations/arrayViewElision/slices-2d-domain.comm-none.good index 9e85e4626b52..1153a81438c4 100644 --- a/test/optimizations/arrayViewElision/slices-2d-domain.comm-none.good +++ b/test/optimizations/arrayViewElision/slices-2d-domain.comm-none.good @@ -15,7 +15,7 @@ ArrayViewElision supported slices.chpl:24 domain(2,int(64),one) Set first two: - Size: 4 Threshold: 50 + Size: 4 Threshold: 60 size qualifies Test 1 2 2 1 1 1 @@ -24,7 +24,7 @@ Test 1 1 1 1 1 1 1 1 1 1 1 - Size: 4 Threshold: 50 + Size: 4 Threshold: 60 size qualifies Test 2 2 2 1 1 1 @@ -35,7 +35,7 @@ Test 2 ----------------- Set last two: - Size: 4 Threshold: 50 + Size: 4 Threshold: 60 size qualifies Test 3 1 1 1 1 1 @@ -44,7 +44,7 @@ Test 3 1 1 1 2 2 1 1 1 2 2 - Size: 4 Threshold: 50 + Size: 4 Threshold: 60 size qualifies Test 4 1 1 1 1 1 @@ -55,7 +55,7 @@ Test 4 ----------------- Set all: - Size: 25 Threshold: 50 + Size: 25 Threshold: 60 size qualifies Test 5 2 2 2 2 2 @@ -64,7 +64,7 @@ 
Test 5 2 2 2 2 2 2 2 2 2 2 - Size: 25 Threshold: 50 + Size: 25 Threshold: 60 size qualifies Test 6 2 2 2 2 2 diff --git a/test/optimizations/arrayViewElision/slices-2d-domain.good b/test/optimizations/arrayViewElision/slices-2d-domain.good index 1a9478d6684b..5a874225ce6c 100644 --- a/test/optimizations/arrayViewElision/slices-2d-domain.good +++ b/test/optimizations/arrayViewElision/slices-2d-domain.good @@ -15,7 +15,7 @@ ArrayViewElision supported (dynamic locality check required) slices.chpl:24 domain(2,int(64),one) Set first two: - Size: 4 Threshold: 50 + Size: 4 Threshold: 60 size qualifies locality qualifies Test 1 @@ -25,7 +25,7 @@ Test 1 1 1 1 1 1 1 1 1 1 1 - Size: 4 Threshold: 50 + Size: 4 Threshold: 60 size qualifies locality qualifies Test 2 @@ -37,7 +37,7 @@ Test 2 ----------------- Set last two: - Size: 4 Threshold: 50 + Size: 4 Threshold: 60 size qualifies locality qualifies Test 3 @@ -47,7 +47,7 @@ Test 3 1 1 1 2 2 1 1 1 2 2 - Size: 4 Threshold: 50 + Size: 4 Threshold: 60 size qualifies locality qualifies Test 4 @@ -59,7 +59,7 @@ Test 4 ----------------- Set all: - Size: 25 Threshold: 50 + Size: 25 Threshold: 60 size qualifies locality qualifies Test 5 @@ -69,7 +69,7 @@ Test 5 2 2 2 2 2 2 2 2 2 2 - Size: 25 Threshold: 50 + Size: 25 Threshold: 60 size qualifies locality qualifies Test 6 diff --git a/test/optimizations/arrayViewElision/slices-2d-range.comm-none.good b/test/optimizations/arrayViewElision/slices-2d-range.comm-none.good index d054bd11e263..1be0e42f42c1 100644 --- a/test/optimizations/arrayViewElision/slices-2d-range.comm-none.good +++ b/test/optimizations/arrayViewElision/slices-2d-range.comm-none.good @@ -139,7 +139,7 @@ ArrayViewElision supported slices.chpl:26 range(int(64),neither,one) Set first two: - Size: 4 Threshold: 50 + Size: 4 Threshold: 60 size qualifies Test 1 2 2 1 1 1 @@ -148,7 +148,7 @@ Test 1 1 1 1 1 1 1 1 1 1 1 - Size: 4 Threshold: 50 + Size: 4 Threshold: 60 size qualifies Test 2 2 2 1 1 1 @@ -157,7 +157,7 @@ Test 2 1 1 1 
1 1 1 1 1 1 1 - Size: 4 Threshold: 50 + Size: 4 Threshold: 60 size qualifies Test 3 2 2 1 1 1 @@ -166,7 +166,7 @@ Test 3 1 1 1 1 1 1 1 1 1 1 - Size: 4 Threshold: 50 + Size: 4 Threshold: 60 size qualifies Test 4 2 2 1 1 1 @@ -175,7 +175,7 @@ Test 4 1 1 1 1 1 1 1 1 1 1 - Size: 4 Threshold: 50 + Size: 4 Threshold: 60 size qualifies Test 5 2 2 1 1 1 @@ -184,7 +184,7 @@ Test 5 1 1 1 1 1 1 1 1 1 1 - Size: 4 Threshold: 50 + Size: 4 Threshold: 60 size qualifies Test 6 2 2 1 1 1 @@ -193,7 +193,7 @@ Test 6 1 1 1 1 1 1 1 1 1 1 - Size: 4 Threshold: 50 + Size: 4 Threshold: 60 size qualifies Test 7 2 2 1 1 1 @@ -202,7 +202,7 @@ Test 7 1 1 1 1 1 1 1 1 1 1 - Size: 4 Threshold: 50 + Size: 4 Threshold: 60 size qualifies Test 8 2 2 1 1 1 @@ -213,7 +213,7 @@ Test 8 ----------------- Set last two: - Size: 4 Threshold: 50 + Size: 4 Threshold: 60 size qualifies Test 9 1 1 1 1 1 @@ -222,7 +222,7 @@ Test 9 1 1 1 2 2 1 1 1 2 2 - Size: 4 Threshold: 50 + Size: 4 Threshold: 60 size qualifies Test 10 1 1 1 1 1 @@ -231,7 +231,7 @@ Test 10 1 1 1 2 2 1 1 1 2 2 - Size: 4 Threshold: 50 + Size: 4 Threshold: 60 size qualifies Test 11 1 1 1 1 1 @@ -240,7 +240,7 @@ Test 11 1 1 1 2 2 1 1 1 2 2 - Size: 4 Threshold: 50 + Size: 4 Threshold: 60 size qualifies Test 12 1 1 1 1 1 @@ -249,7 +249,7 @@ Test 12 1 1 1 2 2 1 1 1 2 2 - Size: 4 Threshold: 50 + Size: 4 Threshold: 60 size qualifies Test 13 1 1 1 1 1 @@ -258,7 +258,7 @@ Test 13 1 1 1 2 2 1 1 1 2 2 - Size: 4 Threshold: 50 + Size: 4 Threshold: 60 size qualifies Test 14 1 1 1 1 1 @@ -267,7 +267,7 @@ Test 14 1 1 1 2 2 1 1 1 2 2 - Size: 4 Threshold: 50 + Size: 4 Threshold: 60 size qualifies Test 15 1 1 1 1 1 @@ -276,7 +276,7 @@ Test 15 1 1 1 2 2 1 1 1 2 2 - Size: 4 Threshold: 50 + Size: 4 Threshold: 60 size qualifies Test 16 1 1 1 1 1 @@ -287,7 +287,7 @@ Test 16 ----------------- Set all: - Size: 25 Threshold: 50 + Size: 25 Threshold: 60 size qualifies Test 17 2 2 2 2 2 @@ -296,7 +296,7 @@ Test 17 2 2 2 2 2 2 2 2 2 2 - Size: 25 Threshold: 50 + Size: 25 
Threshold: 60 size qualifies Test 18 2 2 2 2 2 @@ -305,7 +305,7 @@ Test 18 2 2 2 2 2 2 2 2 2 2 - Size: 25 Threshold: 50 + Size: 25 Threshold: 60 size qualifies Test 19 2 2 2 2 2 @@ -314,7 +314,7 @@ Test 19 2 2 2 2 2 2 2 2 2 2 - Size: 25 Threshold: 50 + Size: 25 Threshold: 60 size qualifies Test 20 2 2 2 2 2 @@ -323,7 +323,7 @@ Test 20 2 2 2 2 2 2 2 2 2 2 - Size: 25 Threshold: 50 + Size: 25 Threshold: 60 size qualifies Test 21 2 2 2 2 2 @@ -332,7 +332,7 @@ Test 21 2 2 2 2 2 2 2 2 2 2 - Size: 25 Threshold: 50 + Size: 25 Threshold: 60 size qualifies Test 22 2 2 2 2 2 @@ -341,7 +341,7 @@ Test 22 2 2 2 2 2 2 2 2 2 2 - Size: 25 Threshold: 50 + Size: 25 Threshold: 60 size qualifies Test 23 2 2 2 2 2 @@ -350,7 +350,7 @@ Test 23 2 2 2 2 2 2 2 2 2 2 - Size: 25 Threshold: 50 + Size: 25 Threshold: 60 size qualifies Test 24 2 2 2 2 2 @@ -359,7 +359,7 @@ Test 24 2 2 2 2 2 2 2 2 2 2 - Size: 25 Threshold: 50 + Size: 25 Threshold: 60 size qualifies Test 25 2 2 2 2 2 diff --git a/test/optimizations/arrayViewElision/slices-2d-range.good b/test/optimizations/arrayViewElision/slices-2d-range.good index d8fe2715ad84..18d1bd420fac 100644 --- a/test/optimizations/arrayViewElision/slices-2d-range.good +++ b/test/optimizations/arrayViewElision/slices-2d-range.good @@ -139,7 +139,7 @@ ArrayViewElision supported (dynamic locality check required) slices.chpl:26 range(int(64),neither,one) Set first two: - Size: 4 Threshold: 50 + Size: 4 Threshold: 60 size qualifies locality qualifies Test 1 @@ -149,7 +149,7 @@ Test 1 1 1 1 1 1 1 1 1 1 1 - Size: 4 Threshold: 50 + Size: 4 Threshold: 60 size qualifies locality qualifies Test 2 @@ -159,7 +159,7 @@ Test 2 1 1 1 1 1 1 1 1 1 1 - Size: 4 Threshold: 50 + Size: 4 Threshold: 60 size qualifies locality qualifies Test 3 @@ -169,7 +169,7 @@ Test 3 1 1 1 1 1 1 1 1 1 1 - Size: 4 Threshold: 50 + Size: 4 Threshold: 60 size qualifies locality qualifies Test 4 @@ -179,7 +179,7 @@ Test 4 1 1 1 1 1 1 1 1 1 1 - Size: 4 Threshold: 50 + Size: 4 Threshold: 60 size 
qualifies locality qualifies Test 5 @@ -189,7 +189,7 @@ Test 5 1 1 1 1 1 1 1 1 1 1 - Size: 4 Threshold: 50 + Size: 4 Threshold: 60 size qualifies locality qualifies Test 6 @@ -199,7 +199,7 @@ Test 6 1 1 1 1 1 1 1 1 1 1 - Size: 4 Threshold: 50 + Size: 4 Threshold: 60 size qualifies locality qualifies Test 7 @@ -209,7 +209,7 @@ Test 7 1 1 1 1 1 1 1 1 1 1 - Size: 4 Threshold: 50 + Size: 4 Threshold: 60 size qualifies locality qualifies Test 8 @@ -221,7 +221,7 @@ Test 8 ----------------- Set last two: - Size: 4 Threshold: 50 + Size: 4 Threshold: 60 size qualifies locality qualifies Test 9 @@ -231,7 +231,7 @@ Test 9 1 1 1 2 2 1 1 1 2 2 - Size: 4 Threshold: 50 + Size: 4 Threshold: 60 size qualifies locality qualifies Test 10 @@ -241,7 +241,7 @@ Test 10 1 1 1 2 2 1 1 1 2 2 - Size: 4 Threshold: 50 + Size: 4 Threshold: 60 size qualifies locality qualifies Test 11 @@ -251,7 +251,7 @@ Test 11 1 1 1 2 2 1 1 1 2 2 - Size: 4 Threshold: 50 + Size: 4 Threshold: 60 size qualifies locality qualifies Test 12 @@ -261,7 +261,7 @@ Test 12 1 1 1 2 2 1 1 1 2 2 - Size: 4 Threshold: 50 + Size: 4 Threshold: 60 size qualifies locality qualifies Test 13 @@ -271,7 +271,7 @@ Test 13 1 1 1 2 2 1 1 1 2 2 - Size: 4 Threshold: 50 + Size: 4 Threshold: 60 size qualifies locality qualifies Test 14 @@ -281,7 +281,7 @@ Test 14 1 1 1 2 2 1 1 1 2 2 - Size: 4 Threshold: 50 + Size: 4 Threshold: 60 size qualifies locality qualifies Test 15 @@ -291,7 +291,7 @@ Test 15 1 1 1 2 2 1 1 1 2 2 - Size: 4 Threshold: 50 + Size: 4 Threshold: 60 size qualifies locality qualifies Test 16 @@ -303,7 +303,7 @@ Test 16 ----------------- Set all: - Size: 25 Threshold: 50 + Size: 25 Threshold: 60 size qualifies locality qualifies Test 17 @@ -313,7 +313,7 @@ Test 17 2 2 2 2 2 2 2 2 2 2 - Size: 25 Threshold: 50 + Size: 25 Threshold: 60 size qualifies locality qualifies Test 18 @@ -323,7 +323,7 @@ Test 18 2 2 2 2 2 2 2 2 2 2 - Size: 25 Threshold: 50 + Size: 25 Threshold: 60 size qualifies locality qualifies Test 19 @@ -333,7 
+333,7 @@ Test 19 2 2 2 2 2 2 2 2 2 2 - Size: 25 Threshold: 50 + Size: 25 Threshold: 60 size qualifies locality qualifies Test 20 @@ -343,7 +343,7 @@ Test 20 2 2 2 2 2 2 2 2 2 2 - Size: 25 Threshold: 50 + Size: 25 Threshold: 60 size qualifies locality qualifies Test 21 @@ -353,7 +353,7 @@ Test 21 2 2 2 2 2 2 2 2 2 2 - Size: 25 Threshold: 50 + Size: 25 Threshold: 60 size qualifies locality qualifies Test 22 @@ -363,7 +363,7 @@ Test 22 2 2 2 2 2 2 2 2 2 2 - Size: 25 Threshold: 50 + Size: 25 Threshold: 60 size qualifies locality qualifies Test 23 @@ -373,7 +373,7 @@ Test 23 2 2 2 2 2 2 2 2 2 2 - Size: 25 Threshold: 50 + Size: 25 Threshold: 60 size qualifies locality qualifies Test 24 @@ -383,7 +383,7 @@ Test 24 2 2 2 2 2 2 2 2 2 2 - Size: 25 Threshold: 50 + Size: 25 Threshold: 60 size qualifies locality qualifies Test 25