diff --git a/CHANGES b/CHANGES index ad6e040d3fb..26302066097 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,29 @@ +6.2.0-dev.275 | 2023-12-12 09:52:00 +0100 + + * recent BTests that should be skipped when using -O gen-C++ (Vern Paxson, Corelight) + + * expanded ZAM maintenance notes & support scripts (Vern Paxson, Corelight) + + * script optimization tracking of functions called by event engine or indirectly (Vern Paxson, Corelight) + + * memory-handling fixes for information associated with low-level ZAM instructions (Vern Paxson, Corelight) + + * fix for -O C++ lambda functions reporting errors/warnings (Vern Paxson, Corelight) + + * revert problems with profiling attributes introduced by recent script-opt PR (Vern Paxson, Corelight) + + * script optimization fixes for pattern tables (Vern Paxson, Corelight) + + * regularized (some) types of pointers used in script optimization (Vern Paxson, Corelight) + + * splitting off script optimization CSE into its own source files (Vern Paxson, Corelight) + + * some very minor tidying of script optimization code/documentation (Vern Paxson, Corelight) + + * fix for Trigger's whose termination leads to deleting other Trigger's (Vern Paxson, Corelight) + + * bug fix for delayed logging (Vern Paxson, Corelight) + 6.2.0-dev.262 | 2023-12-11 13:11:09 +0100 * Bump auxil/spicy to latest development snapshot (Benjamin Bannier, Corelight) diff --git a/VERSION b/VERSION index 464051f7c8f..00cba4b3fb3 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -6.2.0-dev.262 +6.2.0-dev.275 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 37f6109cd24..27f0834715d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -394,6 +394,7 @@ set(MAIN_SRCS script_opt/CPP/Util.cc script_opt/CPP/Vars.cc ${_gen_zeek_script_cpp} + script_opt/CSE.cc script_opt/Expr.cc script_opt/FuncInfo.cc script_opt/GenIDDefs.cc diff --git a/src/Notifier.cc b/src/Notifier.cc index ce3e8d68ebc..63afa3ddbd5 100644 --- a/src/Notifier.cc +++ b/src/Notifier.cc @@ 
-58,13 +58,10 @@ void Registry::Modified(Modifiable* m) { } void Registry::Terminate() { - std::set receivers; - - for ( auto& r : registrations ) - receivers.emplace(r.second); - - for ( auto& r : receivers ) - r->Terminate(); + while ( ! registrations.empty() ) { + const auto& it = registrations.begin(); + it->second->Terminate(); + } } Modifiable::~Modifiable() { diff --git a/src/Trigger.h b/src/Trigger.h index 8b2004f42b0..6c4fade3beb 100644 --- a/src/Trigger.h +++ b/src/Trigger.h @@ -69,7 +69,7 @@ class Trigger final : public Obj, public notifier::detail::Receiver { ~Trigger() override; // Evaluates the condition. If true, executes the body and deletes - // the object deleted. + // the object. // // Returns the state of condition. bool Eval(); diff --git a/src/logging/Manager.cc b/src/logging/Manager.cc index c0109e929bc..98098c2f159 100644 --- a/src/logging/Manager.cc +++ b/src/logging/Manager.cc @@ -1194,7 +1194,7 @@ ValPtr Manager::Delay(const EnumValPtr& id, const RecordValPtr record, FuncPtr p } const auto& active_write_ctx = active_writes.back(); - if ( active_write_ctx.id != id || active_write_ctx.record != record ) { + if ( active_write_ctx.id->Get() != id->Get() || active_write_ctx.record != record ) { reporter->Error("invalid Log::delay() call: argument mismatch with active Log::write()"); return make_intrusive(); } diff --git a/src/script_opt/CPP/Attrs.cc b/src/script_opt/CPP/Attrs.cc index dad23eaa6a6..0aaf955ef22 100644 --- a/src/script_opt/CPP/Attrs.cc +++ b/src/script_opt/CPP/Attrs.cc @@ -16,7 +16,7 @@ shared_ptr CPPCompile::RegisterAttributes(const AttributesPtr& att if ( pa != processed_attrs.end() ) return pa->second; - attributes.AddKey(attrs, pfs.HashAttrs(attrs)); + attributes.AddKey(attrs, pfs->HashAttrs(attrs)); // The cast is just so we can make an IntrusivePtr. 
auto a_rep = const_cast(attributes.GetRep(attrs)); diff --git a/src/script_opt/CPP/Compile.h b/src/script_opt/CPP/Compile.h index 2a55ea0fe59..5b05cc07e98 100644 --- a/src/script_opt/CPP/Compile.h +++ b/src/script_opt/CPP/Compile.h @@ -124,8 +124,8 @@ namespace zeek::detail { class CPPCompile { public: - CPPCompile(std::vector& _funcs, ProfileFuncs& pfs, const std::string& gen_name, bool _standalone, - bool report_uncompilable); + CPPCompile(std::vector& _funcs, std::shared_ptr pfs, const std::string& gen_name, + bool _standalone, bool report_uncompilable); ~CPPCompile(); // Constructing a CPPCompile object does all of the compilation. @@ -191,7 +191,7 @@ class CPPCompile { // However, we can't generate that code when first encountering // the attribute, because doing so will need to refer to the names // of types, and initially those are unavailable (because the type's - // representatives, per pfs.RepTypes(), might not have yet been + // representatives, per pfs->RepTypes(), might not have yet been // tracked). So instead we track the associated CallExprInitInfo // objects, and after all types have been tracked, then spin // through them to generate the code. @@ -314,7 +314,7 @@ class CPPCompile { std::vector& funcs; // The global profile of all of the functions. - ProfileFuncs& pfs; + std::shared_ptr pfs; // Script functions that we are able to compile. We compute // these ahead of time so that when compiling script function A @@ -894,7 +894,7 @@ class CPPCompile { // Returns the "representative" for a given type, used to ensure // that we re-use the C++ variable corresponding to a type and // don't instantiate redundant instances. - const Type* TypeRep(const Type* t) { return pfs.TypeRep(t); } + const Type* TypeRep(const Type* t) { return pfs->TypeRep(t); } const Type* TypeRep(const TypePtr& t) { return TypeRep(t.get()); } // Low-level C++ representations for types, of various flavors. 
diff --git a/src/script_opt/CPP/DeclFunc.cc b/src/script_opt/CPP/DeclFunc.cc index 0d9566037b8..9ff0eb41344 100644 --- a/src/script_opt/CPP/DeclFunc.cc +++ b/src/script_opt/CPP/DeclFunc.cc @@ -144,6 +144,7 @@ void CPPCompile::DeclareSubclass(const FuncTypePtr& ft, const ProfileFunc* pf, c StartBlock(); Emit("flow = FLOW_RETURN;"); + Emit("f->SetOnlyCall(ce.get());"); if ( in_hook ) { Emit("if ( ! %s(%s) )", fname, args); diff --git a/src/script_opt/CPP/Driver.cc b/src/script_opt/CPP/Driver.cc index 3dee4fc5289..3dc7e991fc5 100644 --- a/src/script_opt/CPP/Driver.cc +++ b/src/script_opt/CPP/Driver.cc @@ -13,9 +13,9 @@ namespace zeek::detail { using namespace std; -CPPCompile::CPPCompile(vector& _funcs, ProfileFuncs& _pfs, const string& gen_name, bool _standalone, - bool report_uncompilable) - : funcs(_funcs), pfs(_pfs), standalone(_standalone) { +CPPCompile::CPPCompile(vector& _funcs, std::shared_ptr _pfs, const string& gen_name, + bool _standalone, bool report_uncompilable) + : funcs(_funcs), pfs(std::move(_pfs)), standalone(_standalone) { auto target_name = gen_name.c_str(); write_file = fopen(target_name, "w"); @@ -99,21 +99,21 @@ void CPPCompile::Compile(bool report_uncompilable) { GenProlog(); // Track all of the types we'll be using. - for ( const auto& t : pfs.RepTypes() ) { + for ( const auto& t : pfs->RepTypes() ) { TypePtr tp{NewRef{}, (Type*)(t)}; - types.AddKey(tp, pfs.HashType(t)); + types.AddKey(tp, pfs->HashType(t)); } NL(); - for ( auto& g : pfs.AllGlobals() ) + for ( auto& g : pfs->AllGlobals() ) CreateGlobal(g); - for ( const auto& e : pfs.Events() ) + for ( const auto& e : pfs->Events() ) if ( AddGlobal(e, "gl") ) Emit("EventHandlerPtr %s_ev;", globals[string(e)]); - for ( const auto& t : pfs.RepTypes() ) { + for ( const auto& t : pfs->RepTypes() ) { ASSERT(types.HasKey(t)); TypePtr tp{NewRef{}, (Type*)(t)}; RegisterType(tp); @@ -131,14 +131,14 @@ void CPPCompile::Compile(bool report_uncompilable) { // be identical. 
In that case, we don't want to generate the lambda // twice, but we do want to map the second one to the same body name. unordered_map lambda_ASTs; - for ( const auto& l : pfs.Lambdas() ) { + for ( const auto& l : pfs->Lambdas() ) { const auto& n = l->Name(); const auto body = l->Ingredients()->Body().get(); if ( lambda_ASTs.count(n) > 0 ) // Reuse previous body. body_names[body] = body_names[lambda_ASTs[n]]; else { - DeclareLambda(l, pfs.ExprProf(l).get()); + DeclareLambda(l, pfs->ExprProf(l).get()); lambda_ASTs[n] = body; } } @@ -151,12 +151,12 @@ void CPPCompile::Compile(bool report_uncompilable) { CompileFunc(func); lambda_ASTs.clear(); - for ( const auto& l : pfs.Lambdas() ) { + for ( const auto& l : pfs->Lambdas() ) { const auto& n = l->Name(); if ( lambda_ASTs.count(n) > 0 ) continue; - CompileLambda(l, pfs.ExprProf(l).get()); + CompileLambda(l, pfs->ExprProf(l).get()); lambda_ASTs[n] = l->Ingredients()->Body().get(); } diff --git a/src/script_opt/CPP/Exprs.cc b/src/script_opt/CPP/Exprs.cc index f798b4f91c3..ed0591ed516 100644 --- a/src/script_opt/CPP/Exprs.cc +++ b/src/script_opt/CPP/Exprs.cc @@ -151,7 +151,7 @@ string CPPCompile::GenNameExpr(const NameExpr* ne, GenType gt) { if ( t->Tag() == TYPE_FUNC && ! is_global_var ) { auto func = n->Name(); - if ( globals.count(func) > 0 && pfs.BiFGlobals().count(n) == 0 ) + if ( globals.count(func) > 0 && pfs->BiFGlobals().count(n) == 0 ) return GenericValPtrToGT(IDNameStr(n), t, gt); } @@ -202,9 +202,9 @@ string CPPCompile::GenIncrExpr(const Expr* e, GenType gt, bool is_incr, bool top // Make sure any newly created types are known to // the profiler. 
- (void)pfs.HashType(one_e->GetType()); - (void)pfs.HashType(rhs->GetType()); - (void)pfs.HashType(assign->GetType()); + (void)pfs->HashType(one_e->GetType()); + (void)pfs->HashType(rhs->GetType()); + (void)pfs->HashType(assign->GetType()); auto gen = GenExpr(assign, GEN_DONT_CARE, top_level); @@ -269,10 +269,10 @@ string CPPCompile::GenCallExpr(const CallExpr* c, GenType gt, bool top_level) { // // If it is a BiF *that's also a global variable*, then // we need to look up the BiF version of the global. - if ( pfs.BiFGlobals().count(f_id) == 0 ) + if ( pfs->BiFGlobals().count(f_id) == 0 ) gen += +"->AsFunc()"; - else if ( pfs.Globals().count(f_id) > 0 ) + else if ( pfs->Globals().count(f_id) > 0 ) // The BiF version has an extra "_", per // AddBiF(..., true). gen = globals[string(id_name) + "_"]; @@ -318,20 +318,25 @@ string CPPCompile::GenInExpr(const Expr* e, GenType gt) { auto t1 = op1->GetType(); auto t2 = op2->GetType(); + auto tag1 = t1->Tag(); + auto tag2 = t2->Tag(); + string gen; - if ( t1->Tag() == TYPE_PATTERN ) + if ( tag1 == TYPE_STRING && tag2 == TYPE_TABLE && t2->AsTableType()->IsPatternIndex() ) + gen = GenExpr(op2, GEN_DONT_CARE) + "->MatchPattern(" + GenExpr(op1, GEN_NATIVE) + ")"; + else if ( tag1 == TYPE_PATTERN ) gen = string("(") + GenExpr(op1, GEN_DONT_CARE) + ")->MatchAnywhere(" + GenExpr(op2, GEN_DONT_CARE) + "->AsString())"; - else if ( t2->Tag() == TYPE_STRING ) + else if ( tag2 == TYPE_STRING ) gen = string("str_in__CPP(") + GenExpr(op1, GEN_DONT_CARE) + "->AsString(), " + GenExpr(op2, GEN_DONT_CARE) + "->AsString())"; - else if ( t1->Tag() == TYPE_ADDR && t2->Tag() == TYPE_SUBNET ) + else if ( tag1 == TYPE_ADDR && tag2 == TYPE_SUBNET ) gen = string("(") + GenExpr(op2, GEN_DONT_CARE) + ")->Contains(" + GenExpr(op1, GEN_VAL_PTR) + "->Get())"; - else if ( t2->Tag() == TYPE_VECTOR ) + else if ( tag2 == TYPE_VECTOR ) gen = GenExpr(op2, GEN_DONT_CARE) + "->Has(" + GenExpr(op1, GEN_NATIVE) + ")"; else @@ -511,8 +516,8 @@ string 
CPPCompile::GenAddToExpr(const Expr* e, GenType gt, bool top_level) { // Make sure any newly created types are known to // the profiler. - (void)pfs.HashType(rhs->GetType()); - (void)pfs.HashType(assign->GetType()); + (void)pfs->HashType(rhs->GetType()); + (void)pfs->HashType(assign->GetType()); return GenExpr(assign, gt, top_level); } @@ -542,8 +547,8 @@ string CPPCompile::GenRemoveFromExpr(const Expr* e, GenType gt, bool top_level) // Make sure any newly created types are known to // the profiler. - (void)pfs.HashType(rhs->GetType()); - (void)pfs.HashType(assign->GetType()); + (void)pfs->HashType(rhs->GetType()); + (void)pfs->HashType(assign->GetType()); return GenExpr(assign, gt, top_level); } diff --git a/src/script_opt/CPP/Inits.cc b/src/script_opt/CPP/Inits.cc index 2917b8d199e..4e7a30b1458 100644 --- a/src/script_opt/CPP/Inits.cc +++ b/src/script_opt/CPP/Inits.cc @@ -180,7 +180,7 @@ void CPPCompile::InitializeGlobals() { for ( const auto& ginit : IDOptInfo::GetGlobalInitExprs() ) { auto g = ginit.Id(); - if ( pfs.Globals().count(g) == 0 ) + if ( pfs->Globals().count(g) == 0 ) continue; auto ic = ginit.IC(); diff --git a/src/script_opt/CPP/InitsInfo.cc b/src/script_opt/CPP/InitsInfo.cc index 91e087f17b3..3dc3536edcb 100644 --- a/src/script_opt/CPP/InitsInfo.cc +++ b/src/script_opt/CPP/InitsInfo.cc @@ -460,12 +460,12 @@ FuncTypeInfo::FuncTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c, st params = f->Params(); yield = f->Yield(); - auto gi = c->RegisterType(f->Params()); + auto gi = c->RegisterType(params); if ( gi ) init_cohort = gi->InitCohort(); if ( yield ) { - gi = c->RegisterType(f->Yield()); + auto gi = c->RegisterType(f->Yield()); if ( gi ) init_cohort = max(init_cohort, gi->InitCohort()); } diff --git a/src/script_opt/CPP/InitsInfo.h b/src/script_opt/CPP/InitsInfo.h index d7f8c28e179..57121817bc1 100644 --- a/src/script_opt/CPP/InitsInfo.h +++ b/src/script_opt/CPP/InitsInfo.h @@ -594,7 +594,7 @@ class FuncTypeInfo : public AbstractTypeInfo 
{ private: FunctionFlavor flavor; - TypePtr params; + RecordTypePtr params; TypePtr yield; }; diff --git a/src/script_opt/CPP/Vars.cc b/src/script_opt/CPP/Vars.cc index f326d2708be..4aa967c3f8b 100644 --- a/src/script_opt/CPP/Vars.cc +++ b/src/script_opt/CPP/Vars.cc @@ -9,9 +9,9 @@ using namespace std; void CPPCompile::CreateGlobal(const ID* g) { auto gn = string(g->Name()); - bool is_bif = pfs.BiFGlobals().count(g) > 0; + bool is_bif = pfs->BiFGlobals().count(g) > 0; - if ( pfs.Globals().count(g) == 0 ) { + if ( pfs->Globals().count(g) == 0 ) { // Only used in the context of calls. If it's compilable, // then we'll call it directly. if ( compilable_funcs.count(gn) > 0 ) { @@ -28,7 +28,7 @@ void CPPCompile::CreateGlobal(const ID* g) { if ( AddGlobal(gn, "gl") ) { // We'll be creating this global. Emit("IDPtr %s;", globals[gn]); - if ( pfs.Events().count(gn) > 0 ) + if ( pfs->Events().count(gn) > 0 ) // This is an event that's also used as a variable. Emit("EventHandlerPtr %s_ev;", globals[gn]); diff --git a/src/script_opt/CPP/maint/README b/src/script_opt/CPP/maint/README index 9e36f43dd4d..0913151dbd9 100644 --- a/src/script_opt/CPP/maint/README +++ b/src/script_opt/CPP/maint/README @@ -13,7 +13,7 @@ The maintenance workflow: 1. Make sure the compiler can compile and execute the base scripts: - echo | src/zeek -O gen-C++ + src/zeek -O gen-C++ /dev/null ninja src/zeek -O use-C++ -r some.pcap @@ -21,7 +21,7 @@ The maintenance workflow: rm CPP-gen.cc ninja - echo | src/zeek -O gen-standalone-C++ + src/zeek -O gen-standalone-C++ /dev/null ninja rm CPP-gen.cc ninja diff --git a/src/script_opt/CSE.cc b/src/script_opt/CSE.cc new file mode 100644 index 00000000000..3433ed13232 --- /dev/null +++ b/src/script_opt/CSE.cc @@ -0,0 +1,266 @@ +// See the file "COPYING" in the main distribution directory for copyright. 
+ +#include "zeek/script_opt/CSE.h" + +namespace zeek::detail { + +CSE_ValidityChecker::CSE_ValidityChecker(std::shared_ptr _pfs, const std::vector& _ids, + const Expr* _start_e, const Expr* _end_e) + : pfs(std::move(_pfs)), ids(_ids) { + start_e = _start_e; + end_e = _end_e; + + // Track whether this is a record assignment, in which case + // we're attuned to assignments to the same field for the + // same type of record. + if ( start_e->Tag() == EXPR_FIELD ) { + field = start_e->AsFieldExpr()->Field(); + + // Track the type of the record, too, so we don't confuse + // field references to different records that happen to + // have the same offset as potential aliases. + field_type = start_e->GetOp1()->GetType(); + } + + else + field = -1; // flags that there's no relevant field +} + +TraversalCode CSE_ValidityChecker::PreStmt(const Stmt* s) { + auto t = s->Tag(); + + if ( t == STMT_WHEN ) { + // These are too hard to analyze - they result in lambda calls + // that can affect aggregates, etc. + is_valid = false; + return TC_ABORTALL; + } + + if ( t == STMT_ADD || t == STMT_DELETE ) + in_aggr_mod_stmt = true; + + return TC_CONTINUE; +} + +TraversalCode CSE_ValidityChecker::PostStmt(const Stmt* s) { + if ( s->Tag() == STMT_ADD || s->Tag() == STMT_DELETE ) + in_aggr_mod_stmt = false; + + return TC_CONTINUE; +} + +TraversalCode CSE_ValidityChecker::PreExpr(const Expr* e) { + if ( e == start_e ) { + ASSERT(! have_start_e); + have_start_e = true; + + // Don't analyze the expression, as it's our starting + // point and we don't want to conflate its properties + // with those of any intervening expressions. + return TC_CONTINUE; + } + + if ( e == end_e ) { + if ( ! have_start_e ) + reporter->InternalError("CSE_ValidityChecker: saw end but not start"); + + ASSERT(! have_end_e); + have_end_e = true; + + // ... and we're now done. + return TC_ABORTALL; + } + + if ( ! have_start_e ) + // We don't yet have a starting point. 
+ return TC_CONTINUE; + + // We have a starting point, and not yet an ending point. + auto t = e->Tag(); + + switch ( t ) { + case EXPR_ASSIGN: { + auto lhs_ref = e->GetOp1()->AsRefExprPtr(); + auto lhs = lhs_ref->GetOp1()->AsNameExpr(); + + if ( CheckID(lhs->Id(), false) ) + return TC_ABORTALL; + + // Note, we don't use CheckAggrMod() because this is a plain + // assignment. It might be changing a variable's binding to + // an aggregate ("aggr_var = new_aggr_val"), but we don't + // introduce temporaries that are simply aliases of existing + // variables (e.g., we don't have "::#8 = aggr_var"), + // and so there's no concern that the temporary could now be + // referring to the wrong aggregate. If instead we have + // "::#8 = aggr_var$foo", then a reassignment here + // to "aggr_var" will already be caught by CheckID(). + } break; + + case EXPR_INDEX_ASSIGN: { + auto lhs_aggr = e->GetOp1(); + auto lhs_aggr_id = lhs_aggr->AsNameExpr()->Id(); + + if ( CheckID(lhs_aggr_id, true) || CheckTableMod(lhs_aggr->GetType()) ) + return TC_ABORTALL; + } break; + + case EXPR_FIELD_LHS_ASSIGN: { + auto lhs = e->GetOp1(); + auto lhs_aggr_id = lhs->AsNameExpr()->Id(); + auto lhs_field = e->AsFieldLHSAssignExpr()->Field(); + + if ( CheckID(lhs_aggr_id, true) ) + return TC_ABORTALL; + if ( lhs_field == field && same_type(lhs_aggr_id->GetType(), field_type) ) { + is_valid = false; + return TC_ABORTALL; + } + } break; + + case EXPR_APPEND_TO: + // This doesn't directly change any identifiers, but does + // alter an aggregate. + if ( CheckAggrMod(e->GetType()) ) + return TC_ABORTALL; + break; + + case EXPR_CALL: + if ( CheckCall(e->AsCallExpr()) ) + return TC_ABORTALL; + break; + + case EXPR_TABLE_CONSTRUCTOR: + // These have EXPR_ASSIGN's in them that don't + // correspond to actual assignments to variables, + // so we don't want to traverse them. 
+ return TC_ABORTSTMT; + + case EXPR_RECORD_COERCE: + case EXPR_RECORD_CONSTRUCTOR: + // Note, record coercion behaves like constructors in terms of + // potentially executing &default functions. In either case, + // the type of the expression reflects the type we want to analyze + // for side effects. + if ( CheckRecordConstructor(e->GetType()) ) + return TC_ABORTALL; + break; + + case EXPR_INDEX: + case EXPR_FIELD: { + // We treat these together because they both have to be checked + // when inside an "add" or "delete" statement. + auto aggr = e->GetOp1(); + auto aggr_t = aggr->GetType(); + + if ( in_aggr_mod_stmt ) { + auto aggr_id = aggr->AsNameExpr()->Id(); + + if ( CheckID(aggr_id, true) || CheckAggrMod(aggr_t) ) + return TC_ABORTALL; + } + + else if ( t == EXPR_INDEX && aggr_t->Tag() == TYPE_TABLE ) { + if ( CheckTableRef(aggr_t) ) + return TC_ABORTALL; + } + } break; + + default: break; + } + + return TC_CONTINUE; +} + +bool CSE_ValidityChecker::CheckID(const ID* id, bool ignore_orig) { + for ( auto i : ids ) { + if ( ignore_orig && i == ids.front() ) + continue; + + if ( id == i ) + return Invalid(); // reassignment + } + + return false; +} + +bool CSE_ValidityChecker::CheckAggrMod(const TypePtr& t) { + if ( ! 
IsAggr(t) ) + return false; + + for ( auto i : ids ) + if ( same_type(t, i->GetType()) ) + return Invalid(); + + return false; +} + +bool CSE_ValidityChecker::CheckRecordConstructor(const TypePtr& t) { + if ( t->Tag() != TYPE_RECORD ) + return false; + + return CheckSideEffects(SideEffectsOp::CONSTRUCTION, t); +} + +bool CSE_ValidityChecker::CheckTableMod(const TypePtr& t) { + if ( CheckAggrMod(t) ) + return true; + + if ( t->Tag() != TYPE_TABLE ) + return false; + + return CheckSideEffects(SideEffectsOp::WRITE, t); +} + +bool CSE_ValidityChecker::CheckTableRef(const TypePtr& t) { return CheckSideEffects(SideEffectsOp::READ, t); } + +bool CSE_ValidityChecker::CheckCall(const CallExpr* c) { + auto func = c->Func(); + std::string desc; + if ( func->Tag() != EXPR_NAME ) + // Can't analyze indirect calls. + return Invalid(); + + IDSet non_local_ids; + TypeSet aggrs; + bool is_unknown = false; + + auto resolved = pfs->GetCallSideEffects(func->AsNameExpr(), non_local_ids, aggrs, is_unknown); + ASSERT(resolved); + + if ( is_unknown || CheckSideEffects(non_local_ids, aggrs) ) + return Invalid(); + + return false; +} + +bool CSE_ValidityChecker::CheckSideEffects(SideEffectsOp::AccessType access, const TypePtr& t) { + IDSet non_local_ids; + TypeSet aggrs; + + if ( pfs->GetSideEffects(access, t.get(), non_local_ids, aggrs) ) + return Invalid(); + + return CheckSideEffects(non_local_ids, aggrs); +} + +bool CSE_ValidityChecker::CheckSideEffects(const IDSet& non_local_ids, const TypeSet& aggrs) { + if ( non_local_ids.empty() && aggrs.empty() ) + // This is far and away the most common case. 
+ return false; + + for ( auto i : ids ) { + for ( auto nli : non_local_ids ) + if ( nli == i ) + return Invalid(); + + auto i_t = i->GetType(); + for ( auto a : aggrs ) + if ( same_type(a, i_t.get()) ) + return Invalid(); + } + + return false; +} + +} // namespace zeek::detail diff --git a/src/script_opt/CSE.h b/src/script_opt/CSE.h new file mode 100644 index 00000000000..9507cd493e8 --- /dev/null +++ b/src/script_opt/CSE.h @@ -0,0 +1,116 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +#pragma once + +#include "zeek/script_opt/ProfileFunc.h" + +namespace zeek::detail { + +class TempVar; + +// Helper class that walks an AST to determine whether it's safe to +// substitute a common subexpression (which at this point is an assignment +// to a variable) created using the assignment expression at position "start_e", +// at the location specified by the expression at position "end_e". +// +// See Reducer::ExprValid for a discussion of what's required for safety. + +class CSE_ValidityChecker : public TraversalCallback { +public: + CSE_ValidityChecker(std::shared_ptr pfs, const std::vector& ids, const Expr* start_e, + const Expr* end_e); + + TraversalCode PreStmt(const Stmt*) override; + TraversalCode PostStmt(const Stmt*) override; + TraversalCode PreExpr(const Expr*) override; + + // Returns the ultimate verdict re safety. + bool IsValid() const { + if ( ! is_valid ) + return false; + + if ( ! have_end_e ) + reporter->InternalError("CSE_ValidityChecker: saw start but not end"); + return true; + } + +protected: + // Returns true if an assignment involving the given identifier on + // the LHS is in conflict with the identifiers we're tracking. + bool CheckID(const ID* id, bool ignore_orig); + + // Returns true if a modification to an aggregate of the given type + // potentially aliases with one of the identifiers we're tracking. 
+ bool CheckAggrMod(const TypePtr& t); + + // Returns true if a record constructor/coercion of the given type has + // side effects and invalidates the CSE opportunity. + bool CheckRecordConstructor(const TypePtr& t); + + // The same for modifications to tables. + bool CheckTableMod(const TypePtr& t); + + // The same for accessing (reading) tables. + bool CheckTableRef(const TypePtr& t); + + // The same for the given function call. + bool CheckCall(const CallExpr* c); + + // True if the given form of access to the given type has side effects. + bool CheckSideEffects(SideEffectsOp::AccessType access, const TypePtr& t); + + // True if side effects to the given identifiers and aggregates invalidate + // the CSE opportunity. + bool CheckSideEffects(const IDSet& non_local_ids, const TypeSet& aggrs); + + // Helper function that marks the CSE opportunity as invalid and returns + // "true" (used by various methods to signal invalidation). + bool Invalid() { + is_valid = false; + return true; + } + + // Profile across all script functions. + std::shared_ptr pfs; + + // The list of identifiers for which an assignment to one of them + // renders the CSE unsafe. + const std::vector& ids; + + // Where in the AST to start our analysis. This is the initial + // assignment expression. + const Expr* start_e; + + // Where in the AST to end our analysis. + const Expr* end_e; + + // If what we're analyzing is a record element, then its offset. + // -1 if not. + int field; + + // The type of the record containing that element, if any. + TypePtr field_type; + + // The verdict so far. + bool is_valid = true; + + // Whether we've encountered the start/end expression in + // the AST traversal. + bool have_start_e = false; + bool have_end_e = false; + + // Whether analyzed expressions occur in the context of a statement + // that modifies an aggregate ("add" or "delete"), which changes the + // interpretation of the expressions. 
+ bool in_aggr_mod_stmt = false; +}; + +// Used for debugging, to communicate which expression wasn't +// reduced when we expected them all to be. +extern const Expr* non_reduced_perp; +extern bool checking_reduction; + +// Used to report a non-reduced expression. +extern bool NonReduced(const Expr* perp); + +} // namespace zeek::detail diff --git a/src/script_opt/FuncInfo.cc b/src/script_opt/FuncInfo.cc index 57730670e0e..e27dc8282c9 100644 --- a/src/script_opt/FuncInfo.cc +++ b/src/script_opt/FuncInfo.cc @@ -6,461 +6,507 @@ namespace zeek::detail { -// The following BiFs do not have any script-level side effects. It's -// followed by comments listing the BiFs that have been omitted, and why. -// -// See script_opt/ZAM/maint/README for maintenance of these lists. +// See script_opt/ZAM/maint/README for maintenance of the attributes +// in this file. + +// Attributes associated with functions. Currently these are mutually +// exclusive (i.e., no function will have more than one), but for now +// we use a bitmask-style approach so we can accommodate future attributes +// that might overlap. + +// BiF Functions that are not listed are assumed to have Unknown side effects. +// (These are described in comments after the table definition.) Script +// functions that are not listed are assumed to not be "special", i.e. known +// to the event engine. + +// Does not change script-level state (though may change internal state). +#define ATTR_NO_SCRIPT_SIDE_EFFECTS 0x1 + +// Does not change any Zeek state, internal or external. (May change +// state outside of Zeek, such as file system elements.) Implies +// ATTR_NO_SCRIPT_SIDE_EFFECTS. +#define ATTR_NO_ZEEK_SIDE_EFFECTS 0x2 + +// Calls made with the same arguments yield the same results. Implies +// ATTR_NO_ZEEK_SIDE_EFFECTS. 
+#define ATTR_IDEMPOTENT 0x4 -static std::unordered_set side_effects_free_BiFs = { - "Analyzer::__disable_all_analyzers", - "Analyzer::__disable_analyzer", - "Analyzer::__enable_analyzer", - "Analyzer::__has_tag", - "Analyzer::__name", - "Analyzer::__register_for_port", - "Analyzer::__schedule_analyzer", - "Analyzer::__tag", - "FileExtract::__set_limit", - "Files::__add_analyzer", - "Files::__analyzer_enabled", - "Files::__analyzer_name", - "Files::__disable_analyzer", - "Files::__disable_reassembly", - "Files::__enable_analyzer", - "Files::__enable_reassembly", - "Files::__file_exists", - "Files::__lookup_file", - "Files::__remove_analyzer", - "Files::__set_reassembly_buffer", - "Files::__set_timeout_interval", - "Files::__stop", - "Input::__create_analysis_stream", - "Input::__create_event_stream", - "Input::__create_table_stream", - "Input::__force_update", - "Input::__remove_stream", - "Log::__add_filter", - "Log::__create_stream", - "Log::__disable_stream", - "Log::__enable_stream", - "Log::__flush", - "Log::__remove_filter", - "Log::__remove_stream", - "Log::__set_buf", - "Option::any_set_to_any_vec", - "Option::set_change_handler", - "PacketAnalyzer::GTPV1::remove_gtpv1_connection", - "PacketAnalyzer::TEREDO::remove_teredo_connection", - "PacketAnalyzer::__disable_analyzer", - "PacketAnalyzer::__enable_analyzer", - "PacketAnalyzer::__set_ignore_checksums_nets", - "PacketAnalyzer::register_packet_analyzer", - "PacketAnalyzer::register_protocol_detection", - "PacketAnalyzer::try_register_packet_analyzer_by_name", - "Pcap::error", - "Pcap::findalldevs", - "Pcap::get_filter_state", - "Pcap::get_filter_state_string", - "Pcap::install_pcap_filter", - "Pcap::precompile_pcap_filter", - "Reporter::conn_weird", - "Reporter::error", - "Reporter::fatal", - "Reporter::fatal_error_with_core", - "Reporter::file_weird", - "Reporter::flow_weird", - "Reporter::get_weird_sampling_duration", - "Reporter::get_weird_sampling_global_list", - "Reporter::get_weird_sampling_rate", - 
"Reporter::get_weird_sampling_threshold", - "Reporter::get_weird_sampling_whitelist", - "Reporter::info", - "Reporter::net_weird", - "Reporter::set_weird_sampling_duration", - "Reporter::set_weird_sampling_global_list", - "Reporter::set_weird_sampling_rate", - "Reporter::set_weird_sampling_threshold", - "Reporter::set_weird_sampling_whitelist", - "Reporter::warning", - "Spicy::__resource_usage", - "Spicy::__toggle_analyzer", - "Supervisor::__create", - "Supervisor::__destroy", - "Supervisor::__init_cluster", - "Supervisor::__is_supervised", - "Supervisor::__is_supervisor", - "Supervisor::__node", - "Supervisor::__restart", - "Supervisor::__status", - "Supervisor::__stem_pid", - "Telemetry::__collect_histogram_metrics", - "Telemetry::__collect_metrics", - "Telemetry::__dbl_counter_family", - "Telemetry::__dbl_counter_inc", - "Telemetry::__dbl_counter_metric_get_or_add", - "Telemetry::__dbl_counter_value", - "Telemetry::__dbl_gauge_dec", - "Telemetry::__dbl_gauge_family", - "Telemetry::__dbl_gauge_inc", - "Telemetry::__dbl_gauge_metric_get_or_add", - "Telemetry::__dbl_gauge_value", - "Telemetry::__dbl_histogram_family", - "Telemetry::__dbl_histogram_metric_get_or_add", - "Telemetry::__dbl_histogram_observe", - "Telemetry::__dbl_histogram_sum", - "Telemetry::__int_counter_family", - "Telemetry::__int_counter_inc", - "Telemetry::__int_counter_metric_get_or_add", - "Telemetry::__int_counter_value", - "Telemetry::__int_gauge_dec", - "Telemetry::__int_gauge_family", - "Telemetry::__int_gauge_inc", - "Telemetry::__int_gauge_metric_get_or_add", - "Telemetry::__int_gauge_value", - "Telemetry::__int_histogram_family", - "Telemetry::__int_histogram_metric_get_or_add", - "Telemetry::__int_histogram_observe", - "Telemetry::__int_histogram_sum", - "__init_primary_bifs", - "__init_secondary_bifs", - "active_file", - "addr_to_counts", - "addr_to_ptr_name", - "addr_to_subnet", - "all_set", - "anonymize_addr", - "any_set", - "backtrace", - "bare_mode", - "bloomfilter_add", - 
"bloomfilter_basic_init", - "bloomfilter_basic_init2", - "bloomfilter_clear", - "bloomfilter_counting_init", - "bloomfilter_decrement", - "bloomfilter_internal_state", - "bloomfilter_intersect", - "bloomfilter_lookup", - "bloomfilter_merge", - "bytestring_to_count", - "bytestring_to_double", - "bytestring_to_float", - "bytestring_to_hexstr", - "calc_next_rotate", - "cat", - "cat_sep", - "ceil", - "check_subnet", - "clean", - "close", - "community_id_v1", - "compress_path", - "connection_exists", - "continue_processing", - "convert_for_pattern", - "count_substr", - "count_to_double", - "count_to_port", - "count_to_v4_addr", - "counts_to_addr", - "current_analyzer", - "current_event_time", - "current_time", - "decode_base64", - "decode_base64_conn", - "decode_netbios_name", - "decode_netbios_name_type", - "disable_event_group", - "disable_module_events", - "do_profiling", - "double_to_count", - "double_to_int", - "double_to_interval", - "double_to_time", - "dump_current_packet", - "dump_packet", - "dump_rule_stats", - "edit", - "enable_event_group", - "enable_module_events", - "enable_raw_output", - "encode_base64", - "ends_with", - "entropy_test_add", - "entropy_test_finish", - "entropy_test_init", - "enum_names", - "enum_to_int", - "escape_string", - "exit", - "exp", - "file_magic", - "file_mode", - "file_size", - "filter_subnet_table", - "find_all", - "find_all_ordered", - "find_entropy", - "find_last", - "find_str", - "floor", - "flush_all", - "fmt", - "fmt_ftp_port", - "fnv1a32", - "generate_all_events", - "get_broker_stats", - "get_conn_stats", - "get_conn_transport_proto", - "get_contents_file", - "get_current_conn_bytes_threshold", - "get_current_conn_duration_threshold", - "get_current_conn_packets_threshold", - "get_current_packet", - "get_current_packet_header", - "get_dns_stats", - "get_event_handler_stats", - "get_event_stats", - "get_file_analysis_stats", - "get_file_name", - "get_gap_stats", - "get_identifier_comments", - 
"get_identifier_declaring_script", - "get_login_state", - "get_matcher_stats", - "get_net_stats", - "get_orig_seq", - "get_package_readme", - "get_port_transport_proto", - "get_proc_stats", - "get_reassembler_stats", - "get_record_field_comments", - "get_record_field_declaring_script", - "get_reporter_stats", - "get_resp_seq", - "get_script_comments", - "get_thread_stats", - "get_timer_stats", - "getenv", - "gethostname", - "getpid", - "global_container_footprints", - "global_ids", - "global_options", - "gsub", - "has_event_group", - "has_module_events", - "have_spicy", - "have_spicy_analyzers", - "haversine_distance", - "hexdump", - "hexstr_to_bytestring", - "hll_cardinality_add", - "hll_cardinality_copy", - "hll_cardinality_estimate", - "hll_cardinality_init", - "hll_cardinality_merge_into", - "hrw_weight", - "identify_data", - "install_dst_addr_filter", - "install_dst_net_filter", - "install_src_addr_filter", - "install_src_net_filter", - "int_to_count", - "int_to_double", - "interval_to_double", - "is_alnum", - "is_alpha", - "is_ascii", - "is_file_analyzer", - "is_icmp_port", - "is_local_interface", - "is_num", - "is_packet_analyzer", - "is_processing_suspended", - "is_protocol_analyzer", - "is_remote_event", - "is_tcp_port", - "is_udp_port", - "is_v4_addr", - "is_v4_subnet", - "is_v6_addr", - "is_v6_subnet", - "is_valid_ip", - "join_string_set", - "join_string_vec", - "levenshtein_distance", - "ljust", - "ln", - "load_CPP", - "log10", - "log2", - "lookup_ID", - "lookup_addr", - "lookup_autonomous_system", - "lookup_connection", - "lookup_hostname", - "lookup_hostname_txt", - "lookup_location", - "lstrip", - "mask_addr", - "match_signatures", - "matching_subnets", - "md5_hash", - "md5_hash_finish", - "md5_hash_init", - "md5_hash_update", - "md5_hmac", - "mkdir", - "mmdb_open_asn_db", - "mmdb_open_location_db", - "network_time", - "open", - "open_for_append", - "packet_source", - "paraglob_equals", - "paraglob_init", - "paraglob_match", - 
"parse_distinguished_name", - "parse_eftp_port", - "parse_ftp_epsv", - "parse_ftp_pasv", - "parse_ftp_port", - "piped_exec", - "port_to_count", - "pow", - "preserve_prefix", - "preserve_subnet", - "print_raw", - "ptr_name_to_addr", - "rand", - "raw_bytes_to_v4_addr", - "raw_bytes_to_v6_addr", - "reading_live_traffic", - "reading_traces", - "record_fields", - "record_type_to_vector", - "remask_addr", - "remove_prefix", - "remove_suffix", - "rename", - "reverse", - "rfind_str", - "rjust", - "rmdir", - "rotate_file", - "rotate_file_by_name", - "routing0_data_to_addrs", - "rstrip", - "safe_shell_quote", - "same_object", - "sct_verify", - "set_buf", - "set_contents_file", - "set_current_conn_bytes_threshold", - "set_current_conn_duration_threshold", - "set_current_conn_packets_threshold", - "set_file_handle", - "set_inactivity_timeout", - "set_keys", - "set_login_state", - "set_network_time", - "set_record_packets", - "set_secret", - "set_ssl_established", - "setenv", - "sha1_hash", - "sha1_hash_finish", - "sha1_hash_init", - "sha1_hash_update", - "sha256_hash", - "sha256_hash_finish", - "sha256_hash_init", - "sha256_hash_update", - "skip_further_processing", - "skip_http_entity_data", - "skip_smtp_data", - "split_string", - "split_string1", - "split_string_all", - "split_string_n", - "sqrt", - "srand", - "starts_with", - "str_smith_waterman", - "str_split_indices", - "strcmp", - "strftime", - "string_cat", - "string_fill", - "string_to_ascii_hex", - "string_to_pattern", - "strip", - "strptime", - "strstr", - "sub", - "sub_bytes", - "subnet_to_addr", - "subnet_width", - "subst_string", - "suspend_processing", - "swap_case", - "syslog", - "system", - "system_env", - "table_keys", - "table_values", - "terminate", - "time_to_double", - "to_addr", - "to_count", - "to_double", - "to_int", - "to_json", - "to_lower", - "to_port", - "to_string_literal", - "to_subnet", - "to_title", - "to_upper", - "topk_add", - "topk_count", - "topk_epsilon", - "topk_get_top", - "topk_init", - 
"topk_merge", - "topk_merge_prune", - "topk_size", - "topk_sum", - "type_aliases", - "type_name", - "unescape_URI", - "uninstall_dst_addr_filter", - "uninstall_dst_net_filter", - "uninstall_src_addr_filter", - "uninstall_src_net_filter", - "unique_id", - "unique_id_from", - "unlink", - "uuid_to_string", - "val_footprint", - "write_file", - "x509_check_cert_hostname", - "x509_check_hostname", - "x509_from_der", - "x509_get_certificate_string", - "x509_issuer_name_hash", - "x509_ocsp_verify", - "x509_parse", - "x509_set_certificate_cache", - "x509_set_certificate_cache_hit_callback", - "x509_spki_hash", - "x509_subject_name_hash", - "x509_verify", - "zeek_args", - "zeek_is_terminating", - "zeek_version", - "zfill", +// The event engine knows about this script function and may call it +// during its processing. +#define ATTR_SPECIAL_SCRIPT_FUNC 0x8 + +static std::unordered_map func_attrs = { + // Script functions. + {"Analyzer::disabling_analyzer", ATTR_SPECIAL_SCRIPT_FUNC}, + {"Log::__default_rotation_postprocessor", ATTR_SPECIAL_SCRIPT_FUNC}, + {"Log::empty_post_delay_cb", ATTR_SPECIAL_SCRIPT_FUNC}, + {"Log::log_stream_policy", ATTR_SPECIAL_SCRIPT_FUNC}, + {"Log::rotation_format_func", ATTR_SPECIAL_SCRIPT_FUNC}, + {"Supervisor::stderr_hook", ATTR_SPECIAL_SCRIPT_FUNC}, + {"Supervisor::stdout_hook", ATTR_SPECIAL_SCRIPT_FUNC}, + {"assertion_failure", ATTR_SPECIAL_SCRIPT_FUNC}, + {"assertion_result", ATTR_SPECIAL_SCRIPT_FUNC}, + {"discarder_check_icmp", ATTR_SPECIAL_SCRIPT_FUNC}, + {"discarder_check_ip", ATTR_SPECIAL_SCRIPT_FUNC}, + {"discarder_check_tcp", ATTR_SPECIAL_SCRIPT_FUNC}, + {"discarder_check_udp", ATTR_SPECIAL_SCRIPT_FUNC}, + {"from_json_default_key_mapper", ATTR_SPECIAL_SCRIPT_FUNC}, + + // BiFs. 
+ {"Analyzer::__disable_all_analyzers", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Analyzer::__disable_analyzer", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Analyzer::__enable_analyzer", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Analyzer::__has_tag", ATTR_IDEMPOTENT}, + {"Analyzer::__name", ATTR_IDEMPOTENT}, + {"Analyzer::__register_for_port", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Analyzer::__schedule_analyzer", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Analyzer::__tag", ATTR_IDEMPOTENT}, + {"FileExtract::__set_limit", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Files::__add_analyzer", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Files::__analyzer_enabled", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"Files::__analyzer_name", ATTR_IDEMPOTENT}, + {"Files::__disable_analyzer", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Files::__disable_reassembly", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Files::__enable_analyzer", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Files::__enable_reassembly", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Files::__file_exists", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"Files::__lookup_file", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"Files::__remove_analyzer", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Files::__set_reassembly_buffer", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Files::__set_timeout_interval", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Files::__stop", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Input::__create_analysis_stream", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Input::__create_event_stream", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Input::__create_table_stream", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Input::__force_update", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Input::__remove_stream", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Log::__add_filter", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Log::__create_stream", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Log::__delay_finish", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Log::__disable_stream", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Log::__enable_stream", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Log::__flush", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Log::__get_delay_queue_size", ATTR_NO_ZEEK_SIDE_EFFECTS}, + 
{"Log::__remove_filter", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Log::__remove_stream", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Log::__set_buf", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Log::__set_max_delay_interval", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Log::__set_max_delay_queue_size", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Option::any_set_to_any_vec", ATTR_IDEMPOTENT}, + {"Option::set_change_handler", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"PacketAnalyzer::GTPV1::remove_gtpv1_connection", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"PacketAnalyzer::TEREDO::remove_teredo_connection", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"PacketAnalyzer::__disable_analyzer", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"PacketAnalyzer::__enable_analyzer", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"PacketAnalyzer::__set_ignore_checksums_nets", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"PacketAnalyzer::register_packet_analyzer", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"PacketAnalyzer::register_protocol_detection", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"PacketAnalyzer::try_register_packet_analyzer_by_name", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Pcap::error", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Pcap::findalldevs", ATTR_IDEMPOTENT}, + {"Pcap::get_filter_state", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"Pcap::get_filter_state_string", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"Pcap::install_pcap_filter", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Pcap::precompile_pcap_filter", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Reporter::conn_weird", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Reporter::error", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Reporter::fatal", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Reporter::fatal_error_with_core", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Reporter::file_weird", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Reporter::flow_weird", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Reporter::get_weird_sampling_duration", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"Reporter::get_weird_sampling_global_list", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"Reporter::get_weird_sampling_rate", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"Reporter::get_weird_sampling_threshold", 
ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"Reporter::get_weird_sampling_whitelist", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"Reporter::info", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Reporter::net_weird", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Reporter::set_weird_sampling_duration", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Reporter::set_weird_sampling_global_list", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Reporter::set_weird_sampling_rate", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Reporter::set_weird_sampling_threshold", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Reporter::set_weird_sampling_whitelist", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Reporter::warning", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Spicy::__resource_usage", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"Spicy::__toggle_analyzer", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Supervisor::__create", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Supervisor::__destroy", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Supervisor::__init_cluster", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Supervisor::__is_supervised", ATTR_IDEMPOTENT}, + {"Supervisor::__is_supervisor", ATTR_IDEMPOTENT}, + {"Supervisor::__node", ATTR_IDEMPOTENT}, + {"Supervisor::__restart", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Supervisor::__status", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"Supervisor::__stem_pid", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Telemetry::__collect_histogram_metrics", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Telemetry::__collect_metrics", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Telemetry::__dbl_counter_family", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Telemetry::__dbl_counter_inc", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Telemetry::__dbl_counter_metric_get_or_add", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Telemetry::__dbl_counter_value", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Telemetry::__dbl_gauge_dec", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Telemetry::__dbl_gauge_family", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Telemetry::__dbl_gauge_inc", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Telemetry::__dbl_gauge_metric_get_or_add", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Telemetry::__dbl_gauge_value", 
ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Telemetry::__dbl_histogram_family", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Telemetry::__dbl_histogram_metric_get_or_add", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Telemetry::__dbl_histogram_observe", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Telemetry::__dbl_histogram_sum", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Telemetry::__int_counter_family", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Telemetry::__int_counter_inc", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Telemetry::__int_counter_metric_get_or_add", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Telemetry::__int_counter_value", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Telemetry::__int_gauge_dec", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Telemetry::__int_gauge_family", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Telemetry::__int_gauge_inc", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Telemetry::__int_gauge_metric_get_or_add", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Telemetry::__int_gauge_value", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Telemetry::__int_histogram_family", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Telemetry::__int_histogram_metric_get_or_add", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Telemetry::__int_histogram_observe", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"Telemetry::__int_histogram_sum", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"__init_primary_bifs", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"__init_secondary_bifs", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"active_file", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"addr_to_counts", ATTR_IDEMPOTENT}, + {"addr_to_ptr_name", ATTR_IDEMPOTENT}, + {"addr_to_subnet", ATTR_IDEMPOTENT}, + {"all_set", ATTR_IDEMPOTENT}, + {"anonymize_addr", ATTR_IDEMPOTENT}, + {"any_set", ATTR_IDEMPOTENT}, + {"backtrace", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"bare_mode", ATTR_IDEMPOTENT}, + {"bloomfilter_add", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"bloomfilter_basic_init", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"bloomfilter_basic_init2", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"bloomfilter_clear", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"bloomfilter_counting_init", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"bloomfilter_decrement", 
ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"bloomfilter_internal_state", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"bloomfilter_intersect", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"bloomfilter_lookup", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"bloomfilter_merge", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"bytestring_to_count", ATTR_IDEMPOTENT}, + {"bytestring_to_double", ATTR_IDEMPOTENT}, + {"bytestring_to_float", ATTR_IDEMPOTENT}, + {"bytestring_to_hexstr", ATTR_IDEMPOTENT}, + {"calc_next_rotate", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"cat", ATTR_IDEMPOTENT}, + {"cat_sep", ATTR_IDEMPOTENT}, + {"ceil", ATTR_IDEMPOTENT}, + {"check_subnet", ATTR_IDEMPOTENT}, + {"clean", ATTR_IDEMPOTENT}, + {"close", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"community_id_v1", ATTR_IDEMPOTENT}, + {"compress_path", ATTR_IDEMPOTENT}, + {"connection_exists", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"continue_processing", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"convert_for_pattern", ATTR_IDEMPOTENT}, + {"count_substr", ATTR_IDEMPOTENT}, + {"count_to_double", ATTR_IDEMPOTENT}, + {"count_to_port", ATTR_IDEMPOTENT}, + {"count_to_v4_addr", ATTR_IDEMPOTENT}, + {"counts_to_addr", ATTR_IDEMPOTENT}, + {"current_analyzer", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"current_event_time", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"current_time", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"decode_base64", ATTR_IDEMPOTENT}, + {"decode_base64_conn", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"decode_netbios_name", ATTR_IDEMPOTENT}, + {"decode_netbios_name_type", ATTR_IDEMPOTENT}, + {"disable_event_group", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"disable_module_events", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"do_profiling", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"double_to_count", ATTR_IDEMPOTENT}, + {"double_to_int", ATTR_IDEMPOTENT}, + {"double_to_interval", ATTR_IDEMPOTENT}, + {"double_to_time", ATTR_IDEMPOTENT}, + {"dump_current_packet", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"dump_packet", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"dump_rule_stats", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"edit", ATTR_IDEMPOTENT}, + {"enable_event_group", 
ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"enable_module_events", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"enable_raw_output", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"encode_base64", ATTR_IDEMPOTENT}, + {"ends_with", ATTR_IDEMPOTENT}, + {"entropy_test_add", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"entropy_test_finish", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"entropy_test_init", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"enum_names", ATTR_IDEMPOTENT}, + {"enum_to_int", ATTR_IDEMPOTENT}, + {"escape_string", ATTR_IDEMPOTENT}, + {"exit", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"exp", ATTR_IDEMPOTENT}, + {"file_magic", ATTR_IDEMPOTENT}, + {"file_mode", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"file_size", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"filter_subnet_table", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"find_all", ATTR_IDEMPOTENT}, + {"find_all_ordered", ATTR_IDEMPOTENT}, + {"find_entropy", ATTR_IDEMPOTENT}, + {"find_last", ATTR_IDEMPOTENT}, + {"find_str", ATTR_IDEMPOTENT}, + {"floor", ATTR_IDEMPOTENT}, + {"flush_all", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"fmt", ATTR_IDEMPOTENT}, + {"fmt_ftp_port", ATTR_IDEMPOTENT}, + {"fnv1a32", ATTR_IDEMPOTENT}, + {"generate_all_events", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"get_broker_stats", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"get_conn_stats", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"get_conn_transport_proto", ATTR_IDEMPOTENT}, + {"get_contents_file", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"get_current_conn_bytes_threshold", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"get_current_conn_duration_threshold", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"get_current_conn_packets_threshold", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"get_current_packet", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"get_current_packet_header", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"get_dns_stats", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"get_event_handler_stats", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"get_event_stats", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"get_file_analysis_stats", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"get_file_name", ATTR_IDEMPOTENT}, + {"get_gap_stats", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"get_identifier_comments", 
ATTR_IDEMPOTENT}, + {"get_identifier_declaring_script", ATTR_IDEMPOTENT}, + {"get_login_state", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"get_matcher_stats", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"get_net_stats", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"get_orig_seq", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"get_package_readme", ATTR_IDEMPOTENT}, + {"get_port_transport_proto", ATTR_IDEMPOTENT}, + {"get_proc_stats", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"get_reassembler_stats", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"get_record_field_comments", ATTR_IDEMPOTENT}, + {"get_record_field_declaring_script", ATTR_IDEMPOTENT}, + {"get_reporter_stats", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"get_resp_seq", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"get_script_comments", ATTR_IDEMPOTENT}, + {"get_thread_stats", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"get_timer_stats", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"getenv", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"gethostname", ATTR_IDEMPOTENT}, + {"getpid", ATTR_IDEMPOTENT}, + {"global_container_footprints", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"global_ids", ATTR_IDEMPOTENT}, + {"global_options", ATTR_IDEMPOTENT}, + {"gsub", ATTR_IDEMPOTENT}, + {"has_event_group", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"has_module_events", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"have_spicy", ATTR_IDEMPOTENT}, + {"have_spicy_analyzers", ATTR_IDEMPOTENT}, + {"haversine_distance", ATTR_IDEMPOTENT}, + {"hexdump", ATTR_IDEMPOTENT}, + {"hexstr_to_bytestring", ATTR_IDEMPOTENT}, + {"hll_cardinality_add", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"hll_cardinality_copy", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"hll_cardinality_estimate", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"hll_cardinality_init", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"hll_cardinality_merge_into", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"hrw_weight", ATTR_IDEMPOTENT}, + {"identify_data", ATTR_IDEMPOTENT}, + {"install_dst_addr_filter", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"install_dst_net_filter", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"install_src_addr_filter", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"install_src_net_filter", 
ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"int_to_count", ATTR_IDEMPOTENT}, + {"int_to_double", ATTR_IDEMPOTENT}, + {"interval_to_double", ATTR_IDEMPOTENT}, + {"is_alnum", ATTR_IDEMPOTENT}, + {"is_alpha", ATTR_IDEMPOTENT}, + {"is_ascii", ATTR_IDEMPOTENT}, + {"is_file_analyzer", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"is_icmp_port", ATTR_IDEMPOTENT}, + {"is_local_interface", ATTR_IDEMPOTENT}, + {"is_num", ATTR_IDEMPOTENT}, + {"is_packet_analyzer", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"is_processing_suspended", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"is_protocol_analyzer", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"is_remote_event", ATTR_IDEMPOTENT}, + {"is_tcp_port", ATTR_IDEMPOTENT}, + {"is_udp_port", ATTR_IDEMPOTENT}, + {"is_v4_addr", ATTR_IDEMPOTENT}, + {"is_v4_subnet", ATTR_IDEMPOTENT}, + {"is_v6_addr", ATTR_IDEMPOTENT}, + {"is_v6_subnet", ATTR_IDEMPOTENT}, + {"is_valid_ip", ATTR_IDEMPOTENT}, + {"join_string_set", ATTR_IDEMPOTENT}, + {"join_string_vec", ATTR_IDEMPOTENT}, + {"levenshtein_distance", ATTR_IDEMPOTENT}, + {"ljust", ATTR_IDEMPOTENT}, + {"ln", ATTR_IDEMPOTENT}, + {"load_CPP", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"log10", ATTR_IDEMPOTENT}, + {"log2", ATTR_IDEMPOTENT}, + {"lookup_ID", ATTR_IDEMPOTENT}, + {"lookup_addr", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"lookup_autonomous_system", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"lookup_connection", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"lookup_hostname", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"lookup_hostname_txt", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"lookup_location", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"lstrip", ATTR_IDEMPOTENT}, + {"mask_addr", ATTR_IDEMPOTENT}, + {"match_signatures", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"matching_subnets", ATTR_IDEMPOTENT}, + {"md5_hash", ATTR_IDEMPOTENT}, + {"md5_hash_finish", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"md5_hash_init", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"md5_hash_update", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"md5_hmac", ATTR_IDEMPOTENT}, + {"mkdir", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"mmdb_open_asn_db", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + 
{"mmdb_open_location_db", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"network_time", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"open", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"open_for_append", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"packet_source", ATTR_IDEMPOTENT}, + {"paraglob_equals", ATTR_IDEMPOTENT}, + {"paraglob_init", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"paraglob_match", ATTR_IDEMPOTENT}, + {"parse_distinguished_name", ATTR_IDEMPOTENT}, + {"parse_eftp_port", ATTR_IDEMPOTENT}, + {"parse_ftp_epsv", ATTR_IDEMPOTENT}, + {"parse_ftp_pasv", ATTR_IDEMPOTENT}, + {"parse_ftp_port", ATTR_IDEMPOTENT}, + {"piped_exec", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"port_to_count", ATTR_IDEMPOTENT}, + {"pow", ATTR_IDEMPOTENT}, + {"preserve_prefix", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"preserve_subnet", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"print_raw", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"ptr_name_to_addr", ATTR_IDEMPOTENT}, + {"rand", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"raw_bytes_to_v4_addr", ATTR_IDEMPOTENT}, + {"raw_bytes_to_v6_addr", ATTR_IDEMPOTENT}, + {"reading_live_traffic", ATTR_IDEMPOTENT}, + {"reading_traces", ATTR_IDEMPOTENT}, + {"record_fields", ATTR_IDEMPOTENT}, + {"record_type_to_vector", ATTR_IDEMPOTENT}, + {"remask_addr", ATTR_IDEMPOTENT}, + {"remove_prefix", ATTR_IDEMPOTENT}, + {"remove_suffix", ATTR_IDEMPOTENT}, + {"rename", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"reverse", ATTR_IDEMPOTENT}, + {"rfind_str", ATTR_IDEMPOTENT}, + {"rjust", ATTR_IDEMPOTENT}, + {"rmdir", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"rotate_file", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"rotate_file_by_name", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"routing0_data_to_addrs", ATTR_IDEMPOTENT}, + {"rstrip", ATTR_IDEMPOTENT}, + {"safe_shell_quote", ATTR_IDEMPOTENT}, + {"same_object", ATTR_IDEMPOTENT}, + {"sct_verify", ATTR_IDEMPOTENT}, + {"set_buf", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"set_contents_file", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"set_current_conn_bytes_threshold", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"set_current_conn_duration_threshold", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + 
{"set_current_conn_packets_threshold", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"set_file_handle", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"set_inactivity_timeout", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"set_keys", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"set_login_state", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"set_network_time", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"set_record_packets", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"set_secret", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"set_ssl_established", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"setenv", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"sha1_hash", ATTR_IDEMPOTENT}, + {"sha1_hash_finish", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"sha1_hash_init", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"sha1_hash_update", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"sha256_hash", ATTR_IDEMPOTENT}, + {"sha256_hash_finish", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"sha256_hash_init", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"sha256_hash_update", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"skip_further_processing", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"skip_http_entity_data", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"skip_smtp_data", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"split_string", ATTR_IDEMPOTENT}, + {"split_string1", ATTR_IDEMPOTENT}, + {"split_string_all", ATTR_IDEMPOTENT}, + {"split_string_n", ATTR_IDEMPOTENT}, + {"sqrt", ATTR_IDEMPOTENT}, + {"srand", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"starts_with", ATTR_IDEMPOTENT}, + {"str_smith_waterman", ATTR_IDEMPOTENT}, + {"str_split_indices", ATTR_IDEMPOTENT}, + {"strcmp", ATTR_IDEMPOTENT}, + {"strftime", ATTR_IDEMPOTENT}, + {"string_cat", ATTR_IDEMPOTENT}, + {"string_fill", ATTR_IDEMPOTENT}, + {"string_to_ascii_hex", ATTR_IDEMPOTENT}, + {"string_to_pattern", ATTR_IDEMPOTENT}, + {"strip", ATTR_IDEMPOTENT}, + {"strptime", ATTR_IDEMPOTENT}, + {"strstr", ATTR_IDEMPOTENT}, + {"sub", ATTR_IDEMPOTENT}, + {"sub_bytes", ATTR_IDEMPOTENT}, + {"subnet_to_addr", ATTR_IDEMPOTENT}, + {"subnet_width", ATTR_IDEMPOTENT}, + {"subst_string", ATTR_IDEMPOTENT}, + {"suspend_processing", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"swap_case", 
ATTR_IDEMPOTENT}, + {"syslog", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"system", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"system_env", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"table_keys", ATTR_IDEMPOTENT}, + {"table_pattern_matcher_stats", ATTR_IDEMPOTENT}, + {"table_values", ATTR_IDEMPOTENT}, + {"terminate", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"time_to_double", ATTR_IDEMPOTENT}, + {"to_addr", ATTR_IDEMPOTENT}, + {"to_count", ATTR_IDEMPOTENT}, + {"to_double", ATTR_IDEMPOTENT}, + {"to_int", ATTR_IDEMPOTENT}, + {"to_json", ATTR_IDEMPOTENT}, + {"to_lower", ATTR_IDEMPOTENT}, + {"to_port", ATTR_IDEMPOTENT}, + {"to_string_literal", ATTR_IDEMPOTENT}, + {"to_subnet", ATTR_IDEMPOTENT}, + {"to_title", ATTR_IDEMPOTENT}, + {"to_upper", ATTR_IDEMPOTENT}, + {"topk_add", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"topk_count", ATTR_IDEMPOTENT}, + {"topk_epsilon", ATTR_IDEMPOTENT}, + {"topk_get_top", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"topk_init", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"topk_merge", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"topk_merge_prune", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"topk_size", ATTR_IDEMPOTENT}, + {"topk_sum", ATTR_IDEMPOTENT}, + {"type_aliases", ATTR_IDEMPOTENT}, + {"type_name", ATTR_IDEMPOTENT}, + {"unescape_URI", ATTR_IDEMPOTENT}, + {"uninstall_dst_addr_filter", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"uninstall_dst_net_filter", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"uninstall_src_addr_filter", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"uninstall_src_net_filter", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"unique_id", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"unique_id_from", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"unlink", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"uuid_to_string", ATTR_IDEMPOTENT}, + {"val_footprint", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"write_file", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"x509_check_cert_hostname", ATTR_IDEMPOTENT}, + {"x509_check_hostname", ATTR_IDEMPOTENT}, + {"x509_from_der", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"x509_get_certificate_string", ATTR_IDEMPOTENT}, + {"x509_issuer_name_hash", ATTR_IDEMPOTENT}, + {"x509_ocsp_verify", 
ATTR_IDEMPOTENT}, + {"x509_parse", ATTR_IDEMPOTENT}, + {"x509_set_certificate_cache", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"x509_set_certificate_cache_hit_callback", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"x509_spki_hash", ATTR_IDEMPOTENT}, + {"x509_subject_name_hash", ATTR_IDEMPOTENT}, + {"x509_verify", ATTR_IDEMPOTENT}, + {"zeek_args", ATTR_IDEMPOTENT}, + {"zeek_is_terminating", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"zeek_version", ATTR_IDEMPOTENT}, + {"zfill", ATTR_IDEMPOTENT}, }; // Ones not listed: @@ -472,6 +518,9 @@ static std::unordered_set side_effects_free_BiFs = { // Cluster::publish_rr // These call script functions to get topic names. // +// Log::__delay +// Can invoke a callback function specified at run-time. +// // Log::__write // Calls log policy functions. // @@ -501,6 +550,22 @@ static std::unordered_set side_effects_free_BiFs = { // Some of these have side effects that could be checked for in a specific // context, but the gains from doing so likely aren't worth the complexity. -bool is_side_effect_free(std::string func_name) { return side_effects_free_BiFs.count(func_name) > 0; } +bool is_special_script_func(std::string func_name) { + auto f_attr = func_attrs.find(func_name); + return f_attr != func_attrs.end() && (f_attr->second & ATTR_SPECIAL_SCRIPT_FUNC) != 0; +} + +bool is_idempotent(std::string func_name) { + auto f_attr = func_attrs.find(func_name); + return f_attr != func_attrs.end() && (f_attr->second & ATTR_IDEMPOTENT) != 0; +} + +bool has_no_script_side_effects(std::string func_name) { + auto f_attr = func_attrs.find(func_name); + if ( f_attr == func_attrs.end() ) + return false; + + return (f_attr->second & (ATTR_NO_SCRIPT_SIDE_EFFECTS | ATTR_NO_ZEEK_SIDE_EFFECTS | ATTR_IDEMPOTENT)) != 0; +} } // namespace zeek::detail diff --git a/src/script_opt/FuncInfo.h b/src/script_opt/FuncInfo.h index a7925619ae2..f4bf333cc0e 100644 --- a/src/script_opt/FuncInfo.h +++ b/src/script_opt/FuncInfo.h @@ -1,10 +1,6 @@ // See the file "COPYING" in the main 
distribution directory for copyright. -// Utility functions that return information about Zeek functions. Currently -// this is limited to information about whether BiFs are side-effect-free -// (from a Zeek scripting perspective), but could be expanded in the future -// to include information about Zeek script functions, idempotency, and the -// like. +// Utility functions that return information about Zeek functions. #pragma once @@ -12,6 +8,17 @@ namespace zeek::detail { -extern bool is_side_effect_free(std::string func_name); +// A "special script function" is one that the event engine explicitly +// knows about. +extern bool is_special_script_func(std::string func_name); + +// An idempotent function returns the same value when called with the +// same arguments (and has no meaningful side effects in terms of script-level +// or Zeek-internal state). +extern bool is_idempotent(std::string func_name); + +// Whether the given function (currently, just BiFs) has no Zeek-script-level +// side effects. +extern bool has_no_script_side_effects(std::string func_name); } // namespace zeek::detail diff --git a/src/script_opt/GenIDDefs.cc b/src/script_opt/GenIDDefs.cc index 836e99586b9..b05f334ea64 100644 --- a/src/script_opt/GenIDDefs.cc +++ b/src/script_opt/GenIDDefs.cc @@ -12,12 +12,12 @@ namespace zeek::detail { -GenIDDefs::GenIDDefs(std::shared_ptr _pf, const Func* f, ScopePtr scope, StmtPtr body) +GenIDDefs::GenIDDefs(std::shared_ptr _pf, const FuncPtr& f, ScopePtr scope, StmtPtr body) : pf(std::move(_pf)) { TraverseFunction(f, scope, body); } -void GenIDDefs::TraverseFunction(const Func* f, ScopePtr scope, StmtPtr body) { +void GenIDDefs::TraverseFunction(const FuncPtr& f, ScopePtr scope, StmtPtr body) { func_flavor = f->Flavor(); // Establish the outermost set of identifiers. 
diff --git a/src/script_opt/GenIDDefs.h b/src/script_opt/GenIDDefs.h index a8b05864cb5..5074415f54c 100644 --- a/src/script_opt/GenIDDefs.h +++ b/src/script_opt/GenIDDefs.h @@ -12,12 +12,12 @@ namespace zeek::detail { class GenIDDefs : public TraversalCallback { public: - GenIDDefs(std::shared_ptr _pf, const Func* f, ScopePtr scope, StmtPtr body); + GenIDDefs(std::shared_ptr _pf, const FuncPtr& f, ScopePtr scope, StmtPtr body); private: // Traverses the given function body, using the first two // arguments for context. - void TraverseFunction(const Func* f, ScopePtr scope, StmtPtr body); + void TraverseFunction(const FuncPtr& f, ScopePtr scope, StmtPtr body); TraversalCode PreStmt(const Stmt*) override; void AnalyzeSwitch(const SwitchStmt* sw); diff --git a/src/script_opt/Inline.cc b/src/script_opt/Inline.cc index d299c815b07..56d71ba366b 100644 --- a/src/script_opt/Inline.cc +++ b/src/script_opt/Inline.cc @@ -5,6 +5,7 @@ #include "zeek/Desc.h" #include "zeek/EventRegistry.h" #include "zeek/module_util.h" +#include "zeek/script_opt/FuncInfo.h" #include "zeek/script_opt/ProfileFunc.h" #include "zeek/script_opt/ScriptOpt.h" #include "zeek/script_opt/StmtOptInfo.h" @@ -23,6 +24,16 @@ void Inliner::Analyze() { // Prime the call set for each function with the functions it // directly calls. for ( auto& f : funcs ) { + // For any function explicitly known to the event engine, it can + // be hard to analyze whether there's a possibility that when + // executing the function, doing so will tickle the event engine + // into calling it recursively. So we remove these up front. + // + // We deal with cases where these defaults are overridden to refer + // to some other function below, when we go through indirect functions. + if ( is_special_script_func(f.Func()->Name()) ) + continue; + std::unordered_set cs; // Aspirational .... 
@@ -40,6 +51,32 @@ void Inliner::Analyze() { } call_set[f.Func()] = cs; + + for ( auto& ind_func : f.Profile()->IndirectFuncs() ) { + auto& v = ind_func->GetVal(); + if ( ! v ) + // Global doesn't correspond to an actual function body. + continue; + + auto vf = v->AsFunc(); + if ( vf->GetKind() != BuiltinFunc::SCRIPT_FUNC ) + // Not of analysis interest. + continue; + + auto sf = static_cast(vf); + + // If we knew transitively that the function didn't lead to any + // indirect calls, nor to calls to unsafe BiFs that themselves + // might do so, then we could know that this function isn't + // recursive via indirection. It's not clear, however, that + // identifying such cases is worth the trouble, other than + // for cutting down noise from the following recursion report. + + if ( report_recursive ) + printf("%s is used indirectly, and thus potentially recursively\n", sf->Name()); + + non_recursive_funcs.erase(sf); + } } // Transitive closure. If we had any self-respect, we'd implement diff --git a/src/script_opt/ObjMgr.h b/src/script_opt/ObjMgr.h index 4a8f35695c8..93698ef4ae5 100644 --- a/src/script_opt/ObjMgr.h +++ b/src/script_opt/ObjMgr.h @@ -18,6 +18,8 @@ #include #include +#include "zeek/IntrusivePtr.h" + namespace zeek::detail { // A class that keeps a const Obj* pointer live - used to isolate instances diff --git a/src/script_opt/ProfileFunc.cc b/src/script_opt/ProfileFunc.cc index fc5abc6cbde..0457482703e 100644 --- a/src/script_opt/ProfileFunc.cc +++ b/src/script_opt/ProfileFunc.cc @@ -193,22 +193,21 @@ TraversalCode ProfileFunc::PreExpr(const Expr* e) { all_globals.insert(id); const auto& t = id->GetType(); - if ( t->Tag() == TYPE_FUNC && t->AsFuncType()->Flavor() == FUNC_FLAVOR_EVENT ) - events.insert(id->Name()); + if ( t->Tag() == TYPE_FUNC ) + if ( t->AsFuncType()->Flavor() == FUNC_FLAVOR_EVENT ) + events.insert(id->Name()); break; } - // This is a tad ugly. 
Unfortunately due to the - // weird way that Zeek function *declarations* work, - // there's no reliable way to get the list of - // parameters for a function *definition*, since - // they can have different names than what's present - // in the declaration. So we identify them directly, - // by knowing that they come at the beginning of the - // frame ... and being careful to avoid misconfusing - // a lambda capture with a low frame offset as a - // parameter. + // This is a tad ugly. Unfortunately due to the weird way + // that Zeek function *declarations* work, there's no reliable + // way to get the list of parameters for a function *definition*, + // since they can have different names than what's present in the + // declaration. So we identify them directly, by knowing that + // they come at the beginning of the frame ... and being careful + // to avoid misconfusing a lambda capture with a low frame offset + // as a parameter. if ( captures.count(id) == 0 && id->Offset() < num_params ) params.insert(id); @@ -251,11 +250,25 @@ TraversalCode ProfileFunc::PreExpr(const Expr* e) { case EXPR_REMOVE_FROM: case EXPR_ASSIGN: { auto lhs = e->GetOp1(); + bool is_assign = e->Tag() == EXPR_ASSIGN; + + if ( is_assign ) { + // Check for this being an assignment to a function (as + // opposed to a call). If so, then the function can be + // used indirectly. + auto rhs = e->GetOp2(); + if ( rhs->Tag() == EXPR_NAME ) { + auto& rhs_id = rhs->AsNameExpr()->IdPtr(); + const auto& t = rhs_id->GetType(); + if ( t->Tag() == TYPE_FUNC && t->AsFuncType()->Flavor() == FUNC_FLAVOR_FUNCTION ) + indirect_funcs.insert(rhs_id.get()); + } + } if ( lhs->Tag() == EXPR_REF ) lhs = lhs->GetOp1(); - else if ( e->Tag() == EXPR_ASSIGN ) + else if ( is_assign ) // This isn't a direct assignment, but instead an overloaded // use of "=" such as in a table constructor. 
break; @@ -267,7 +280,7 @@ TraversalCode ProfileFunc::PreExpr(const Expr* e) { auto id = lhs->AsNameExpr()->Id(); TrackAssignment(id); - if ( e->Tag() == EXPR_ASSIGN ) { + if ( is_assign ) { auto a_e = static_cast(e); auto& av = a_e->AssignVal(); if ( av ) @@ -287,7 +300,7 @@ TraversalCode ProfileFunc::PreExpr(const Expr* e) { // assignment "a[b] = aggr", it's not a[b]'s type but // rather a's type. However, for any of the others, // e.g. "a[b] -= aggr" it is a[b]'s type. - if ( e->Tag() == EXPR_ASSIGN ) + if ( is_assign ) aggr_mods.insert(lhs_aggr_t.get()); else aggr_mods.insert(lhs_t.get()); @@ -325,21 +338,40 @@ TraversalCode ProfileFunc::PreExpr(const Expr* e) { case EXPR_CALL: { auto c = e->AsCallExpr(); + auto args = c->Args(); auto f = c->Func(); - if ( f->Tag() != EXPR_NAME ) { - does_indirect_calls = true; - return TC_CONTINUE; - } + const NameExpr* n = nullptr; + const ID* func = nullptr; - auto n = f->AsNameExpr(); - auto func = n->Id(); + if ( f->Tag() == EXPR_NAME ) { + n = f->AsNameExpr(); + func = n->Id(); - if ( ! func->IsGlobal() ) { + if ( ! func->IsGlobal() ) + does_indirect_calls = true; + } + else does_indirect_calls = true; - return TC_CONTINUE; + + // Check for whether any of the arguments is a bare function. + // If so, then note that that function may be used indirectly, + // unless the function being called is known to be idempotent. + if ( does_indirect_calls || ! is_idempotent(func->Name()) ) { + for ( auto& arg : args->Exprs() ) + if ( arg->Tag() == EXPR_NAME ) { + auto& arg_id = arg->AsNameExpr()->IdPtr(); + const auto& t = arg_id->GetType(); + if ( t->Tag() == TYPE_FUNC && t->AsFuncType()->Flavor() == FUNC_FLAVOR_FUNCTION ) + indirect_funcs.insert(arg_id.get()); + } } + if ( does_indirect_calls ) + // We waited on doing this until after checking for + // indirect functions. 
+ return TC_CONTINUE; + all_globals.insert(func); auto func_v = func->GetVal(); @@ -361,7 +393,6 @@ TraversalCode ProfileFunc::PreExpr(const Expr* e) { } // Recurse into the arguments. - auto args = c->Args(); args->Traverse(this); // Do the following explicitly, since we won't be recursing @@ -604,7 +635,7 @@ bool ProfileFuncs::GetCallSideEffects(const NameExpr* n, IDSet& non_local_ids, T auto func = fv->AsFunc(); if ( func->GetKind() == Func::BUILTIN_FUNC ) { - if ( ! is_side_effect_free(func->Name()) ) + if ( ! has_no_script_side_effects(func->Name()) ) is_unknown = true; return true; } @@ -911,8 +942,11 @@ p_hash_type ProfileFuncs::HashType(const Type* t) { // We don't hash the field name, as in some contexts // those are ignored. - if ( f->attrs && do_hash ) - h = merge_p_hashes(h, HashAttrs(f->attrs)); + if ( f->attrs ) { + if ( do_hash ) + h = merge_p_hashes(h, HashAttrs(f->attrs)); + AnalyzeAttrs(f->attrs.get(), ft); + } } } break; @@ -929,24 +963,8 @@ p_hash_type ProfileFuncs::HashType(const Type* t) { auto ft = t->AsFuncType(); auto flv = ft->FlavorString(); h = merge_p_hashes(h, p_hash(flv)); - - // We deal with the parameters individually, rather than just - // recursing into the RecordType that's used (for convenience) - // to represent them. We do so because their properties are - // somewhat different - in particular, an &default on a parameter - // field is resolved in the context of the caller, not the - // function itself, and so we don't want to track those as - // attributes associated with the function body's execution. 
h = merge_p_hashes(h, p_hash("params")); - auto params = ft->Params()->Types(); - - if ( params ) { - h = merge_p_hashes(h, p_hash(params->length())); - - for ( auto p : *params ) - h = merge_p_hashes(h, HashType(p->type)); - } - + h = merge_p_hashes(h, HashType(ft->Params())); h = merge_p_hashes(h, p_hash("func-yield")); h = merge_p_hashes(h, HashType(ft->Yield())); } break; @@ -1150,7 +1168,7 @@ bool ProfileFuncs::DefinitelyHasNoSideEffects(const ExprPtr& e) const { return false; for ( auto& b : pf->BiFGlobals() ) - if ( ! is_side_effect_free(b->Name()) ) + if ( ! has_no_script_side_effects(b->Name()) ) return false; return true; @@ -1241,7 +1259,7 @@ bool ProfileFuncs::AssessSideEffects(const ProfileFunc* pf, IDSet& non_local_ids } for ( auto& b : pf->BiFGlobals() ) - if ( ! is_side_effect_free(b->Name()) ) { + if ( ! has_no_script_side_effects(b->Name()) ) { is_unknown = true; return true; } diff --git a/src/script_opt/ProfileFunc.h b/src/script_opt/ProfileFunc.h index 27c68a215c7..55814a96863 100644 --- a/src/script_opt/ProfileFunc.h +++ b/src/script_opt/ProfileFunc.h @@ -120,6 +120,7 @@ class ProfileFunc : public TraversalCallback { const std::unordered_set& TypeSwitches() const { return type_switches; } bool DoesIndirectCalls() const { return does_indirect_calls; } + const IDSet& IndirectFuncs() const { return indirect_funcs; } int NumParams() const { return num_params; } int NumLambdas() const { return lambdas.size(); } @@ -271,6 +272,11 @@ class ProfileFunc : public TraversalCallback { // than simply a function's (global) name. bool does_indirect_calls = false; + // Functions (not hooks or event handlers) that are referred to in + // a context other than being called. These might be used elsewhere + // for indirect calls. + IDSet indirect_funcs; + // Additional values present in the body that should be factored // into its hash. 
std::vector addl_hashes; diff --git a/src/script_opt/Reduce.cc b/src/script_opt/Reduce.cc index 3a0fb8ba0d3..d45bf0fa1d3 100644 --- a/src/script_opt/Reduce.cc +++ b/src/script_opt/Reduce.cc @@ -2,23 +2,15 @@ #include "zeek/script_opt/Reduce.h" -#include "zeek/Desc.h" -#include "zeek/Expr.h" -#include "zeek/Func.h" -#include "zeek/ID.h" -#include "zeek/Reporter.h" -#include "zeek/Scope.h" -#include "zeek/Stmt.h" -#include "zeek/Var.h" +#include "zeek/script_opt/CSE.h" #include "zeek/script_opt/ExprOptInfo.h" -#include "zeek/script_opt/FuncInfo.h" #include "zeek/script_opt/StmtOptInfo.h" #include "zeek/script_opt/TempVar.h" namespace zeek::detail { -Reducer::Reducer(const ScriptFunc* func, std::shared_ptr _pf, ProfileFuncs& _pfs) - : pf(std::move(_pf)), pfs(_pfs) { +Reducer::Reducer(const ScriptFuncPtr& func, std::shared_ptr _pf, std::shared_ptr _pfs) + : pf(std::move(_pf)), pfs(std::move(_pfs)) { auto& ft = func->GetType(); // Track the parameters so we don't remap them. @@ -442,15 +434,15 @@ bool Reducer::ExprValid(const ID* id, const Expr* e1, const Expr* e2) const { auto aggr = e1->GetOp1(); auto aggr_t = aggr->GetType(); - if ( pfs.HasSideEffects(SideEffectsOp::READ, aggr_t) ) + if ( pfs->HasSideEffects(SideEffectsOp::READ, aggr_t) ) has_side_effects = true; - else if ( aggr_t->Tag() == TYPE_TABLE && pfs.IsTableWithDefaultAggr(aggr_t.get()) ) + else if ( aggr_t->Tag() == TYPE_TABLE && pfs->IsTableWithDefaultAggr(aggr_t.get()) ) has_side_effects = true; } else if ( e1->Tag() == EXPR_RECORD_CONSTRUCTOR || e1->Tag() == EXPR_RECORD_COERCE ) - has_side_effects = pfs.HasSideEffects(SideEffectsOp::CONSTRUCTION, e1->GetType()); + has_side_effects = pfs->HasSideEffects(SideEffectsOp::CONSTRUCTION, e1->GetType()); e1_se = ExprSideEffects(has_side_effects); } @@ -825,265 +817,6 @@ std::shared_ptr Reducer::FindTemporary(const ID* id) const { return tmp->second; } -CSE_ValidityChecker::CSE_ValidityChecker(ProfileFuncs& _pfs, const std::vector& _ids, const Expr* _start_e, - 
const Expr* _end_e) - : pfs(_pfs), ids(_ids) { - start_e = _start_e; - end_e = _end_e; - - // Track whether this is a record assignment, in which case - // we're attuned to assignments to the same field for the - // same type of record. - if ( start_e->Tag() == EXPR_FIELD ) { - field = start_e->AsFieldExpr()->Field(); - - // Track the type of the record, too, so we don't confuse - // field references to different records that happen to - // have the same offset as potential aliases. - field_type = start_e->GetOp1()->GetType(); - } - - else - field = -1; // flags that there's no relevant field -} - -TraversalCode CSE_ValidityChecker::PreStmt(const Stmt* s) { - auto t = s->Tag(); - - if ( t == STMT_WHEN ) { - // These are too hard to analyze - they result in lambda calls - // that can affect aggregates, etc. - is_valid = false; - return TC_ABORTALL; - } - - if ( t == STMT_ADD || t == STMT_DELETE ) - in_aggr_mod_stmt = true; - - return TC_CONTINUE; -} - -TraversalCode CSE_ValidityChecker::PostStmt(const Stmt* s) { - if ( s->Tag() == STMT_ADD || s->Tag() == STMT_DELETE ) - in_aggr_mod_stmt = false; - - return TC_CONTINUE; -} - -TraversalCode CSE_ValidityChecker::PreExpr(const Expr* e) { - if ( e == start_e ) { - ASSERT(! have_start_e); - have_start_e = true; - - // Don't analyze the expression, as it's our starting - // point and we don't want to conflate its properties - // with those of any intervening expressions. - return TC_CONTINUE; - } - - if ( e == end_e ) { - if ( ! have_start_e ) - reporter->InternalError("CSE_ValidityChecker: saw end but not start"); - - ASSERT(! have_end_e); - have_end_e = true; - - // ... and we're now done. - return TC_ABORTALL; - } - - if ( ! have_start_e ) - // We don't yet have a starting point. - return TC_CONTINUE; - - // We have a starting point, and not yet an ending point. 
- auto t = e->Tag(); - - switch ( t ) { - case EXPR_ASSIGN: { - auto lhs_ref = e->GetOp1()->AsRefExprPtr(); - auto lhs = lhs_ref->GetOp1()->AsNameExpr(); - - if ( CheckID(lhs->Id(), false) ) - return TC_ABORTALL; - - // Note, we don't use CheckAggrMod() because this is a plain - // assignment. It might be changing a variable's binding to - // an aggregate ("aggr_var = new_aggr_val"), but we don't - // introduce temporaries that are simply aliases of existing - // variables (e.g., we don't have "::#8 = aggr_var"), - // and so there's no concern that the temporary could now be - // referring to the wrong aggregate. If instead we have - // "::#8 = aggr_var$foo", then a reassignment here - // to "aggr_var" will already be caught by CheckID(). - } break; - - case EXPR_INDEX_ASSIGN: { - auto lhs_aggr = e->GetOp1(); - auto lhs_aggr_id = lhs_aggr->AsNameExpr()->Id(); - - if ( CheckID(lhs_aggr_id, true) || CheckTableMod(lhs_aggr->GetType()) ) - return TC_ABORTALL; - } break; - - case EXPR_FIELD_LHS_ASSIGN: { - auto lhs = e->GetOp1(); - auto lhs_aggr_id = lhs->AsNameExpr()->Id(); - auto lhs_field = e->AsFieldLHSAssignExpr()->Field(); - - if ( CheckID(lhs_aggr_id, true) ) - return TC_ABORTALL; - if ( lhs_field == field && same_type(lhs_aggr_id->GetType(), field_type) ) { - is_valid = false; - return TC_ABORTALL; - } - } break; - - case EXPR_APPEND_TO: - // This doesn't directly change any identifiers, but does - // alter an aggregate. - if ( CheckAggrMod(e->GetType()) ) - return TC_ABORTALL; - break; - - case EXPR_CALL: - if ( CheckCall(e->AsCallExpr()) ) - return TC_ABORTALL; - break; - - case EXPR_TABLE_CONSTRUCTOR: - // These have EXPR_ASSIGN's in them that don't - // correspond to actual assignments to variables, - // so we don't want to traverse them. - return TC_ABORTSTMT; - - case EXPR_RECORD_COERCE: - case EXPR_RECORD_CONSTRUCTOR: - // Note, record coercion behaves like constructors in terms of - // potentially executing &default functions. 
In either case, - // the type of the expression reflects the type we want to analyze - // for side effects. - if ( CheckRecordConstructor(e->GetType()) ) - return TC_ABORTALL; - break; - - case EXPR_INDEX: - case EXPR_FIELD: { - // We treat these together because they both have to be checked - // when inside an "add" or "delete" statement. - auto aggr = e->GetOp1(); - auto aggr_t = aggr->GetType(); - - if ( in_aggr_mod_stmt ) { - auto aggr_id = aggr->AsNameExpr()->Id(); - - if ( CheckID(aggr_id, true) || CheckAggrMod(aggr_t) ) - return TC_ABORTALL; - } - - else if ( t == EXPR_INDEX && aggr_t->Tag() == TYPE_TABLE ) { - if ( CheckTableRef(aggr_t) ) - return TC_ABORTALL; - } - } break; - - default: break; - } - - return TC_CONTINUE; -} - -bool CSE_ValidityChecker::CheckID(const ID* id, bool ignore_orig) { - for ( auto i : ids ) { - if ( ignore_orig && i == ids.front() ) - continue; - - if ( id == i ) - return Invalid(); // reassignment - } - - return false; -} - -bool CSE_ValidityChecker::CheckAggrMod(const TypePtr& t) { - if ( ! IsAggr(t) ) - return false; - - for ( auto i : ids ) - if ( same_type(t, i->GetType()) ) - return Invalid(); - - return false; -} - -bool CSE_ValidityChecker::CheckRecordConstructor(const TypePtr& t) { - if ( t->Tag() != TYPE_RECORD ) - return false; - - return CheckSideEffects(SideEffectsOp::CONSTRUCTION, t); -} - -bool CSE_ValidityChecker::CheckTableMod(const TypePtr& t) { - if ( CheckAggrMod(t) ) - return true; - - if ( t->Tag() != TYPE_TABLE ) - return false; - - return CheckSideEffects(SideEffectsOp::WRITE, t); -} - -bool CSE_ValidityChecker::CheckTableRef(const TypePtr& t) { return CheckSideEffects(SideEffectsOp::READ, t); } - -bool CSE_ValidityChecker::CheckCall(const CallExpr* c) { - auto func = c->Func(); - std::string desc; - if ( func->Tag() != EXPR_NAME ) - // Can't analyze indirect calls. 
- return Invalid(); - - IDSet non_local_ids; - TypeSet aggrs; - bool is_unknown = false; - - auto resolved = pfs.GetCallSideEffects(func->AsNameExpr(), non_local_ids, aggrs, is_unknown); - ASSERT(resolved); - - if ( is_unknown || CheckSideEffects(non_local_ids, aggrs) ) - return Invalid(); - - return false; -} - -bool CSE_ValidityChecker::CheckSideEffects(SideEffectsOp::AccessType access, const TypePtr& t) { - IDSet non_local_ids; - TypeSet aggrs; - - if ( pfs.GetSideEffects(access, t.get(), non_local_ids, aggrs) ) - return Invalid(); - - return CheckSideEffects(non_local_ids, aggrs); -} - -bool CSE_ValidityChecker::CheckSideEffects(const IDSet& non_local_ids, const TypeSet& aggrs) { - if ( non_local_ids.empty() && aggrs.empty() ) - // This is far and away the most common case. - return false; - - for ( auto i : ids ) { - for ( auto nli : non_local_ids ) - if ( nli == i ) - return Invalid(); - - auto i_t = i->GetType(); - for ( auto a : aggrs ) - if ( same_type(a, i_t.get()) ) - return Invalid(); - } - - return false; -} - const Expr* non_reduced_perp; bool checking_reduction; diff --git a/src/script_opt/Reduce.h b/src/script_opt/Reduce.h index b4f4cb47434..2bb1036445d 100644 --- a/src/script_opt/Reduce.h +++ b/src/script_opt/Reduce.h @@ -2,10 +2,6 @@ #pragma once -#include "zeek/Expr.h" -#include "zeek/Scope.h" -#include "zeek/Stmt.h" -#include "zeek/Traverse.h" #include "zeek/script_opt/ObjMgr.h" #include "zeek/script_opt/ProfileFunc.h" @@ -16,7 +12,7 @@ class TempVar; class Reducer { public: - Reducer(const ScriptFunc* func, std::shared_ptr pf, ProfileFuncs& pfs); + Reducer(const ScriptFuncPtr& func, std::shared_ptr pf, std::shared_ptr pfs); StmtPtr Reduce(StmtPtr s); @@ -236,7 +232,7 @@ class Reducer { std::shared_ptr pf; // Profile across all script functions - used for optimization decisions. - ProfileFuncs& pfs; + std::shared_ptr pfs; // Tracks the temporary variables created during the reduction/ // optimization process. 
@@ -314,104 +310,6 @@ class Reducer { bool opt_ready = false; }; -// Helper class that walks an AST to determine whether it's safe -// to substitute a common subexpression (which at this point is -// an assignment to a variable) created using the assignment -// expression at position "start_e", at the location specified by -// the expression at position "end_e". -// -// See Reducer::ExprValid for a discussion of what's required -// for safety. - -class CSE_ValidityChecker : public TraversalCallback { -public: - CSE_ValidityChecker(ProfileFuncs& pfs, const std::vector& ids, const Expr* start_e, const Expr* end_e); - - TraversalCode PreStmt(const Stmt*) override; - TraversalCode PostStmt(const Stmt*) override; - TraversalCode PreExpr(const Expr*) override; - - // Returns the ultimate verdict re safety. - bool IsValid() const { - if ( ! is_valid ) - return false; - - if ( ! have_end_e ) - reporter->InternalError("CSE_ValidityChecker: saw start but not end"); - return true; - } - -protected: - // Returns true if an assignment involving the given identifier on - // the LHS is in conflict with the identifiers we're tracking. - bool CheckID(const ID* id, bool ignore_orig); - - // Returns true if a modification to an aggregate of the given type - // potentially aliases with one of the identifiers we're tracking. - bool CheckAggrMod(const TypePtr& t); - - // Returns true if a record constructor/coercion of the given type has - // side effects and invalides the CSE opportunity. - bool CheckRecordConstructor(const TypePtr& t); - - // The same for modifications to tables. - bool CheckTableMod(const TypePtr& t); - - // The same for accessing (reading) tables. - bool CheckTableRef(const TypePtr& t); - - // The same for the given function call. - bool CheckCall(const CallExpr* c); - - // True if the given form of access to the given type has side effects. 
- bool CheckSideEffects(SideEffectsOp::AccessType access, const TypePtr& t); - - // True if side effects to the given identifiers and aggregates invalidate - // the CSE opportunity. - bool CheckSideEffects(const IDSet& non_local_ids, const TypeSet& aggrs); - - // Helper function that marks the CSE opportunity as invalid and returns - // "true" (used by various methods to signal invalidation). - bool Invalid() { - is_valid = false; - return true; - } - - // Profile across all script functions. - ProfileFuncs& pfs; - - // The list of identifiers for which an assignment to one of them - // renders the CSE unsafe. - const std::vector& ids; - - // Where in the AST to start our analysis. This is the initial - // assignment expression. - const Expr* start_e; - - // Where in the AST to end our analysis. - const Expr* end_e; - - // If what we're analyzing is a record element, then its offset. - // -1 if not. - int field; - - // The type of that record element, if any. - TypePtr field_type; - - // The verdict so far. - bool is_valid = true; - - // Whether we've encountered the start/end expression in - // the AST traversal. - bool have_start_e = false; - bool have_end_e = false; - - // Whether analyzed expressions occur in the context of a statement - // that modifies an aggregate ("add" or "delete"), which changes the - // interpretation of the expressions. - bool in_aggr_mod_stmt = false; -}; - // Used for debugging, to communicate which expression wasn't // reduced when we expected them all to be. 
extern const Expr* non_reduced_perp; diff --git a/src/script_opt/ScriptOpt.cc b/src/script_opt/ScriptOpt.cc index 1ca1199528a..34ffbf4c726 100644 --- a/src/script_opt/ScriptOpt.cc +++ b/src/script_opt/ScriptOpt.cc @@ -122,9 +122,9 @@ bool should_analyze(const ScriptFuncPtr& f, const StmtPtr& body) { return false; } -static bool optimize_AST(ScriptFunc* f, std::shared_ptr& pf, std::shared_ptr& rc, ScopePtr scope, - StmtPtr& body) { - pf = std::make_shared(f, body, true); +static bool optimize_AST(ScriptFuncPtr f, std::shared_ptr& pf, std::shared_ptr& rc, + ScopePtr scope, StmtPtr& body) { + pf = std::make_shared(f.get(), body, true); GenIDDefs ID_defs(pf, f, scope, body); @@ -147,8 +147,8 @@ static bool optimize_AST(ScriptFunc* f, std::shared_ptr& pf, std::s return true; } -static void optimize_func(ScriptFunc* f, std::shared_ptr pf, ProfileFuncs& pfs, ScopePtr scope, - StmtPtr& body) { +static void optimize_func(ScriptFuncPtr f, std::shared_ptr pf, std::shared_ptr pfs, + ScopePtr scope, StmtPtr& body) { if ( reporter->Errors() > 0 ) return; @@ -201,7 +201,7 @@ static void optimize_func(ScriptFunc* f, std::shared_ptr pf, Profil } // Profile the new body. - pf = std::make_shared(f, body, true); + pf = std::make_shared(f.get(), body, true); // Compute its reaching definitions. 
GenIDDefs ID_defs(pf, f, scope, body); @@ -372,6 +372,8 @@ static void use_CPP() { int num_used = 0; + auto pfs = std::make_unique(funcs, is_CPP_compilable, false); + for ( auto& f : funcs ) { auto hash = f.Profile()->HashVal(); auto s = compiled_scripts.find(hash); @@ -420,9 +422,9 @@ static void generate_CPP() { const bool standalone = analysis_options.gen_standalone_CPP; const bool report = analysis_options.report_uncompilable; - auto pfs = std::make_unique(funcs, is_CPP_compilable, false); + auto pfs = std::make_shared(funcs, is_CPP_compilable, false); - CPPCompile cpp(funcs, *pfs, gen_name, standalone, report); + CPPCompile cpp(funcs, pfs, gen_name, standalone, report); } static void analyze_scripts_for_ZAM() { @@ -433,7 +435,7 @@ static void analyze_scripts_for_ZAM() { analysis_options.optimize_AST = false; } - auto pfs = std::make_unique(funcs, nullptr, true); + auto pfs = std::make_shared(funcs, nullptr, true); bool report_recursive = analysis_options.report_recursive; std::unique_ptr inl; @@ -471,12 +473,12 @@ static void analyze_scripts_for_ZAM() { if ( ! f.ShouldAnalyze() ) continue; - auto func = f.Func(); - auto l = lambdas.find(func); + auto& func = f.FuncPtr(); + auto l = lambdas.find(func.get()); bool is_lambda = l != lambdas.end(); - if ( ! analysis_options.compile_all && ! is_lambda && inl && inl->WasFullyInlined(func) && - func_used_indirectly.count(func) == 0 ) { + if ( ! analysis_options.compile_all && ! is_lambda && inl && inl->WasFullyInlined(func.get()) && + func_used_indirectly.count(func.get()) == 0 ) { // No need to compile as it won't be called directly. 
// We'd like to zero out the body to recover the // memory, but a *few* such functions do get called, @@ -487,7 +489,7 @@ static void analyze_scripts_for_ZAM() { } auto new_body = f.Body(); - optimize_func(func, f.ProfilePtr(), *pfs, f.Scope(), new_body); + optimize_func(func, f.ProfilePtr(), pfs, f.Scope(), new_body); f.SetBody(new_body); if ( is_lambda ) diff --git a/src/script_opt/ZAM/AM-Opt.cc b/src/script_opt/ZAM/AM-Opt.cc index deb5fbbf693..dbb5804a583 100644 --- a/src/script_opt/ZAM/AM-Opt.cc +++ b/src/script_opt/ZAM/AM-Opt.cc @@ -634,8 +634,9 @@ void ZAMCompiler::ReMapInterpreterFrame() { // Update frame sizes for functions that might have more than // one body. - if ( remapped_intrp_frame_sizes.count(func) == 0 || remapped_intrp_frame_sizes[func] < next_interp_slot ) - remapped_intrp_frame_sizes[func] = next_interp_slot; + auto f = func.get(); + if ( remapped_intrp_frame_sizes.count(f) == 0 || remapped_intrp_frame_sizes[f] < next_interp_slot ) + remapped_intrp_frame_sizes[f] = next_interp_slot; } void ZAMCompiler::ReMapVar(const ID* id, int slot, zeek_uint_t inst) { diff --git a/src/script_opt/ZAM/Compile.h b/src/script_opt/ZAM/Compile.h index e25ac57fcc5..686c92d40de 100644 --- a/src/script_opt/ZAM/Compile.h +++ b/src/script_opt/ZAM/Compile.h @@ -52,8 +52,8 @@ class OpaqueVals { class ZAMCompiler { public: - ZAMCompiler(ScriptFunc* f, ProfileFuncs& pfs, std::shared_ptr pf, ScopePtr scope, StmtPtr body, - std::shared_ptr ud, std::shared_ptr rd); + ZAMCompiler(ScriptFuncPtr f, std::shared_ptr pfs, std::shared_ptr pf, ScopePtr scope, + StmtPtr body, std::shared_ptr ud, std::shared_ptr rd); ~ZAMCompiler(); StmtPtr CompileBody(); @@ -501,8 +501,8 @@ class ZAMCompiler { // (and/or no return value generated). 
std::vector retvars; - ScriptFunc* func; - ProfileFuncs& pfs; + ScriptFuncPtr func; + std::shared_ptr pfs; std::shared_ptr pf; ScopePtr scope; StmtPtr body; diff --git a/src/script_opt/ZAM/Driver.cc b/src/script_opt/ZAM/Driver.cc index be6e165907d..5840d56a9fc 100644 --- a/src/script_opt/ZAM/Driver.cc +++ b/src/script_opt/ZAM/Driver.cc @@ -13,10 +13,10 @@ namespace zeek::detail { -ZAMCompiler::ZAMCompiler(ScriptFunc* f, ProfileFuncs& _pfs, std::shared_ptr _pf, ScopePtr _scope, - StmtPtr _body, std::shared_ptr _ud, std::shared_ptr _rd) - : pfs(_pfs) { - func = f; +ZAMCompiler::ZAMCompiler(ScriptFuncPtr f, std::shared_ptr _pfs, std::shared_ptr _pf, + ScopePtr _scope, StmtPtr _body, std::shared_ptr _ud, std::shared_ptr _rd) { + func = std::move(f); + pfs = std::move(_pfs); pf = std::move(_pf); scope = std::move(_scope); body = std::move(_body); @@ -42,7 +42,7 @@ void ZAMCompiler::Init() { TrackMemoryManagement(); - non_recursive = non_recursive_funcs.count(func) > 0; + non_recursive = non_recursive_funcs.count(func.get()) > 0; } void ZAMCompiler::InitGlobals() { @@ -210,8 +210,7 @@ void ZAMCompiler::ResolveHookBreaks() { // Rewrite the breaks. for ( auto& b : breaks[0] ) { auto& i = insts1[b.stmt_num]; - delete i; - i = new ZInstI(OP_HOOK_BREAK_X); + *i = ZInstI(OP_HOOK_BREAK_X); } } diff --git a/src/script_opt/ZAM/Expr.cc b/src/script_opt/ZAM/Expr.cc index 4bcde31aeaf..9f874edd0d4 100644 --- a/src/script_opt/ZAM/Expr.cc +++ b/src/script_opt/ZAM/Expr.cc @@ -418,7 +418,16 @@ const ZAMStmt ZAMCompiler::CompileInExpr(const NameExpr* n1, const NameExpr* n2, ZOp a; - if ( op2->GetType()->Tag() == TYPE_PATTERN ) + auto& op2_t = op2->GetType(); + auto& op3_t = op3->GetType(); + + if ( op3_t->Tag() == TYPE_TABLE ) { + if ( op3_t->AsTableType()->IsPatternIndex() && op2_t->Tag() == TYPE_STRING ) + a = n2 ? OP_STR_IN_PAT_TBL_VVV : OP_STR_IN_PAT_TBL_VCV; + else + a = n2 ? 
OP_VAL_IS_IN_TABLE_VVV : OP_CONST_IS_IN_TABLE_VCV; + } + else if ( op2->GetType()->Tag() == TYPE_PATTERN ) a = n2 ? (n3 ? OP_P_IN_S_VVV : OP_P_IN_S_VVC) : OP_P_IN_S_VCV; else if ( op2->GetType()->Tag() == TYPE_STRING ) @@ -427,9 +436,6 @@ const ZAMStmt ZAMCompiler::CompileInExpr(const NameExpr* n1, const NameExpr* n2, else if ( op2->GetType()->Tag() == TYPE_ADDR && op3->GetType()->Tag() == TYPE_SUBNET ) a = n2 ? (n3 ? OP_A_IN_S_VVV : OP_A_IN_S_VVC) : OP_A_IN_S_VCV; - else if ( op3->GetType()->Tag() == TYPE_TABLE ) - a = n2 ? OP_VAL_IS_IN_TABLE_VVV : OP_CONST_IS_IN_TABLE_VCV; - else reporter->InternalError("bad types when compiling \"in\""); @@ -650,23 +656,29 @@ const ZAMStmt ZAMCompiler::CompileIndex(const NameExpr* n1, int n2_slot, const T } if ( n2tag == TYPE_TABLE ) { - if ( n3 ) { + if ( is_pat_str_ind ) { + auto n1_slot = Frame1Slot(n1, OP1_WRITE); + if ( n3 ) { + int n3_slot = FrameSlot(n3); + z = ZInstI(OP_TABLE_PATSTR_INDEX_VVV, n1_slot, n2_slot, n3_slot); + } + else + z = ZInstI(OP_TABLE_PATSTR_INDEX_VVC, n1_slot, n2_slot, c3); + } + else if ( n3 ) { int n3_slot = FrameSlot(n3); - auto op = is_pat_str_ind ? OP_TABLE_PATSTR_INDEX1_VVV : OP_TABLE_INDEX1_VVV; - auto zop = AssignmentFlavor(op, n1->GetType()->Tag()); + auto zop = AssignmentFlavor(OP_TABLE_INDEX1_VVV, n1->GetType()->Tag()); z = ZInstI(zop, Frame1Slot(n1, zop), n2_slot, n3_slot); z.SetType(n3->GetType()); } else { ASSERT(c3); - - auto op = is_pat_str_ind ? 
OP_TABLE_PATSTR_INDEX1_VVC : OP_TABLE_INDEX1_VVC; - auto zop = AssignmentFlavor(op, n1->GetType()->Tag()); + auto zop = AssignmentFlavor(OP_TABLE_INDEX1_VVC, n1->GetType()->Tag()); z = ZInstI(zop, Frame1Slot(n1, zop), n2_slot, c3); } - if ( pfs.HasSideEffects(SideEffectsOp::READ, n2t) ) { + if ( pfs->HasSideEffects(SideEffectsOp::READ, n2t) ) { z.aux = new ZInstAux(0); z.aux->can_change_non_locals = true; } @@ -851,7 +863,7 @@ const ZAMStmt ZAMCompiler::AssignTableElem(const Expr* e) { z.aux = InternalBuildVals(op2); z.t = op3->GetType(); - if ( pfs.HasSideEffects(SideEffectsOp::WRITE, op1->GetType()) ) + if ( pfs->HasSideEffects(SideEffectsOp::WRITE, op1->GetType()) ) z.aux->can_change_non_locals = true; return AddInst(z); @@ -1013,7 +1025,7 @@ const ZAMStmt ZAMCompiler::DoCall(const CallExpr* c, const NameExpr* n) { TypeSet aggrs; bool is_unknown = false; - auto resolved = pfs.GetCallSideEffects(func, non_local_ids, aggrs, is_unknown); + auto resolved = pfs->GetCallSideEffects(func, non_local_ids, aggrs, is_unknown); ASSERT(resolved); if ( is_unknown || ! non_local_ids.empty() || ! aggrs.empty() ) @@ -1103,7 +1115,7 @@ const ZAMStmt ZAMCompiler::ConstructRecord(const NameExpr* n, const Expr* e) { z.t = e->GetType(); - if ( pfs.HasSideEffects(SideEffectsOp::CONSTRUCTION, z.t) ) + if ( pfs->HasSideEffects(SideEffectsOp::CONSTRUCTION, z.t) ) z.aux->can_change_non_locals = true; return AddInst(z); @@ -1202,7 +1214,7 @@ const ZAMStmt ZAMCompiler::RecordCoerce(const NameExpr* n, const Expr* e) { // Mark the integer entries in z.aux as not being frame slots as usual. 
z.aux->slots = nullptr; - if ( pfs.HasSideEffects(SideEffectsOp::CONSTRUCTION, e->GetType()) ) + if ( pfs->HasSideEffects(SideEffectsOp::CONSTRUCTION, e->GetType()) ) z.aux->can_change_non_locals = true; return AddInst(z); diff --git a/src/script_opt/ZAM/Ops.in b/src/script_opt/ZAM/Ops.in index dd092689bf1..bda9bd91dd5 100644 --- a/src/script_opt/ZAM/Ops.in +++ b/src/script_opt/ZAM/Ops.in @@ -696,6 +696,17 @@ internal-op P-In-S type VVC eval EvalPInS(frame[z.v2], z.c) +macro EvalStrInPatTbl(op1, op2) + frame[z.v1].int_val = op2.table_val->MatchPattern({NewRef{}, op1.string_val}); + +internal-op Str-In-Pat-Tbl +type VVV +eval EvalStrInPatTbl(frame[z.v2], frame[z.v3]) + +internal-op Str-In-Pat-Tbl +type VCV +eval EvalStrInPatTbl(z.c, frame[z.v2]) + internal-binary-op S-In-S op-accessor string_val op-type I @@ -1045,6 +1056,20 @@ type VV eval EvalTableIndex(z.aux->ToListVal(frame)) AssignV1(BuildVal(v, z.t)) +macro EvalTablePatStr(index) + auto& lhs = frame[z.v1]; + auto vec = ZVal(frame[z.v2].table_val->LookupPattern({NewRef{}, index.string_val})); + ZVal::DeleteManagedType(lhs); + lhs = vec; + +internal-op Table-PatStr-Index +type VVV +eval EvalTablePatStr(frame[z.v3]) + +internal-op Table-PatStr-Index +type VVC +eval EvalTablePatStr(z.c) + internal-op When-Table-Index type VV eval EvalTableIndex(z.aux->ToListVal(frame)) @@ -1080,17 +1105,6 @@ type VVC assign-val v eval EvalTableIndex(z.c.ToVal(z.t)) -# Same, but for indexing table[pattern] of X with a string. -internal-assignment-op Table-PatStr-Index1 -type VVV -assign-val v -eval auto v = frame[z.v2].table_val->LookupPattern({NewRef{}, frame[z.v3].AsString()}); - -internal-assignment-op Table-PatStr-Index1 -type VVC -assign-val v -eval auto v = frame[z.v2].table_val->LookupPattern({NewRef{}, z.c.AsString()}); - # This version is for a variable v3. 
internal-op Index-String type VVV @@ -1832,7 +1846,6 @@ op1-read type VVV eval auto v = frame[z.v1].subnet_val->AsSubNet().AsString(); EvalSwitchBody(str_cases,) - auto t = str_cases[z.v2]; internal-op Branch-If-Not-Type diff --git a/src/script_opt/ZAM/ZBody.cc b/src/script_opt/ZAM/ZBody.cc index 9b77da28ff0..1b409021554 100644 --- a/src/script_opt/ZAM/ZBody.cc +++ b/src/script_opt/ZAM/ZBody.cc @@ -197,8 +197,8 @@ void ZBody::SetInsts(vector& instsI) { if ( iI.stmt ) { auto l = iI.stmt->Original()->GetLocationInfo(); if ( l != &no_location ) - insts_copy[i].loc = std::make_shared(util::copy_string(l->filename), l->first_line, - l->last_line, l->first_column, l->last_column); + insts_copy[i].loc = std::make_shared(l->filename, l->first_line, l->last_line, + l->first_column, l->last_column); } } diff --git a/src/script_opt/ZAM/maint/BiFs.list b/src/script_opt/ZAM/maint/BiFs.list index d07fcb33b5c..a1e0c8190a6 100644 --- a/src/script_opt/ZAM/maint/BiFs.list +++ b/src/script_opt/ZAM/maint/BiFs.list @@ -113,12 +113,17 @@ Input::__force_update Input::__remove_stream Log::__add_filter Log::__create_stream +Log::__delay +Log::__delay_finish Log::__disable_stream Log::__enable_stream Log::__flush +Log::__get_delay_queue_size Log::__remove_filter Log::__remove_stream Log::__set_buf +Log::__set_max_delay_interval +Log::__set_max_delay_queue_size Log::__write Option::any_set_to_any_vec Option::set @@ -489,6 +494,7 @@ syslog system system_env table_keys +table_pattern_matcher_stats table_values terminate time_to_double diff --git a/src/script_opt/ZAM/maint/README b/src/script_opt/ZAM/maint/README index ee69f88149b..df16c2c5a2f 100644 --- a/src/script_opt/ZAM/maint/README +++ b/src/script_opt/ZAM/maint/README @@ -1,6 +1,20 @@ This directory holds scripts and associated data to support maintenance of ZAM optimization: +find-special-script-funcs.sh + A shell script that prints to stdout a sorted list of script-level + functions that the event engine knows about. 
Invoke with the path + to the top-level src/ directory. + + Use this to compare with Special-Script-Funcs.list to see + whether there are any new such functions (or old ones that have + been removed). If so, update src/script_opt/FuncInfo.cc and then + Special-Script-Funcs.list accordingly. + +Special-Script-Funcs.list + The known-to-the-event-engine script functions that were present last time + ZAM maintenance included looking for any updates to these. + list-bifs.zeek A Zeek script that prints to stdout a sorted list of the BiFs available for the Zeek invocation. diff --git a/src/script_opt/ZAM/maint/Special-Script-Funcs.list b/src/script_opt/ZAM/maint/Special-Script-Funcs.list new file mode 100644 index 00000000000..306c3c675bc --- /dev/null +++ b/src/script_opt/ZAM/maint/Special-Script-Funcs.list @@ -0,0 +1,14 @@ +Analyzer::disabling_analyzer +Log::__default_rotation_postprocessor +Log::empty_post_delay_cb +Log::log_stream_policy +Log::rotation_format_func +Supervisor::stderr_hook +Supervisor::stdout_hook +assertion_failure +assertion_result +discarder_check_icmp +discarder_check_ip +discarder_check_tcp +discarder_check_udp +from_json_default_key_mapper diff --git a/src/script_opt/ZAM/maint/find-special-script-funcs.sh b/src/script_opt/ZAM/maint/find-special-script-funcs.sh new file mode 100755 index 00000000000..65c5d12170a --- /dev/null +++ b/src/script_opt/ZAM/maint/find-special-script-funcs.sh @@ -0,0 +1,23 @@ +#! /bin/sh + +# Finds script functions known to the event engine by searching through +# the C++ code. Invoke with the top-level src/ directory as an argument. + +# Search for event engine code that looks up script functions. +grep -h -r -w find_func $* | + + # Trim out whatever is leading up to the name. + sed 's,.*find_func,,' | + + # Make sure we're dealing with a literal name in quotes. + grep '"' | + + # Don't be fooled by -O gen-C++, which has code-to-generate-code that + # uses find_func. + grep -v '\\"' | + + # Get rid of the quotes. 
+ sed 's,^[^"]*",,;s,"[^"]*$,,' | + + # Produce a regularized list for easy diff'ing. + sort -u diff --git a/testing/btest/opt/coalescence.zeek b/testing/btest/opt/coalescence.zeek index 9552c168ba0..3b95b4bffbf 100644 --- a/testing/btest/opt/coalescence.zeek +++ b/testing/btest/opt/coalescence.zeek @@ -1,4 +1,5 @@ # @TEST-DOC: Ensure that event coalescence works properly. +# @TEST-REQUIRES: test "${ZEEK_USE_CPP}" != "1" # # @TEST-EXEC: zeek -b -O ZAM %INPUT >output # @TEST-EXEC: btest-diff output diff --git a/testing/btest/opt/no-coalescence.zeek b/testing/btest/opt/no-coalescence.zeek index 2bd87bd4130..34751088128 100644 --- a/testing/btest/opt/no-coalescence.zeek +++ b/testing/btest/opt/no-coalescence.zeek @@ -1,4 +1,5 @@ # @TEST-DOC: Ensure that event coalescence doesn't happen if inlining turned off. +# @TEST-REQUIRES: test "${ZEEK_USE_CPP}" != "1" # # @TEST-EXEC: zeek -b -O ZAM -O no-inline %INPUT >output # @TEST-EXEC: btest-diff output