From 7f8db32b6a672cce2e02beac14f0d16e58b32ee9 Mon Sep 17 00:00:00 2001 From: Robert Zhang Date: Tue, 21 Nov 2023 17:12:04 -0500 Subject: [PATCH 01/18] [InferAtomsPass] Revamp pass to support the latest LLVM and fix/refactor code Below are the key changes: - Use LLVM's new pass manager, a major improvement from the legacy one. - Fix a shortcoming of the inference algorithm to actually collect all uses of a fresh/consistent variable. - Optimize the inference cleanup algorithm to remove all instructions associated with the arguments of fresh/consistent annotations. - Thoroughly log debug messages throughout the components of the pass for a clearer view of the process. - Rename files, structs, functions, variables, etc. to be more descriptive and consistent. - General code style refactoring (e.g., use `auto` and structured bindings (destructuring) where possible). - Added simple C tests to `benchmarks/ctests`. --- .gitignore | 2 + readme.md => README.md | 0 benchmarks/ctests/example01.c | 21 + benchmarks/ctests/example02.c | 24 + ocelot/AtomicRegionInference/CMakeLists.txt | 18 +- ocelot/AtomicRegionInference/README.md | 22 +- .../AtomicRegionInference/src/CMakeLists.txt | 24 +- .../src/ConsistentInference.cpp | 505 -------- ocelot/AtomicRegionInference/src/Helpers.cpp | 36 + .../src/InferAtomicPass.cpp | 516 -------- .../AtomicRegionInference/src/InferAtoms.cpp | 529 ++++++++ .../src/InferFreshCons.cpp | 565 ++++++++ .../src/TaintTracker.cpp | 1138 +++++++++-------- .../src/include/ConsistentInference.h | 38 - .../src/include/HelperTypes.h | 93 +- .../src/include/Helpers.h | 15 + .../src/include/InferAtomicPass.h | 68 - .../src/include/InferAtoms.h | 54 + .../src/include/InferFreshCons.h | 36 + .../src/include/TaintTracker.h | 39 +- 20 files changed, 1961 insertions(+), 1782 deletions(-) create mode 100644 .gitignore rename readme.md => README.md (100%) create mode 100644 benchmarks/ctests/example01.c create mode 100644 benchmarks/ctests/example02.c delete mode 100644 
ocelot/AtomicRegionInference/src/ConsistentInference.cpp create mode 100644 ocelot/AtomicRegionInference/src/Helpers.cpp delete mode 100644 ocelot/AtomicRegionInference/src/InferAtomicPass.cpp create mode 100644 ocelot/AtomicRegionInference/src/InferAtoms.cpp create mode 100644 ocelot/AtomicRegionInference/src/InferFreshCons.cpp delete mode 100644 ocelot/AtomicRegionInference/src/include/ConsistentInference.h create mode 100644 ocelot/AtomicRegionInference/src/include/Helpers.h delete mode 100644 ocelot/AtomicRegionInference/src/include/InferAtomicPass.h create mode 100644 ocelot/AtomicRegionInference/src/include/InferAtoms.h create mode 100644 ocelot/AtomicRegionInference/src/include/InferFreshCons.h diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5326aab --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.vscode +ocelot/AtomicRegionInference/build \ No newline at end of file diff --git a/readme.md b/README.md similarity index 100% rename from readme.md rename to README.md diff --git a/benchmarks/ctests/example01.c b/benchmarks/ctests/example01.c new file mode 100644 index 0000000..0e61a67 --- /dev/null +++ b/benchmarks/ctests/example01.c @@ -0,0 +1,21 @@ +#include <stdio.h> + +// int x; +// int y; + +void Fresh(int x) { printf("Fresh\n"); } +void Consistent(int x, int id) { printf("Consistent\n"); } + +void atomic_start() {} +void atomic_end() {} + +int tmp() { return 0; } +int (*IO_NAME1)() = tmp; +void log(int x) {} + +int app() { + int x = tmp(); + Fresh(x); + log(x); + return 0; +} \ No newline at end of file diff --git a/benchmarks/ctests/example02.c b/benchmarks/ctests/example02.c new file mode 100644 index 0000000..1047d9e --- /dev/null +++ b/benchmarks/ctests/example02.c @@ -0,0 +1,24 @@ +void Fresh(int x) {} +void Consistent(int x, int id) {} + +void atomic_start() {} +void atomic_end() {} + +int sense() { return 0; } +int (*IO_NAME)() = sense; + +int norm(int t) { return t; } + +void log(int x) {} + +int tmp() { + int t = sense(); + int 
t_norm = norm(t); + return t_norm; +} + +void app() { + int x = tmp(); + Fresh(x); + log(x); +} \ No newline at end of file diff --git a/ocelot/AtomicRegionInference/CMakeLists.txt b/ocelot/AtomicRegionInference/CMakeLists.txt index 24e2883..81d46f5 100644 --- a/ocelot/AtomicRegionInference/CMakeLists.txt +++ b/ocelot/AtomicRegionInference/CMakeLists.txt @@ -1,8 +1,22 @@ -cmake_minimum_required(VERSION 3.1) +cmake_minimum_required(VERSION 3.6) +project(InferAtoms) +# LLVM uses C++17. +set(CMAKE_CXX_STANDARD 17) + +# Load LLVMConfig.cmake. If this fails, consider setting `LLVM_DIR` to point +# to your LLVM installation's `lib/cmake/llvm` directory. find_package(LLVM REQUIRED CONFIG) + +# Include the part of LLVM's CMake libraries that defines +# `add_llvm_pass_plugin`. +include(AddLLVM) + +# Use LLVM's preprocessor definitions, include directories, and library search +# paths. add_definitions(${LLVM_DEFINITIONS}) include_directories(${LLVM_INCLUDE_DIRS}) link_directories(${LLVM_LIBRARY_DIRS}) -add_subdirectory(src) # Use your pass name here. +# Our pass lives in this subdirectory. +add_subdirectory(src) diff --git a/ocelot/AtomicRegionInference/README.md b/ocelot/AtomicRegionInference/README.md index d9a1c3a..38c61e6 100644 --- a/ocelot/AtomicRegionInference/README.md +++ b/ocelot/AtomicRegionInference/README.md @@ -1,14 +1,18 @@ -# region-inference-pass +# Atomic Region Inference -LLVM Pass for inferring atomic regions +LLVM Pass for inferring atomic regions. Tested to work with LLVM 17. -Build: +To build the pass: - $ mkdir build - $ cd build - $ cmake .. - $ make +```sh +mkdir build +cd build +cmake .. 
+make +``` -Run: +You may bootstrap Clang to use the pass to compile a C file like so: - $ opt -load build/src/libInferAtomicPass.so -atomize something.bc +```sh +clang -S -emit-llvm -fpass-plugin=src/InferAtomsPass.dylib -fno-discard-value-names ../../../benchmarks/ctests/example01.c +``` diff --git a/ocelot/AtomicRegionInference/src/CMakeLists.txt b/ocelot/AtomicRegionInference/src/CMakeLists.txt index 03033ee..e44f56e 100644 --- a/ocelot/AtomicRegionInference/src/CMakeLists.txt +++ b/ocelot/AtomicRegionInference/src/CMakeLists.txt @@ -1,23 +1,7 @@ -add_library(InferAtomicPass MODULE +add_llvm_pass_plugin(InferAtomsPass # List your source files here. - InferAtomicPass.cpp - ConsistentInference.cpp + InferAtoms.cpp TaintTracker.cpp + InferFreshCons.cpp + Helpers.cpp ) - -# Use C++11 to compile our pass (i.e., supply -std=c++11). -target_compile_features(InferAtomicPass PRIVATE cxx_range_for cxx_auto_type) - -# LLVM is (typically) built with no C++ RTTI. We need to match that; -# otherwise, we'll get linker errors about missing RTTI data. -set_target_properties(InferAtomicPass PROPERTIES - COMPILE_FLAGS "-fno-rtti" -) - -# Get proper shared-library behavior (where symbols are not necessarily -# resolved when the shared library is linked) on OS X. -if(APPLE) - set_target_properties(InferAtomicPass PROPERTIES - LINK_FLAGS "-undefined dynamic_lookup" - ) -endif(APPLE) diff --git a/ocelot/AtomicRegionInference/src/ConsistentInference.cpp b/ocelot/AtomicRegionInference/src/ConsistentInference.cpp deleted file mode 100644 index f59be4b..0000000 --- a/ocelot/AtomicRegionInference/src/ConsistentInference.cpp +++ /dev/null @@ -1,505 +0,0 @@ -#include "include/ConsistentInference.h" - -#define DEBUGINFER 0 -//Come back to this. it can crash and if pass not run with debug, shouldn't be needed -#if 0 -namespace { - - // Find closest debug info. 
Note that LLVM throws fatal error if we don't add debug info -// to call instructions that we insert (if the parent function has debug info). -DebugLoc findClosestDebugLoc(Instruction *instr) -{ - - DIScope *scope = instr->getFunction()->getSubprogram(); - Instruction *instrWithDebugLoc = instr; - while (!instrWithDebugLoc->getDebugLoc() && instrWithDebugLoc->getPrevNode() != NULL) - instrWithDebugLoc = instrWithDebugLoc->getPrevNode(); - if (instrWithDebugLoc->getDebugLoc()) // if found an instruction with info, use that info - return DebugLoc(instrWithDebugLoc->getDebugLoc()); - else // use the parent function's info (can't see any better source) - return DebugLoc::get(instr->getFunction()->getSubprogram()->getLine(), /* col */ 0, scope); -} - -} // namespace anon -#endif -using namespace std; -using namespace llvm; -Instruction* ConsistentInference::insertRegionInst(int toInsertType, Instruction* insertBefore) { - - Instruction* call; - IRBuilder<> builder(insertBefore); - //build and insert a region start inst - if (toInsertType == 0) { - //Constant* c = M->getOrInsertFunction(""); - call = builder.CreateCall(atomStart); - #if DEBUGINFER - errs() << "create start\n"; - #endif - } else { - //build and insert a region start inst - #if DEBUGINFER - errs() << "Inserting end at: "<< *insertBefore<<"\n"; - #endif - call = builder.CreateCall(atomEnd); - #if DEBUGINFER - errs() << "create end\n"; - #endif - } - return call; -} - -//if a direct pred is also a successor, then it's a for loop block -bool ConsistentInference::loopCheck(BasicBlock* bb) { - StringRef bbname = bb->getName().drop_front(2); - if (!bb->hasNPredecessors(1)) { - for (auto it = pred_begin(bb), et = pred_end(bb); it != et; ++it) { - BasicBlock* predecessor = *it; - StringRef pname = predecessor->getName().drop_front(2); - // errs() << "comparing " << pname<< " and " < 0) { - // errs() << "comparison is true\n"; - return true; - } - } - } - return false; -} - - -//find the first block after a for 
loop -BasicBlock* ConsistentInference::getLoopEnd(BasicBlock* bb) { - Instruction* ti = bb->getTerminator(); - BasicBlock* end = ti->getSuccessor(0); - ti = end->getTerminator(); - // errs() << "end is " << end->getName() << "\n"; - //for switch inst, succ 0 is the fall through - end = ti->getSuccessor(1); - // errs() << "end is " << end->getName() << "\n"; - return end; -} - -/*Top level region inference function -- could flatten later*/ -void ConsistentInference::inferConsistent(std::map allSets) -{ - //TODO: start with pseudo code structure from design doc - for( auto map : allSets ) { - #if DEBUGINFER - errs() << "DEBUGINFER: starting set " << map.first << "\n"; - #endif - addRegion(map.second, 0); - } - -} - -/*The only difference is outer map vs outer vec*/ -void ConsistentInference::inferFresh(inst_vec_vec allSets) -{ - //TODO: start with pseudo code structure from design doc - for( auto singleVec : allSets ) { - addRegion(singleVec, 1); - } - -} - -//Region type: 0 for Con, 1 for fresh -void ConsistentInference::addRegion(inst_vec conSet, int regionType) -{ - //construct a map of set item to bb - map blocks; - //a queue for regions that still need to be processed - queue> regionsNeeded; - - for(Instruction* item : conSet) { - blocks[item] = item->getParent(); - } - - regionsNeeded.push(blocks); - - Function* root; - for (Function& f : *m) { - if (f.getName().equals("app")) { - root = &f; - } - } - - //iterate until no more possible regions - //THEN pick the best one - vector> regionsFound; - while (!regionsNeeded.empty()) { - //need to raise all blocks in the map until - //they are the same - map blockMap = regionsNeeded.front(); - regionsNeeded.pop(); - //record which functions have been travelled through - set nested; - - while (!sameFunction(blockMap)) { - //to think on: does this change? 
- Function* goal = commonPredecessor(blockMap, root); - for (Instruction* item : conSet) { - //not all blocks need to be moved up - Function* currFunc = blockMap[item]->getParent(); - nested.insert(currFunc); - if(currFunc!=goal) { - - //if more than one call: - //callChain info is already in the starting set - //so only explore a caller if it's in conSet - bool first = true; - for(User* use : currFunc->users()) { - //if (regionType == 1) { - if(! (find(conSet.begin(), conSet.end(), use)!=conSet.end())) { - continue; - } - //errs() << "Use: "<< *use << " is in call chain\n"; - //} - Instruction* inst = dyn_cast(use); - #if DEBUGINFER - errs() << "DEBUGINFER: examining use: "<< *inst<<"\n"; - #endif - if (inst == NULL) { - //errs () <<"ERROR: use " << *use << "not an instruction\n"; - break; - } - //update the original map - if (first) { - blockMap[item] = inst->getParent(); - first = false; - } else { - //copy the blockmap, update, add to queue - Instruction* inst = dyn_cast(use); - map copy; - for(auto map : blockMap) { - copy[map.first] = map.second; - } - copy[item] = inst->getParent(); - regionsNeeded.push(copy); - } - }//end forall uses - }//end currFunc check - }//end forall items - }//end same function check - - - - /**Now, all bb in the map are in the same function, so we can run - * dom or post-dom analysis on that function**/ - #if DEBUGINFER - errs() << "DEBUGINFER: start dom tree analysis\n"; - #endif - Function* home = blockMap.begin()->second->getParent(); - if(home == nullptr) { - #if DEBUGINFER - errs() << "DEBUGINFER: no function found\n"; - #endif - continue; - } - DominatorTree& domTree = pass->getAnalysis(*home).getDomTree(); - //Find the closest point that dominates - BasicBlock* startDom = blockMap.begin()->second; - for (auto map : blockMap) { - startDom = domTree.findNearestCommonDominator(map.second, startDom); - } - //TODO: if an inst in the set is in the bb, we can truncate? 
- #if DEBUGINFER - errs() << "DEBUGINFER: start post dom tree analysis\n"; - #endif - //Flip directions for the region end - PostDominatorTree& postDomTree = pass->getAnalysis(*home).getPostDomTree(); - //Find the closest point that dominates - BasicBlock* endDom = blockMap.begin()->second; - for (auto map : blockMap) { - #if DEBUGINFER - if (endDom!=nullptr) { - errs() << "finding post dom of:" << map.second->getName()<< " and " << endDom->getName()<< "\n"; - } else { - errs() << "endDom is null\n"; - } - #endif - endDom = postDomTree.findNearestCommonDominator(map.second, endDom); - } - if (startDom==nullptr) { - errs() << "ERROR: null start\n"; - } else if (endDom==nullptr) { - errs() << "ERROR: null end\n"; - } - #if DEBUGINFER - errs() << "DEBUGINFER: match scope\n"; - #endif - //need to make the start and end dominate each other as well. - startDom = domTree.findNearestCommonDominator(startDom, endDom); - endDom = postDomTree.findNearestCommonDominator(startDom, endDom); - - //extra check to disallow loop conditional block as the end - if(loopCheck(endDom)) { - endDom = getLoopEnd(endDom); - } - - - - if (startDom==nullptr) { - errs() << "ERROR: null start after scope merge\n"; - } else if (endDom==nullptr) { - errs() << "ERROR: null end after scope merge\n"; - } -#if DEBUGINFER - errs() << "DEBUGINFER: insert insts\n"; -#endif - //TODO: fallback if endDom is null? Need hyper-blocks, I think - //possibly can do a truncation check, to lessen the size a little, but could that interfere with compiler optimizations? 
- Instruction* regionStart = truncate(startDom, true, conSet, nested); - Instruction* regionEnd = truncate(endDom, false, conSet, nested); - if (regionStart==nullptr) { - errs() << "ERROR: null start after truncation\n"; - } else if (regionEnd==nullptr) { - errs() << "ERROR: null end after truncation\n"; - } else { - //errs() << "Region start is before " << *regionStart<<" and region end is before " << *regionEnd<<"\n"; - } - - //insert into regions found - regionsFound.push_back(make_pair(regionStart, regionEnd)); - }//end while regions needed - - //now see which region is smallest -- instruction count? they must dominate - //each other, so there's no possibility of not running into the start from - //the end - pair smallestReg = findSmallest(regionsFound); - //errs() << "Smallest Region was " << *smallestReg.first<< " and " << *smallestReg.second <<"\n"; - Instruction* regionStart = smallestReg.first; - Instruction* regionEnd = smallestReg.second; - insertRegionInst(0, regionStart); - insertRegionInst(1, regionEnd); - //}//end while regions needed -} - -/*Function to truncate a bb if the instruction is in the bb */ -Instruction* ConsistentInference::truncate(BasicBlock* bb, bool forwards, inst_vec conSet, set nested) -{ - //truncate the front - if(forwards) { - for (Instruction& inst : *bb) { - //stop at first inst in the basic block that is in the set. 
- if (find(conSet.begin(), conSet.end(), &inst)!=conSet.end()){ - return &inst; - } - //need to stop at relevant callIsnsts as well - else if (CallInst* ci = dyn_cast(&inst)){ - if (nested.find(ci->getCalledFunction())!=nested.end()) { - return &inst; - } - } - - } - //otherwise just return the last inst - return &bb->back(); - } - //reverse directions if not forwards - Instruction* prev = NULL; - for(BasicBlock::reverse_iterator i = bb->rbegin(), e = bb->rend(); i!=e;++i) { - Instruction* inst = &*i; - if (find(conSet.begin(), conSet.end(), inst)!=conSet.end()){ - //need to return the previous inst (next in fowards), as it should be inserted before the returned inst - - if (prev == NULL) { - //only happens if use is a ret inst, which is a scope use to make the branching - //work, not an actual one, so this is safe - return inst; - } - return prev; - } - else if (CallInst* ci = dyn_cast(inst)){ - if (nested.find(ci->getCalledFunction())!=nested.end()) { - return prev; - } - } - prev = inst; - } - //otherwise just return first inst of the block - //errs() << "truncate returning " << bb->front() << "\n"; - return &bb->front(); -} - - -Function* ConsistentInference::commonPredecessor(map blockMap, Function* root) -{ - vector funcList; - //add the parents, without duplicates - for (auto map : blockMap) { - if(!(find(funcList.begin(), funcList.end(), map.second->getParent())!=funcList.end())) { - funcList.push_back(map.second->getParent()); - #if DEBUGINFER - errs() << "DEBUGINFER: adding: " << map.second->getParent()->getName()<<"\n"; - #endif - } - } - //easy case: everything is already in the same function - if(funcList.size()==1) { - return funcList.at(0); - } - /* Algo Goal: get the deepest function that still calls (or is) all funcs in funcList. - * Consider: multiple calls? 
Should be dealt with in the add region function -- eventually each caller - * gets its own region - */ - Function* goal = nullptr; - //Function* root = m->getFunction("app"); - #if DEBUGINFER - errs() << "DEBUGINFER: starting from " << root->getName() << "\n"; - #endif - deepCaller(root, funcList, &goal); - if(goal == nullptr) { - errs() << "ERROR: deepCaller failed\n"; - } - return goal; -} - -/*Recursive: from a root, returns list of called funcs. */ -vector ConsistentInference::deepCaller(Function* root, vector funcList, Function** goal) -{ - vector calledFuncs; - bool mustIncludeSelf = false; - - for (inst_iterator inst = inst_begin(root), E = inst_end(root); inst != E; ++inst) { - if(CallInst* ci = dyn_cast(&(*inst))) { - calledFuncs.push_back(ci->getCalledFunction()); - } - } - vector explorationList; - for (Function* item : funcList) { - - //skip over root or called funcs - if ((find(calledFuncs.begin(), calledFuncs.end(), item)!=calledFuncs.end()) || item == root) { - if (item == root) { - mustIncludeSelf = true; - } - continue; - } - explorationList.push_back(item); - #if DEBUGINFER - errs() << "need to find " << item->getName() <<"\n"; - #endif - } - //this function is a root of a call tree that calls everything in the func List - if (explorationList.empty()) { - #if DEBUGINFER - errs() << "empty list\n"; - #endif - *goal = root; - return calledFuncs; - } - //otherwise recurse - Function* candidate = nullptr; - for (Function* called : calledFuncs) { - vector partial = deepCaller(called, explorationList, &candidate); - //if candidate is set, it means called is a root for everything in the explorationList - if (candidate!=nullptr) { - *goal = candidate; - #if DEBUGINFER - errs() << "New candidate: " << (*goal)->getName() << "\n"; - #endif - } - //remove from explorationList, but add to calledFuncs - for (Function* item : partial) { - func_vec::iterator place = find(explorationList.begin(), explorationList.end(), item); - if(place!=explorationList.end()) { - 
explorationList.erase(place); - } - calledFuncs.push_back(item); - } - - } - //current point is a root - if(explorationList.empty()) { - //not the deepest - if (candidate!=nullptr && !mustIncludeSelf) { - *goal = candidate; - } else { - //is the deepest - *goal = root; - } - } - return calledFuncs; -} - - - - -/*Recursive: get the min of the maximum length of each regions*/ -inst_inst_pair ConsistentInference::findSmallest(vectorregionsFound) -{ - inst_inst_pair best; - int best_count = 2147483647; - - for (inst_inst_pair candidate : regionsFound) { - Function* root = candidate.first->getFunction(); - int pre = 0 ; - int found = 0; - for (Instruction& inst : *candidate.first->getParent()) { - pre++; - if (&inst==candidate.first) { - break; - - } - } - //get the max length from the bb to the end instruction - vector v; - int length = getSubLength(candidate.first->getParent(), candidate.second, v); - //substract the prefix before the start inst - length -= pre; - if (length < best_count) { - best_count = length; - best = candidate; - //errs() << "best candidate is " << *candidate.first << " and " << - // *candidate.second << " with length " << length << "\n"; - } - - } - return best; -} -//helper func, recursive -int ConsistentInference::getSubLength(BasicBlock* bb, Instruction* end, vector visited){ - int count = 0; - int max_ret = 0; - visited.push_back(bb); - for (Instruction& inst : *bb) { - count++; - if (&inst == end){ - return count; - } - if(CallInst* ci = dyn_cast(&inst)){ - Function* cf = ci->getCalledFunction(); - if (!cf->empty() && cf!=NULL) { - //errs() <<"attempting function " << cf->getName() << "\n"; - count+= cf->getInstructionCount(); - } - } - if (inst.isTerminator()) { - int numS = inst.getNumSuccessors(); - for (int i = 0; i < numS; i++) { - BasicBlock* next = inst.getSuccessor(i); - //already counted -- do something more fancy for loops? 
- if (find(visited.begin(), visited.end(), next)!=visited.end()) { - continue; - } - int intermed = getSubLength(inst.getSuccessor(i), end, visited); - if (intermed > max_ret) { - max_ret = intermed; - } - } - } - } - return count + max_ret; -} - -bool ConsistentInference::sameFunction(map blockMap) -{ - Function* comp = blockMap.begin()->second->getParent(); - for (auto map : blockMap) { - if (map.second->getParent()!= comp) { - return false; - } - } - return true; -} - - diff --git a/ocelot/AtomicRegionInference/src/Helpers.cpp b/ocelot/AtomicRegionInference/src/Helpers.cpp new file mode 100644 index 0000000..a0f62cf --- /dev/null +++ b/ocelot/AtomicRegionInference/src/Helpers.cpp @@ -0,0 +1,36 @@ +#include "include/Helpers.h" + +std::string getSimpleNodeLabel(const Value* node) { + if (node->hasName()) { + // #if DEBUG + // errs() << "Node has name\n"; + // #endif + return node->getName().str(); + } + + std::string str; + raw_string_ostream OS(str); + + node->printAsOperand(OS, false); + return str; +} + +bool isAnnot(const StringRef annotName) { + return annotName.equals("Fresh") || annotName.equals("Consistent") || annotName.equals("FreshConsistent"); +} + +void printInstInsts(const inst_insts_map& iim, bool onlyCalls) { + for (auto& [inst, inputs] : iim) { + if (!onlyCalls || isa(inst)) { + errs() << *inst << " ->\n"; + for (auto* input : inputs) errs() << *input << "\n"; + errs() << "\n"; + } + } +} + +void printInsts(const inst_vec& iv) { + for (auto& inst : iv) { + errs() << *inst << "\n"; + } +} diff --git a/ocelot/AtomicRegionInference/src/InferAtomicPass.cpp b/ocelot/AtomicRegionInference/src/InferAtomicPass.cpp deleted file mode 100644 index a105841..0000000 --- a/ocelot/AtomicRegionInference/src/InferAtomicPass.cpp +++ /dev/null @@ -1,516 +0,0 @@ -#include "include/InferAtomicPass.h" -#include "include/TaintTracker.h" - -#define CAPSIZE 1000 -#define PRINTMAPS 1 -#define FRESHDEBUG 1 - -void InferAtomicModulePass::removeAnnotations(inst_vec* toDelete) 
-{ - //delete all the annotation function calls - bool instsLeftToDelete = true; - Instruction* candidate; - while (instsLeftToDelete) { - instsLeftToDelete = false; - //can't delete while directly iterating through the module - for (Function& f : *this->m) { - for (BasicBlock& bb : f) { - for (Instruction& inst : bb) { - - //for now, let's just delete unused core or compiler builtin functions - if(isa(&inst)) { - if (find(toDelete->begin(), toDelete->end(), &inst)!=toDelete->end()) { - candidate = &inst; - instsLeftToDelete = true; - break; - } - } - } - } - } - //recheck, as this could be the last iteration - if(instsLeftToDelete) { - #if DEBUG - errs() << "DEBUG: deleting: " << candidate->getName() <<"\n"; - #endif - candidate->replaceAllUsesWith(UndefValue::get(candidate->getType())); - candidate->eraseFromParent(); - } - - } - //now delete all the annotation functions - //vector toDeleteF; - bool functionsLeftToDelete = true; - Function* candidatef; - while (functionsLeftToDelete) { - functionsLeftToDelete = false; - //can't delete while directly iterating through the module - for (Function& f : *this->m) { - if (f.hasName()) { - //for now, let's just delete unused core or compiler builtin functions - if(f.getName().contains("Fresh")||f.getName().contains("Consistent")) { - candidatef = &f; - functionsLeftToDelete = true; - break; - - } - } - - } - - //recheck, as this could be the last iteration - if(functionsLeftToDelete) { -#if DEBUG - errs() << "DEBUG: deleting: " << candidatef->getName() <<"\n"; -#endif - - candidatef->replaceAllUsesWith(UndefValue::get(candidatef->getType())); - candidatef->eraseFromParent(); - } - } -} - -/* - * Top-level pass for atomic region inference - */ -bool InferAtomicModulePass::runOnModule(Module &M) { - m = &M; - capacitorSize = CAPSIZE; - - //TODO: init atomStart/End with the proper functions - for (Function& F : M) { - if (F.getName().contains("atomic_start")) { - #if DEBUG - errs() << "DEBUG: found atom start\n"; - #endif 
- atomStart = &F; - } - if (F.getName().contains("atomic_end")) { - #if DEBUG - errs() << "DEBUG: found atom end\n"; - #endif - atomEnd = &F; - } - } - - //Build the consistent set and fresh lists here, to only - //go through all the declarations once. - std::map conVars; - inst_vec_vec freshVars; - inst_insts_map inputInfo = buildInputs(m); - inst_vec toDelete; - getAnnotations(&conVars, &freshVars, inputInfo, &toDelete); - //TODO: need to add unique point of call chain prefix to con set - #if PRINTMAPS - errs () << "Initial fresh is: \n"; - for (inst_vec item : freshVars) { - for (Instruction* item2 : item) { - errs() << *item2 << "\n"; - } - } - errs() << "End init fresh\n"; - #endif - - #if PRINTMAPS - errs () << "Initial consistent is: \n"; - for (auto map : conVars) { - errs() << "Begin set\n"; - for (Instruction* item2 : map.second) { - errs() << *item2 << "\n"; - } - } - errs() << "End init Consistent\n"; - #endif - - #if PRINTMAPS - errs() << "Printing map:\n"; - for (auto map : inputInfo) { - if (isa(map.first)) { - errs() << *(map.first) << "in map\n"; - for (Value* l : map.second) { - errs() << *l << "\n"; - } - } - } - #endif - map allConSets = collectCon(conVars, inputInfo); - inst_vec_vec allFresh = collectFresh(freshVars, inputInfo); - - - - #if PRINTMAPS - errs () << "Fresh is: \n"; - for (inst_vec item : allFresh) { - for (Instruction* item2 : item) { - errs() << *item2 << "\n"; - } - } - errs() << "End fresh\n"; - #endif - - #if PRINTMAPS - errs () << "Consistent is: \n"; - for (auto map : allConSets) { - for (Instruction* item2 : map.second) { - errs() << *item2 << "\n"; - } - } - errs() << "End Consistent\n"; - #endif - - - - //will do consistency first - ConsistentInference* ci = new ConsistentInference(this, &M, atomStart, atomEnd); - - ci->inferConsistent(allConSets); - ci->inferFresh(allFresh); - - //delete annotations - removeAnnotations(&toDelete); - - return true; -} - - -/**This function finds annotated variables)**/ -void 
InferAtomicModulePass::getAnnotations(std::map* conSets, inst_vec_vec* freshVars, - inst_insts_map inputMap, inst_vec* toDelete) -{ - //note: delete the annotation functions afterwards - map recallSet; - - for (Function& f : *m) { - for (BasicBlock& bb : f) { - for (Instruction& inst : bb) { - if(CallInst* ci = dyn_cast(&inst)) { - Function* called = ci->getCalledFunction(); - //various empty or null checks - if (called==NULL) { - continue; - } - if (called->empty()||!called->hasName()) { - continue; - } - //covers both Consistent and FreshConsistent - if (called->getName().contains("Consistent")) { - //first para is var, second is id - toDelete->push_back(ci); - int setID; - //v.push_back(ai); <<-- don't actually need this? - //bit cast use of x, then value operand of store - Instruction* var = dyn_cast(ci->getOperand(0)); - - if (var==NULL) { - //errs() << "error casting with " << *ci <<"\n"; - continue; - } - // errs() << "New consistent annot. with " << *var<<"\n"; - Value* id = ci->getOperand(1); - if(ConstantInt* cint = dyn_cast(id)) { - setID = cint->getSExtValue(); - } - queue customUsers; - set v; - //v.emplace(ci); - //in case var itself is iOp - for (Instruction* iOp : inputMap[var]) { - v.emplace(iOp); - } - - //customUsers.push(var); - for (Value* use : var->users()) { - //don't push the annotation - if (use == ci) { - continue; - } - //errs() << "DEBUG: pushing use of var: " << *use << "\n"; - customUsers.push(use); - } - while(!customUsers.empty()) { - Value* use = customUsers.front(); - customUsers.pop(); - //errs() << "DEBUG: use is " << *use << " of var " << *var<<"\n"; - if (Instruction* instUse = dyn_cast(use)) { - for (Instruction* iOp : inputMap[instUse]) { - v.emplace(iOp); - // errs() << "DEBUG: adding to v " << *iOp << "\n"; - } - } - if(isa(use)||isa(use)) { - for (Value* use2 : use->users()) { - // errs() << "DEBUG: use2 is " << *use2 << "\n"; - if(StoreInst* si = dyn_cast(use2)){ - for (Instruction* iOp : inputMap[si]) { - 
v.emplace(iOp); - // errs() << "DEBUG: adding to v " << *iOp << "\n"; - } - } - // errs() << "DEBUG: pushing use2 of var: " << *use2 << "\n"; - customUsers.push(use2); - } - } - - if(isa(use)) { - for (Value* use2 : use->users()) { - // errs() << "DEBUG: use2 is " << *use2 << "\n"; - if(StoreInst* si = dyn_cast(use2)){ - //v.push_back(si); - for (Instruction* iOp : inputMap[si]) { - v.emplace(iOp); - // errs() << "DEBUG: adding to v " << *iOp << "\n"; - } - } - // errs() << "DEBUG: pushing use2 of var: " << *use2 << "\n"; - customUsers.push(use2); - } - } - } - //last case - if (v.empty()) { - //some entries have a first link with ci, not var - - for (Instruction* iOp : inputMap[ci]) { - if (inputMap[ci].size() == 1) { - for (Instruction* origLink : inputMap[iOp]) { - v.emplace(origLink); - } - } else { - v.emplace(iOp); - } - - } - - - } - //for later deletion purposes - inputMap.erase(ci); - - - if (!v.empty()) { - inst_vec temp; - for (Instruction* item : v) { - temp.push_back(item); - } - //add the collected list to the map - if(conSets->find(setID)!=conSets->end()) { - conSets->at(setID).insert(conSets->at(setID).end(), temp.begin(), temp.end()); - } else { - conSets->emplace(setID, temp); - } - } - - } - if (called->getName().contains("Fresh")) { - set v; - toDelete->push_back(ci); - inputMap.erase(ci); - Value* var = ci->getOperand(0); - if (Instruction* inst = dyn_cast(var)) { - v.emplace(inst); - } else { - //errs() << "error casting\n"; - } - //errs() << "New Fresh annot. 
with " << *var<<"\n"; - // v.push_back(ci); - - for(Value* use : var->users()) { - if(StoreInst* si = dyn_cast(use)){ - // errs() << "DEBUG: pushing " << *use << "\n"; - v.emplace(si); - } - if(isa(use)) { - for (Value* use2 : use->users()) { - // errs() << "DEBUG: pushing " << *use2 << "\n"; - if(StoreInst* si = dyn_cast(use2)){ - v.emplace(si); - } - } - } - } - if (!v.empty()) { - inst_vec temp; - for (Instruction* item : v) { - temp.push_back(item); - } - freshVars->push_back(temp); - } - } - - } - - } - } - } - -} - - - - -/*Given the starting point annotations of conSets, find the -deepest unique point of the call chain*/ -map InferAtomicModulePass::collectCon(map startingPoints, inst_insts_map inputMap) -{ - map toReturn; - for (pair iv : startingPoints ) { - set unique; - map> callChains; - //each item should be the starting point from a different annot - for(Instruction* item : iv.second) { - #if FRESHDEBUG - errs() << "Starting point: " << *item << "\n"; - #endif - //add self to call chain - callChains[item].insert(item); - - for (Instruction* iOp : inputMap[item]) { - // unique.insert(iOp); - callChains[item].insert(iOp); - queue toExplore; - toExplore.push(iOp); - while (!toExplore.empty()) { - Instruction* curr = toExplore.front(); - toExplore.pop(); - for (Instruction* intermed : inputMap[curr]) { - if (! (find(callChains[item].begin(), callChains[item].end(), intermed) - !=callChains[item].end())) { - callChains[item].insert(intermed); - toExplore.push(intermed); - } - } - } - - }// finish constructing call chain for one annot. in the set - - }//constructed call chains for ALL annot. in the set. 
- //now check the call chain - - //int index = 0; - //map foundUniquePoint; - //clean up the call chains - - for(auto ccmap : callChains) { - for (Instruction* possibility : ccmap.second) { - //if the link is in the same function, then continue - //errs() << "examining possibility: " << *possibility << "\n"; - bool sf = false; - for (Instruction* link : inputMap[possibility]) { - //errs() << "next link is" << *link << "\n"; - if ((link!=possibility) && link->getFunction() == possibility->getFunction()) { - sf = true; - - } - } - if (sf) { - continue; - } - bool isUnique = true; - for (auto ccmapNest : callChains) { - //if self then skip - if (ccmapNest == ccmap) { - continue; - } - //otherwise check if this map also contains the possibility - if (find(ccmapNest.second.begin(), ccmapNest.second.end(), possibility) - != ccmapNest.second.end()) - { - isUnique = false; - break; - } - } - if (isUnique){ - unique.insert(possibility); - // errs() << "Found unique!" << *possibility << "\n"; - } else { - //try another poss. 
- continue; - } - } - } - - - inst_vec v; - for (Instruction* item2 : unique) { - if (!isa(item2)) { - v.push_back(item2); - } - } - toReturn[iv.first] = v; - }//end starting point check - - return toReturn; -} - -/*This function collects the input srcs and uses off of the fresh annotated vars*/ -inst_vec_vec InferAtomicModulePass::collectFresh(inst_vec_vec startingPoints, inst_insts_map inputMap) -{ - inst_vec_vec toReturn; - - for (inst_vec iv : startingPoints ) { - set unique; - set callChain; - for(Instruction* item : iv) { - #if FRESHDEBUG - errs() << "Starting point: " << *item << "\n"; - #endif - //uses (forwards) are direct only (might need a little chaining for direct in rs to be direct in IR) - inst_vec uses = traverseDirectUses(item); - - for (Instruction* use : uses) { - #if FRESHDEBUG - errs() << "Starting point use: " << *use << "\n"; - #endif - // if (isa(use)||isa(use)) { - unique.insert(use); - //} - for (Instruction* iOp : inputMap[use]) { - unique.insert(iOp); - } - } - - for (Instruction* iOp : inputMap[item]) { - unique.insert(iOp); - callChain.insert(iOp); - queue toExplore; - toExplore.push(iOp); - while (!toExplore.empty()) { - Instruction* curr = toExplore.front(); - toExplore.pop(); - for (Instruction* intermed : inputMap[curr]) { - if (! 
(find(callChain.begin(), callChain.end(), intermed)!=callChain.end())) { - callChain.insert(intermed); - toExplore.push(intermed); - } - } - } - - } - //don't forget the item itself - if (isa(item)||isa(item)) { - unique.insert(item); - } - - - } - //now construct the call chain - for (Instruction* vv : callChain) { - // errs() << "call chain val: " << *vv <<"\n"; - unique.insert(vv); - } - inst_vec v; - for (Instruction* item2 : unique) { - if (!isa(item2)) { - v.push_back(item2); - } - } - toReturn.push_back(v); - } - - - return toReturn; -} - -char InferAtomicModulePass::ID = 0; - -RegisterPass X("atomize", "Infer Atomic Pass"); diff --git a/ocelot/AtomicRegionInference/src/InferAtoms.cpp b/ocelot/AtomicRegionInference/src/InferAtoms.cpp new file mode 100644 index 0000000..3843383 --- /dev/null +++ b/ocelot/AtomicRegionInference/src/InferAtoms.cpp @@ -0,0 +1,529 @@ +#include "include/InferAtoms.h" + +#define CAPSIZE 1000 + +// Top-level pass for atomic region inference +PreservedAnalyses InferAtomsPass::run(Module& M, ModuleAnalysisManager& AM) { + PassBuilder PB; + FunctionAnalysisManager FAM; + PB.registerFunctionAnalyses(FAM); + + setModule(&M); + + for (auto& F : M) { + auto FName = F.getName(); + if (FName.equals("atomic_start")) { +#if DEBUG + errs() << "Found atomic_start\n"; +#endif + atomStart = &F; + } else if (FName.equals("atomic_end")) { +#if DEBUG + errs() << "Found atomic_end\n"; +#endif + atomEnd = &F; + } + } + + // Build the consistent set and fresh lists here, + // to only go through all the declarations once. 
+ std::map consVars; + inst_vec_vec freshVars; + inst_insts_map inputMap = buildInputs(this->M); + inst_vec toDelete; + getAnnotations(&consVars, &freshVars, inputMap, &toDelete); + // TODO: need to add unique point of call chain prefix to cons set + +#if DEBUG + errs() << "Initial Fresh:\n"; + for (auto& insts : freshVars) + for (auto* inst : insts) errs() << *inst << "\n"; +#endif + +#if DEBUG + errs() << "Initial Consistent:\n"; + for (auto& [_, insts] : consVars) { + for (auto* inst : insts) errs() << *inst << "\n"; + } +#endif + +#if DEBUG + errs() << "Print inputMap CallInst entries:\n"; + printInstInsts(inputMap, true); +#endif + + auto allConsSets = collectCons(consVars, inputMap); + auto allFresh = collectFresh(freshVars, inputMap); + +#if DEBUG + errs() << "Fresh after collect: \n"; + for (auto& varSet : allFresh) + for (auto* var : varSet) errs() << *var << "\n"; +#endif + +#if DEBUG + errs() << "Consistent after collect: \n"; + for (auto& [_, insts] : allConsSets) + for (auto* inst : insts) errs() << *inst << "\n"; +#endif + + // Consistent first + InferFreshCons* ci = new InferFreshCons(&FAM, &M, atomStart, atomEnd); + + ci->inferConsistent(allConsSets); + ci->inferFresh(allFresh); + + // Delete annotations + removeAnnotations(&toDelete); + + return PreservedAnalyses::none(); +} + +// This function finds annotated variables +void InferAtomsPass::getAnnotations(std::map* consVars, inst_vec_vec* freshVars, + inst_insts_map inputMap, inst_vec* toDelete) { +#if DEBUG + errs() << "=== getAnnotations ===\n"; +#endif + for (auto& F : *this->M) { + for (auto& B : F) { + for (auto& I : B) { + if (auto* ci = dyn_cast(&I)) { +#if DEBUG + errs() << "[Loop Inst] cur inst = CallInst\n"; +#endif + auto* fun = ci->getCalledFunction(); + // Various empty or null checks + if (fun == NULL || fun->empty() || !fun->hasName()) continue; + // Consistent and FreshConsistent + if (isAnnot(fun->getName()) && !fun->getName().equals("Fresh")) { +#if DEBUG + errs() << "[Loop Inst] 
Calls Consistent/FreshConsistent\n"; +#endif + toDelete->push_back(ci); + // First para is var, second is id + int setID; + // Bit cast use of x, then value operand of store + Instruction* var = dyn_cast(ci->getOperand(0)); + + if (var == NULL) continue; + // errs() << "New consistent annot. with " << *var<<"\n"; + Value* id = ci->getOperand(1); + if (ConstantInt* cint = dyn_cast(id)) { + setID = cint->getSExtValue(); + } + std::queue customUsers; + std::set v; + // v.emplace(ci); + // in case var itself is iOp + for (Instruction* iOp : inputMap[var]) { + v.emplace(iOp); + } + + // customUsers.push(var); + for (Value* use : var->users()) { + // don't push the annotation + if (use == ci) { + continue; + } + // errs() << "DEBUG: pushing use of var: " << *use << "\n"; + customUsers.push(use); + } + while (!customUsers.empty()) { + Value* use = customUsers.front(); + customUsers.pop(); + // errs() << "DEBUG: use is " << *use << " of var " << *var<<"\n"; + if (Instruction* instUse = dyn_cast(use)) { + for (Instruction* iOp : inputMap[instUse]) { + v.emplace(iOp); + // errs() << "DEBUG: adding to v " << *iOp << "\n"; + } + } + if (isa(use) || isa(use)) { + for (Value* use2 : use->users()) { + // errs() << "DEBUG: use2 is " << *use2 << "\n"; + if (StoreInst* si = dyn_cast(use2)) { + for (Instruction* iOp : inputMap[si]) { + v.emplace(iOp); + // errs() << "DEBUG: adding to v " << *iOp << "\n"; + } + } + // errs() << "DEBUG: pushing use2 of var: " << *use2 << "\n"; + customUsers.push(use2); + } + } + + if (isa(use)) { + for (Value* use2 : use->users()) { + // errs() << "DEBUG: use2 is " << *use2 << "\n"; + if (StoreInst* si = dyn_cast(use2)) { + // v.push_back(si); + for (Instruction* iOp : inputMap[si]) { + v.emplace(iOp); + // errs() << "DEBUG: adding to v " << *iOp << "\n"; + } + } + // errs() << "DEBUG: pushing use2 of var: " << *use2 << "\n"; + customUsers.push(use2); + } + } + } + // last case + if (v.empty()) { + // some entries have a first link with ci, not var + + 
for (Instruction* iOp : inputMap[ci]) { + if (inputMap[ci].size() == 1) { + for (Instruction* origLink : inputMap[iOp]) { + v.emplace(origLink); + } + } else { + v.emplace(iOp); + } + } + } + // for later deletion purposes + inputMap.erase(ci); + + if (!v.empty()) { + inst_vec temp; + for (Instruction* item : v) { + temp.push_back(item); + } + // add the collected list to the map + if (consVars->find(setID) != consVars->end()) { + consVars->at(setID).insert(consVars->at(setID).end(), temp.begin(), temp.end()); + } else { + consVars->emplace(setID, temp); + } + } + } else if (fun->getName().equals("Fresh")) { +#if DEBUG + errs() << "[Loop Inst] Calls Fresh\n"; +#endif + std::set v; + toDelete->push_back(ci); + +#if DEBUG + errs() << "[Loop Inst] Print inputMap entries:\n"; + printInstInsts(inputMap); +#endif + + //* Can't actually remove, otherwise wrong result + // #if DEBUG + // errs() << "[Loop Inst] Remove Fresh call from inputMap\n"; + // #endif + // inputMap.erase(ci); + + auto* arg = ci->getOperand(0); +#if DEBUG + errs() << "[Loop Inst] Fresh arg: " << *arg << "\n"; +#endif + if (auto* inst = dyn_cast(arg)) { +#if DEBUG + errs() << "[Loop Inst] arg = Instruction, add to v\n"; +#endif + v.emplace(inst); + + //* Actually collect all uses (e.g., log(x)) + if (auto* li = dyn_cast(inst)) { +#if DEBUG + errs() << "[Loop Inst] Further arg = LoadInst\n"; +#endif + auto* ptr = li->getPointerOperand(); +#if DEBUG + errs() << "[Loop Inst] Ptr operand: " << *ptr << "\n"; +#endif + for (auto* ptrUse : ptr->users()) { +#if DEBUG + errs() << "[Loop ptr users] ptrUse: " << *ptrUse << "\n"; +#endif + if (ptrUse != inst) { + if (auto* liUse = dyn_cast(ptrUse)) { + errs() << "[Loop ptr users] Diff LoadInst ptrUse, add to v\n"; + v.emplace(liUse); + } + } + } + } + } else { + // errs() << "error casting\n"; + } + // errs() << "New Fresh annot. 
with " << *var<<"\n"; + // v.push_back(ci); + +#if DEBUG + errs() << "[Loop Inst] Go over arg users\n"; +#endif + for (auto* use : arg->users()) { + if (auto* si = dyn_cast(use)) { +#if DEBUG + errs() << "[Loop Users] use = StoreInst, add to v: " << *si << "\n"; +#endif + v.emplace(si); + } else if (isa(use)) { + for (auto* use2 : use->users()) { + if (auto* si = dyn_cast(use2)) { + v.emplace(si); + } + } + } + } + + if (!v.empty()) { +#if DEBUG + errs() << "[Loop Inst] Add v's insts to a set in freshVars:\n"; +#endif + inst_vec tmp; + for (auto* inst : v) { +#if DEBUG + errs() << "[Loop v] " << *inst << "\n"; +#endif + tmp.push_back(inst); + } + freshVars->push_back(tmp); + } + } + } + } + } + } + +#if DEBUG + errs() << "*** getAnnotations ***\n"; +#endif +} + +void InferAtomsPass::removeAnnotations(inst_vec* toDelete) { + std::vector toDeleteF; + + // Delete all annotation function calls + for (auto& F : *this->M) { + if (F.hasName() && isAnnot(F.getName())) + toDeleteF.push_back(&F); + else + for (auto& B : F) { + auto I = B.begin(); + for (; I != B.end(); I++) { + if (auto* ci = dyn_cast(I)) { + // TODO: no need to confirm in toDelete? 
+ if (std::find(toDelete->begin(), toDelete->end(), &*I) != toDelete->end()) { +#if DEBUG + errs() << "Remove call: " << *I << "\n"; +#endif + I->replaceAllUsesWith(UndefValue::get(I->getType())); + I = I->eraseFromParent(); + + //* Remove args and their uses as well + for (auto& arg : ci->args()) { + if (auto* argInst = dyn_cast(arg)) { +#if DEBUG + errs() << "Remove call arg: " << *argInst << "\n"; +#endif + argInst->eraseFromParent(); + argInst->replaceAllUsesWith(UndefValue::get(argInst->getType())); + } + } + } + } + } + } + } + + // Delete all annotation function defs + for (auto F : toDeleteF) { +#if DEBUG + errs() << "Remove function " << F->getName() << "\n"; +#endif + F->replaceAllUsesWith(UndefValue::get(F->getType())); + F->eraseFromParent(); + } +} + +/*Given the starting point annotations of conSets, find the +deepest unique point of the call chain*/ +std::map InferAtomsPass::collectCons(std::map startingPoints, inst_insts_map inputMap) { + std::map toReturn; + for (std::pair iv : startingPoints) { + std::set unique; + std::map> callChains; + // each item should be the starting point from a different annot + for (Instruction* item : iv.second) { +#if DEBUG + errs() << "Starting point: " << *item << "\n"; +#endif + // add self to call chain + callChains[item].insert(item); + + for (Instruction* iOp : inputMap[item]) { + // unique.insert(iOp); + callChains[item].insert(iOp); + std::queue toExplore; + toExplore.push(iOp); + while (!toExplore.empty()) { + Instruction* curr = toExplore.front(); + toExplore.pop(); + for (Instruction* intermed : inputMap[curr]) { + if (!(find(callChains[item].begin(), callChains[item].end(), intermed) != callChains[item].end())) { + callChains[item].insert(intermed); + toExplore.push(intermed); + } + } + } + + } // finish constructing call chain for one annot. in the set + + } // constructed call chains for ALL annot. in the set. 
+ // now check the call chain + + // int index = 0; + // map foundUniquePoint; + // clean up the call chains + + for (auto ccmap : callChains) { + for (Instruction* possibility : ccmap.second) { + // if the link is in the same function, then continue + // errs() << "examining possibility: " << *possibility << "\n"; + bool sf = false; + for (Instruction* link : inputMap[possibility]) { + // errs() << "next link is" << *link << "\n"; + if ((link != possibility) && link->getFunction() == possibility->getFunction()) { + sf = true; + } + } + if (sf) { + continue; + } + bool isUnique = true; + for (auto ccmapNest : callChains) { + // if self then skip + if (ccmapNest == ccmap) { + continue; + } + // otherwise check if this map also contains the possibility + if (find(ccmapNest.second.begin(), ccmapNest.second.end(), possibility) != ccmapNest.second.end()) { + isUnique = false; + break; + } + } + if (isUnique) { + unique.insert(possibility); + // errs() << "Found unique!" << *possibility << "\n"; + } else { + // try another poss. 
+ continue; + } + } + } + + inst_vec v; + for (Instruction* item2 : unique) { + if (!isa(item2)) { + v.push_back(item2); + } + } + toReturn[iv.first] = v; + } // end starting point check + + return toReturn; +} + +// Collects the source inputs and uses of Fresh-annotated vars +inst_vec_vec InferAtomsPass::collectFresh(inst_vec_vec freshVars, inst_insts_map inputMap) { +#if DEBUG + errs() << "=== collectFresh ===\n"; +#endif + inst_vec_vec toReturn; + +#if DEBUG + errs() << "Go over fresh var sets\n"; +#endif + for (auto varSet : freshVars) { +#if DEBUG + errs() << "[Loop freshVars] Go over varSet:\n"; + printInsts(varSet); +#endif + std::set unique, callChain; + for (auto* var : varSet) { +#if DEBUG + errs() << "[Loop varSet] Cur var: " << *var << "\n"; +#endif + // Uses (forwards) are direct only (might need a little chaining for direct in rs to be direct in IR) + inst_vec uses = traverseDirectUses(var); + +#if DEBUG + errs() << "[Loop varSet] Go over uses of var\n"; +#endif + for (auto* use : uses) { +#if DEBUG + errs() << "[Loop uses] Cur use: " << *use << "\n"; + errs() << "[Loop uses] Add use to unique\n"; +#endif + unique.insert(use); + for (auto* input : inputMap[use]) { +#if DEBUG + errs() << "[Loop inputMap[use]] Add src input of use to unique: " << *input << "\n"; +#endif + unique.insert(input); + } + } + +#if DEBUG + errs() << "[Loop varSet] Go over src inputs of var\n"; +#endif + for (auto* input : inputMap[var]) { +#if DEBUG + errs() << "[Loop inputMap[var]] Cur src input: " << *input << "\n"; +#endif + unique.insert(input); + callChain.insert(input); + std::queue toExplore; + toExplore.push(input); + while (!toExplore.empty()) { + Instruction* curr = toExplore.front(); + toExplore.pop(); + for (Instruction* intermed : inputMap[curr]) { + if (!(find(callChain.begin(), callChain.end(), intermed) != callChain.end())) { + callChain.insert(intermed); + toExplore.push(intermed); + } + } + } + } + + // Add the var itself + if (isa(var) || isa(var)) { +#if 
DEBUG + errs() << "[Loop varSet] Cur var = StoreInst/CallInst, add to unique\n"; +#endif + unique.insert(var); + } + } + // Now construct the call chain + for (auto* vv : callChain) { + unique.insert(vv); + } + inst_vec v; +#if DEBUG + errs() << "[Loop freshVars] Go over unique\n"; +#endif + for (auto* inst : unique) { + if (!isa(inst)) { +#if DEBUG + errs() << "[Loop unique] Cur inst != AllocaInst, add to v: " << *inst << "\n"; +#endif + v.push_back(inst); + } + } + +#if DEBUG + errs() << "[Loop FreshVars] Add v to toReturn\n"; +#endif + toReturn.push_back(v); + } + +#if DEBUG + errs() << "*** collectFresh ***\n"; +#endif + return toReturn; +} diff --git a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp new file mode 100644 index 0000000..6258661 --- /dev/null +++ b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp @@ -0,0 +1,565 @@ +#include "include/InferFreshCons.h" + +#include "llvm/Analysis/PostDominators.h" + +Instruction* InferFreshCons::insertRegionInst(int toInsertType, Instruction* insertBefore) { +#if DEBUG + errs() << "=== insertRegionInst ===\n"; +#endif + Instruction* call; + IRBuilder<> builder(insertBefore); + // Insert a region start inst + if (toInsertType == 0) { +#if DEBUG + errs() << "Insert start before: " << *insertBefore << "\n"; +#endif + call = builder.CreateCall(this->atomStart); + } else { + // Insert a region end inst +#if DEBUG + errs() << "Insert end before: " << *insertBefore << "\n"; +#endif + call = builder.CreateCall(atomEnd); + } + +#if DEBUG + errs() << "*** insertRegionInst ***\n"; +#endif + return call; +} + +// If a direct pred is also a successor, then it's a for loop block +bool InferFreshCons::loopCheck(BasicBlock* B) { + auto BName = getSimpleNodeLabel(B); + if (!B->hasNPredecessors(1)) { + for (auto it = pred_begin(B), et = pred_end(B); it != et; ++it) { + BasicBlock* predecessor = *it; + StringRef pname = predecessor->getName().drop_front(2); + // errs() << 
"comparing " << pname<< " and " < 0) { + // errs() << "comparison is true\n"; + return true; + } + } + } + return false; +} + +// Find the first block after a for loop +BasicBlock* InferFreshCons::getLoopEnd(BasicBlock* bb) { + Instruction* ti = bb->getTerminator(); + BasicBlock* end = ti->getSuccessor(0); + ti = end->getTerminator(); + // errs() << "end is " << end->getName() << "\n"; + // for switch inst, succ 0 is the fall through + end = ti->getSuccessor(1); + // errs() << "end is " << end->getName() << "\n"; + return end; +} + +// Top level region inference function -- could flatten later +void InferFreshCons::inferConsistent(std::map consSets) { + // TODO: start with pseudo code structure from design doc + for (auto [id, set] : consSets) { +#if DEBUG + errs() << "[InferConsistent] starting set " << id << "\n"; +#endif + addRegion(set, 0); + } +} + +// The only difference is outer map vs outer vec +void InferFreshCons::inferFresh(inst_vec_vec freshSets) { +#if DEBUG + errs() << "=== inferFresh ===\n"; +#endif + // TODO: start with pseudo code structure from design doc + for (auto set : freshSets) addRegion(set, 1); +#if DEBUG + errs() << "*** inferFresh ***\n"; +#endif +} + +// Region type: 0 for Consistent, 1 for Fresh +void InferFreshCons::addRegion(inst_vec set, int regionType) { +#if DEBUG + errs() << "=== addRegion ===\n"; +#endif + // A map from set item to bb + std::map blocks; + // A queue of regions that still need to be processed + std::queue> regionsNeeded; + +#if DEBUG + errs() << "Build map from inst to bb\n"; +#endif + for (auto* item : set) blocks[item] = item->getParent(); + +#if DEBUG + errs() << "Add map to regionsNeeded\n"; +#endif + regionsNeeded.push(blocks); + + auto* root = m->getFunction("app"); + + // Iterate until no more possible regions, then pick the best one + inst_inst_vec regionsFound; + while (!regionsNeeded.empty()) { + // Need to raise all blocks in the map until they are the same + auto blockMap = regionsNeeded.front(); + 
regionsNeeded.pop(); + // Record which functions have been travelled through + std::set nested; + +#if DEBUG + errs() << "[Loop regionsNeeded] Check if blocks are in diff functions\n"; +#endif + while (!sameFunction(blockMap)) { + // To think on: does this change? + auto* goal = findCandidate(blockMap, root); +#if DEBUG + errs() << "[Loop !sameFunction] Go over each item in set\n"; +#endif + for (auto* item : set) { + // not all blocks need to be moved up + Function* currFunc = blockMap[item]->getParent(); + nested.insert(currFunc); + if (currFunc != goal) { + // if more than one call: + // callChain info is already in the starting set + // so only explore a caller if it's in conSet + bool first = true; + for (User* use : currFunc->users()) { + // if (regionType == 1) { + if (!(find(set.begin(), set.end(), use) != set.end())) { + continue; + } + // errs() << "Use: "<< *use << " is in call chain\n"; + //} + Instruction* inst = dyn_cast(use); +#if DEBUGINFER + errs() << "DEBUGINFER: examining use: " << *inst << "\n"; +#endif + if (inst == NULL) { + // errs () <<"ERROR: use " << *use << "not an instruction\n"; + break; + } + // update the original map + if (first) { + blockMap[item] = inst->getParent(); + first = false; + } else { + // copy the blockmap, update, add to queue + Instruction* inst = dyn_cast(use); + std::map copy; + for (auto map : blockMap) { + copy[map.first] = map.second; + } + copy[item] = inst->getParent(); + regionsNeeded.push(copy); + } + } // end forall uses + } // end currFunc check + } // end forall items + } // end same function check + +// Now, all bbs in the map are in the same function, so we can run +// dom or post-dom analysis on that function +#if DEBUG + errs() << "[Loop regionsNeeded] Start dom tree analysis\n"; +#endif + auto* home = blockMap.begin()->second->getParent(); + if (home == nullptr) { +#if DEBUG + errs() << "[Loop regionsNeeded] No function found\n"; +#endif + continue; + } +#if DEBUG + errs() << "[Loop regionsNeeded] 
Found home fun: " << home->getName() << "\n"; +#endif + auto& domTree = FAM->getResult(*home); + // Find the closest point that dominates + auto* startDom = blockMap.begin()->second; + for (auto& [_, B] : blockMap) { + startDom = domTree.findNearestCommonDominator(B, startDom); + } +#if DEBUG + errs() << "[Loop regionsNeeded] startDom: " << *startDom << "\n"; +#endif +// TODO: if an inst in the set is in the bb, we can truncate? +#if DEBUG + errs() << "Start post dom tree analysis\n"; +#endif + // Flip directions for the region end + auto& postDomTree = FAM->getResult(*home); + // Find the closest point that dominates + auto* endDom = blockMap.begin()->second; + for (auto map : blockMap) { +#if DEBUGINFER + if (endDom != nullptr) { + errs() << "Finding post dom of: " << getSimpleNodeLabel(map.second) << " and " << getSimpleNodeLabel(endDom) << "\n"; + } else { + errs() << "endDom is null\n"; + } +#endif + endDom = postDomTree.findNearestCommonDominator(map.second, endDom); + } +#if DEBUG + errs() << "[Loop regionsNeeded] endDom: " << *endDom << "\n"; +#endif + + if (startDom == nullptr) { + errs() << "[Error] Null startDom\n"; + } else if (endDom == nullptr) { + errs() << "[Error] Null endDom\n"; + } + // Need to make the start and end dominate each other as well. 
+ startDom = domTree.findNearestCommonDominator(startDom, endDom); + endDom = postDomTree.findNearestCommonDominator(startDom, endDom); +#if DEBUG + errs() << "[Loop regionsNeeded] After matching scope\n"; + errs() << "[Loop regionsNeeded] startDom: " << *startDom << "\n"; + errs() << "[Loop regionsNeeded] endDom: " << *endDom << "\n"; +#endif + + // Extra check to disallow loop conditional block as the end + if (loopCheck(endDom)) { +#if DEBUG + errs() << "[Loop regionsNeeded] Loop check passed\n"; +#endif + endDom = getLoopEnd(endDom); + } + + if (startDom == nullptr) { + errs() << "[Error] Null startDom after scope merge\n"; + } else if (endDom == nullptr) { + errs() << "[Error] Null endDom after scope merge\n"; + } +#if DEBUG + errs() << "[Loop regionsNeeded] Insert insts\n"; +#endif + // TODO: fallback if endDom is null? Need hyper-blocks, I think + // possibly can do a truncation check, to lessen the size a little, but could that interfere with compiler optimizations? + auto* regionStart = truncate(startDom, true, set, nested); + auto* regionEnd = truncate(endDom, false, set, nested); + if (regionStart == nullptr) { + errs() << "[Error] Null startDom after truncation\n"; + } else if (regionEnd == nullptr) { + errs() << "[Error] Null endDom after truncation\n"; + } else { + // errs() << "Region start is before " << *regionStart<<" and region end is before " << *regionEnd<<"\n"; + } + +#if DEBUG + errs() << "[Loop regionsNeeded] Add to regionsFound: (" << *regionStart << ", " << *regionEnd << ")\n"; +#endif + // Insert into regionsFound + regionsFound.emplace_back(regionStart, regionEnd); + } // end while regions needed + + // Now see which region is smallest -- instruction count? 
they must dominate + // each other, so there's no possibility of not running into the start from + // the end + auto [regionStart, regionEnd] = findShortest(regionsFound); + insertRegionInst(0, regionStart); + insertRegionInst(1, regionEnd); + //}//end while regions needed + +#if DEBUG + errs() << "*** addRegion ***\n"; +#endif +} + +// Truncate a bb if the instruction is in the bb +Instruction* InferFreshCons::truncate(BasicBlock* B, bool forwards, inst_vec set, std::set nested) { +#if DEBUG + errs() << "=== truncate ===\n"; +#endif + +#if DEBUG + errs() << "Set:\n"; + for (auto& inst : set) + errs() << *inst << "\n"; +#endif + + // Truncate the front + if (forwards) { +#if DEBUG + errs() << "Truncate startDom\n"; + errs() << "Go over each inst\n"; +#endif + for (auto& I : *B) { + // Stop at first inst in bb that is in the set. + if (find(set.begin(), set.end(), &I) != set.end()) { +#if DEBUG + errs() << "[Loop B] Found first inst also in set: " << I << "\n"; +#endif + return &I; + } + // Need to stop at relevant CallInsts as well + else if (auto* ci = dyn_cast(&I)) { + if (nested.find(ci->getCalledFunction()) != nested.end()) + return &I; + } + } + +#if DEBUG + errs() << "Found no inst, return last inst\n"; +#endif + // Otherwise just return the last inst + return &B->back(); + } + +#if DEBUG + errs() << "Truncate endDom\n"; + errs() << "Go over each inst in reverse\n"; +#endif + // Reverse directions if not forwards + Instruction* prev = NULL; + for (auto I = B->rbegin(), rend = B->rend(); I != rend; I++) { + auto* inst = &*I; + if (find(set.begin(), set.end(), inst) != set.end()) { +#if DEBUG + errs() << "[Loop B] Found last inst also in set: " << *I << "\n"; +#endif + // Need to return the previous inst (next in forwards), + // as it should be inserted before the returned inst + if (prev == NULL) { + // Only happens if use is a ret inst, which is a scope use to make the branching + // work, not an actual one, so this is safe + return inst; + } + +#if DEBUG + 
errs() << "[Loop B] Return prev inst: " << *prev << "\n"; +#endif + return prev; + } else if (auto* ci = dyn_cast(inst)) { + if (nested.find(ci->getCalledFunction()) != nested.end()) { + return prev; + } + } + prev = inst; + } + +#if DEBUG + errs() << "*** truncate ***\n"; +#endif + +#if DEBUG + errs() << "Found no inst, return first inst\n"; +#endif + // Otherwise just return first inst of the block + // errs() << "truncate returning " << bb->front() << "\n"; + return &B->front(); +} + +// findCandidate +Function* InferFreshCons::findCandidate(std::map blockMap, Function* root) { +#if DEBUG + errs() << "== findCandidate ===\n"; +#endif + std::vector funList; + // Add the parents, without duplicates + for (auto& [_, B] : blockMap) { + if (!(find(funList.begin(), funList.end(), B->getParent()) != funList.end())) { +#if DEBUG + errs() << "Add: " << B->getParent()->getName() << "\n"; +#endif + funList.push_back(B->getParent()); + } + } + + // Easy case: everything is already in the same function + if (funList.size() == 1) return funList.at(0); + + /* Algo Goal: get the deepest function that still calls (or is) all funcs in funcList. + * Consider: multiple calls? Should be dealt with in the add region function -- eventually each caller + * gets its own region + */ + Function* goal = nullptr; +#if DEBUG + errs() << "starting from " << root->getName() << "\n"; +#endif + deepCaller(root, funList, &goal); + if (goal == nullptr) { + errs() << "ERROR: deepCaller failed\n"; + } + +#if DEBUG + errs() << "*** findCandidate ***\n"; +#endif + return goal; +} + +/*Recursive: from a root, returns list of called funcs. 
*/ +std::vector InferFreshCons::deepCaller(Function* root, std::vector& funList, Function** goal) { + std::vector calledFuncs; + bool mustIncludeSelf = false; + + for (inst_iterator inst = inst_begin(root), E = inst_end(root); inst != E; ++inst) { + if (CallInst* ci = dyn_cast(&(*inst))) { + calledFuncs.push_back(ci->getCalledFunction()); + } + } + std::vector explorationList; + for (auto* item : funList) { + // skip over root or called funcs + if ((find(calledFuncs.begin(), calledFuncs.end(), item) != calledFuncs.end()) || item == root) { + if (item == root) { + mustIncludeSelf = true; + } + continue; + } + explorationList.push_back(item); +#if DEBUGINFER + errs() << "need to find " << item->getName() << "\n"; +#endif + } + // this function is a root of a call tree that calls everything in the func List + if (explorationList.empty()) { +#if DEBUGINFER + errs() << "empty list\n"; +#endif + *goal = root; + return calledFuncs; + } + // otherwise recurse + Function* candidate = nullptr; + for (Function* called : calledFuncs) { + std::vector partial = deepCaller(called, explorationList, &candidate); + // if candidate is set, it means called is a root for everything in the explorationList + if (candidate != nullptr) { + *goal = candidate; +#if DEBUGINFER + errs() << "New candidate: " << (*goal)->getName() << "\n"; +#endif + } + // remove from explorationList, but add to calledFuncs + for (Function* item : partial) { + func_vec::iterator place = find(explorationList.begin(), explorationList.end(), item); + if (place != explorationList.end()) { + explorationList.erase(place); + } + calledFuncs.push_back(item); + } + } + // current point is a root + if (explorationList.empty()) { + // not the deepest + if (candidate != nullptr && !mustIncludeSelf) { + *goal = candidate; + } else { + // is the deepest + *goal = root; + } + } + return calledFuncs; +} + +// Get the min of the max length of each region +inst_inst_pair InferFreshCons::findShortest(inst_inst_vec regionsFound) { 
+#if DEBUG + errs() << "=== findShortest ===\n"; +#endif + inst_inst_pair best; + int shortest = INT32_MAX; + +#if DEBUG + errs() << "Go over regionsFound\n"; +#endif + for (auto& [start, end] : regionsFound) { + int prefixLength = 0, found = 0; + auto* startParent = start->getParent(); +#if DEBUG + errs() << "[Loop regionsFound] startParent: " << *startParent << "\n"; + errs() << "Go over startParent insts\n"; +#endif + for (auto& I : *startParent) { + prefixLength++; + if (&I == start) break; + } + + // Get the max length from the bb to the end instruction + std::vector v; + int endLength = getSubLength(startParent, end, v); + // Substract the prefix before the start inst + endLength -= prefixLength; +#if DEBUG + errs() << "[Loop regionsFound] Region length " << endLength << "\n"; +#endif + if (endLength < shortest) { +#if DEBUG + errs() << "[Loop regionsFound] Shortest region: (" << *start << ", " << *end + << ") at length " << endLength << "\n"; +#endif + shortest = endLength; + best = std::make_pair(start, end); + } + } + +#if DEBUG + errs() << "*** findShortest ***\n"; +#endif + return best; +} + +int InferFreshCons::getSubLength(BasicBlock* B, Instruction* end, std::vector visited) { +#if DEBUG + errs() << "=== getSubLength ===\n"; +#endif + + int count = 0, max_ret = 0; + visited.push_back(B); +#if DEBUG + errs() << "Go over bb insts\n"; +#endif + for (auto& I : *B) { + count++; + + if (&I == end) { +#if DEBUG + errs() << "[Loop I] Cur inst = end, stop\n"; +#endif + return count; + } + + if (auto* ci = dyn_cast(&I)) { + auto* cf = ci->getCalledFunction(); + if (!cf->empty() && cf != NULL) { +#if DEBUG + errs() << "[Loop I] Cur inst = CallInst, calling: " << cf->getName() << "\n"; +#endif + count += cf->getInstructionCount(); + } + } + + if (I.isTerminator()) { +#if DEBUG + errs() << "[Loop I] Cur inst = terminator\n"; +#endif + for (int i = 0; i < I.getNumSuccessors(); i++) { + auto* next = I.getSuccessor(i); + // already counted -- do something more fancy 
for loops? + if (find(visited.begin(), visited.end(), next) != visited.end()) continue; + int intermed = getSubLength(next, end, visited); + if (intermed > max_ret) { + max_ret = intermed; + } + } + } + } + +#if DEBUG + errs() << "*** getSubLength ***\n"; +#endif + return count + max_ret; +} + +bool InferFreshCons::sameFunction(std::map blockMap) { + auto* BComp = blockMap.begin()->second->getParent(); + for (auto& [_, B] : blockMap) + if (B->getParent() != BComp) return false; + return true; +} diff --git a/ocelot/AtomicRegionInference/src/TaintTracker.cpp b/ocelot/AtomicRegionInference/src/TaintTracker.cpp index 0033a78..ee22ad8 100644 --- a/ocelot/AtomicRegionInference/src/TaintTracker.cpp +++ b/ocelot/AtomicRegionInference/src/TaintTracker.cpp @@ -1,847 +1,905 @@ #include "include/TaintTracker.h" +// Main dataflow function to construct map of store (TODO: not just stores) insts to vars (inputs?) they depend on +inst_insts_map buildInputs(Module* M) { +#if DEBUG + errs() << "=== buildInputs ===\n"; +#endif + + inst_vec inputInsts = findInputInsts(M); + inst_insts_map taintedInsts; + inst_vec promotedInputs; + + for (auto inputInst : inputInsts) { +#if DEBUG + errs() << "[Loop inputInst] orig input: " << *inputInst << "\n"; +#endif + + // Add self to map + taintedInsts[inputInst].insert(inputInst); + std::queue toExplore; +#if DEBUG + errs() << "[Loop inputInst] Add orig input to toExplore\n"; +#endif + toExplore.push(inputInst); + +#if DEBUG + errs() << "[Loop inputInst] Explore flows from orig input\n"; +#endif + + // Iterate until no more inter-proc flows found + while (!toExplore.empty()) { +#if DEBUG + errs() << "=== Loop toExplore ===\n"; +#endif + auto* curVal = toExplore.front(); + toExplore.pop(); -/*Main DataFlow function to construct map of store insts to vars they depend on*/ -inst_insts_map buildInputs(Module* m) -{ - inst_vec inputs = findInputInsts(m); - inst_insts_map taintedDecl; - inst_vec promoted_inputs; - - for (Instruction* iOp : inputs) { - 
#if DEBUG - errs() << "Starting input: " << *iOp <<"\n"; - #endif - //don't forget to add self to map - taintedDecl[iOp].insert(iOp); - queue toExplore; - toExplore.push(iOp); - - //iterate until no more interproc flows found - while(!toExplore.empty()) { - - Value* currVal = toExplore.front(); - if (currVal == NULL) { - continue; - } + if (curVal == NULL) continue; + +#if DEBUG + errs() << "[Loop toExplore] cur inst: " << *curVal << "\n"; +#endif val_vec interProcFlows; - toExplore.pop(); - if (currVal == iOp) { - interProcFlows = traverseLocal(currVal, iOp, &taintedDecl, nullptr); - for (Value* vipf : interProcFlows) { - if(Instruction* iipf = dyn_cast(vipf)) { - if (CallInst* anno_check = dyn_cast(iipf)){ - //we delete these later... creates problems - if (anno_check->getName().contains("Fresh") || - anno_check->getName().contains("Consistent") ) { - continue; - } - } - taintedDecl[iipf].insert(iOp); + if (curVal == inputInst) { +#if DEBUG + errs() << "[Loop toExplore] cur inst = orig input\n"; + errs() << "[Loop toExplore] Call traverseLocal with cur inst (tainted), orig input (srcInput), caller (none)\n"; +#endif + interProcFlows = traverseLocal(curVal, inputInst, &taintedInsts, nullptr); +#if DEBUG + errs() << "[Loop toExplore] [cur inst = orig input] Inspect interProcFlows:\n"; +#endif + for (auto* vipf : interProcFlows) { + if (auto* iipf = dyn_cast(vipf)) { + if (auto* anno_check = dyn_cast(iipf)) { + // We delete these later... 
creates problems + if (isAnnot(anno_check->getName())) continue; + } + +#if DEBUG + errs() << "Adding orig input (" << *inputInst << ") to set at " << *iipf << "\n"; +#endif + taintedInsts[iipf].insert(inputInst); } } - } else if (isa(currVal)) { - //note it will not be iop, even though iop is a call - //this case handles both returns and pbref - - promoted_inputs.push_back(dyn_cast(currVal)); - Value* next = toExplore.front(); + } else if (isa(curVal)) { +#if DEBUG + errs() << "[Loop toExplore] cur inst = CallInst\n"; +#endif + // Note it will not be iop, even though iop is a call + // This case handles both returns and pbref + + promotedInputs.push_back(dyn_cast(curVal)); + auto* next = toExplore.front(); toExplore.pop(); - //if the next is a return, this was a return flow - //otherwise, if it's an arg, this was pbref + // If the next is a return, this was a return flow + // Otherwise, if it's an arg, this was pbref + //? pbref - pass by reference? if (isa(next)) { - interProcFlows = traverseLocal(currVal, dyn_cast(currVal), &taintedDecl, nullptr); +#if DEBUG + errs() << "[Loop toExplore] cur inst next = Return inst (return flow)\n"; +#endif + interProcFlows = traverseLocal(curVal, dyn_cast(curVal), &taintedInsts, nullptr); for (Value* vipf : interProcFlows) { - if(Instruction* iipf = dyn_cast(vipf)) { - - //don't add self - if (currVal == vipf) { + if (Instruction* iipf = dyn_cast(vipf)) { + // don't add self + if (curVal == vipf) { continue; } - if (CallInst* anno_check = dyn_cast(iipf)){ - //we delete these later... creates problems - if (anno_check->getName().contains("Fresh") || - anno_check->getName().contains("Consistent") ) { - continue; - } - } - taintedDecl[iipf].insert(dyn_cast(currVal)); + if (CallInst* anno_check = dyn_cast(iipf)) { + // we delete these later... 
creates problems + if (anno_check->getName().contains("Fresh") || + anno_check->getName().contains("Consistent")) { + continue; + } + } + taintedInsts[iipf].insert(dyn_cast(curVal)); } - } + } } else if (isa(next)) { - //grab the para corresponding to the argument +#if DEBUG + errs() << "[Loop toExplore] cur inst next = Argument (pbref)\n"; +#endif + // Grab the para corresponding to the argument int index = -1; int i = 0; - CallInst* ci = dyn_cast(currVal); - + CallInst* ci = dyn_cast(curVal); - if (ci->getCalledFunction() == NULL) { - continue; + if (ci->getCalledFunction() == NULL) continue; + if (ci->getCalledFunction()->empty()) continue; + +#if DEBUG + errs() << "exploring function " << ci->getCalledFunction()->getName() << "\n"; +#endif + + for (auto& arg : ci->getCalledFunction()->args()) { + // errs() <<"arg is "<(&arg) != next) { + i++; + } else { + index = i; + } } - if (ci->getCalledFunction()->empty()) { + if (index == -1) { +#if DEBUG + errs() << "couldn't find pass by ref " << *next << "\n"; +#endif continue; } - #if DEBUG - errs() << "exploring function " << ci->getCalledFunction()->getName() << "\n"; - #endif - - for (auto& arg : ci->getCalledFunction()->args()){ - //errs() <<"arg is "<(&arg)!=next) { - i++; - } else { - index = i; - } - - } - if(index == -1){ - #if DEBUG - errs() << "couldn't find pass by ref " << *next << "\n"; - #endif - continue; + Value* tArg = ci->getArgOperand(index); + // errs() << "arg_op: "<< *arg_op<<"\n"; + // check if reference is part of an array + if (GEPOperator* gep = dyn_cast(tArg)) { + tArg = gep->getPointerOperand(); } - - Value* tArg = ci->getArgOperand(index); - //errs() << "arg_op: "<< *arg_op<<"\n"; - //check if reference is part of an array - if (GEPOperator* gep = dyn_cast(tArg)) { - tArg = gep->getPointerOperand(); - } - //if bitcast inst, - else if (BitCastInst* bci = dyn_cast(tArg)){ + // if bitcast inst, + else if (BitCastInst* bci = dyn_cast(tArg)) { tArg = bci->getOperand(0); } - //need to actually 
find the first use *after* the callInst - Instruction* fstUse = ptrAfterCall(tArg,ci); - if (fstUse!=nullptr && fstUse!=tArg) { - #if DEBUG + // need to actually find the first use *after* the callInst + Instruction* fstUse = ptrAfterCall(tArg, ci); + if (fstUse != nullptr && fstUse != tArg) { +#if DEBUG errs() << "First use after call: " << *fstUse << "\n"; - #endif - //if the first use is itself a callinst, then treat as a tainted para case, +#endif + // if the first use is itself a callinst, then treat as a tainted para case, val_vec visited_fstuse; visited_fstuse.push_back(ci); - - while (CallInst* ci_fstuse = dyn_cast(fstUse) ) { - //already visited, as in loop - if (find(visited_fstuse.begin(),visited_fstuse.end(), ci_fstuse) - !=visited_fstuse.end()) { - //no non-call uses + + while (CallInst* ci_fstuse = dyn_cast(fstUse)) { + // already visited, as in loop + if (find(visited_fstuse.begin(), visited_fstuse.end(), ci_fstuse) != visited_fstuse.end()) { + // no non-call uses fstUse = nullptr; break; } - if (CallInst* anno_check = dyn_cast(ci_fstuse)){ - //we delete these later... creates problems - if (anno_check->getName().contains("Fresh") || - anno_check->getName().contains("Consistent") ) { - continue; - } - } + if (CallInst* anno_check = dyn_cast(ci_fstuse)) { + // we delete these later... 
creates problems + if (anno_check->getName().contains("Fresh") || + anno_check->getName().contains("Consistent")) { + continue; + } + } visited_fstuse.push_back(ci_fstuse); - unsigned int arg_num = ci_fstuse->getNumArgOperands(); - + unsigned int arg_num = ci_fstuse->arg_size(); + +#if DEBUG + errs() << "[Loop customUsers] Find index of tainted arg:\n"; +#endif // Find the index of the tainted argument - for (unsigned int i = 0; i < arg_num; i++){ - #if DEBUG - errs() << "DEBUG: comparing "<< *tArg <<" and " << *(ci_fstuse->getArgOperand(i))<<"\n"; - #endif - if(ci_fstuse->getArgOperand(i)==tArg) { - #if DEBUG - // errs() << "DEBUG: pushing arg of "<< calledFunc->getName() <<"\n"; - #endif + for (unsigned int i = 0; i < arg_num; i++) { + // TODO +#if DEBUG + errs() << "comparing " << *tArg << " and " << *(ci_fstuse->getArgOperand(i)) << "\n"; +#endif + if (ci_fstuse->getArgOperand(i) == tArg) { +#if DEBUG + // errs() << "pushing arg of "<< calledFunc->getName() <<"\n"; +#endif interProcFlows.push_back((ci_fstuse->getCalledFunction()->arg_begin() + i)); - //MUST also push back the call inst. + // MUST also push back the call inst. 
interProcFlows.push_back(ci_fstuse); - //and the srcOp + // and the srcOp interProcFlows.push_back(ci); - + break; } } - //find next local use - //promoted_inputs.push_back(ci); - taintedDecl[ci_fstuse].insert(ci); - fstUse = ptrAfterCall(tArg,ci_fstuse); + // find next local use + // promoted_inputs.push_back(ci); + taintedInsts[ci_fstuse].insert(ci); + fstUse = ptrAfterCall(tArg, ci_fstuse); if (fstUse == nullptr) { break; } - } - //re nullptr check - if (fstUse!=nullptr) { - interProcFlows = traverseLocal(fstUse, dyn_cast(currVal), &taintedDecl, nullptr); + } + // re nullptr check + if (fstUse != nullptr) { + interProcFlows = traverseLocal(fstUse, dyn_cast(curVal), &taintedInsts, nullptr); for (Value* vipf : interProcFlows) { - if(Instruction* iipf = dyn_cast(vipf)) { - if (CallInst* anno_check = dyn_cast(iipf)){ - //we delete these later... creates problems - if (anno_check->getName().contains("Fresh") || - anno_check->getName().contains("Consistent") ) { + if (Instruction* iipf = dyn_cast(vipf)) { + if (CallInst* anno_check = dyn_cast(iipf)) { + // we delete these later... 
creates problems + if (anno_check->getName().contains("Fresh") || + anno_check->getName().contains("Consistent")) { continue; } } - taintedDecl[iipf].insert(dyn_cast(currVal)); + taintedInsts[iipf].insert(dyn_cast(curVal)); } } } - } + } } - } else if (isa(currVal)) { - #if DEBUG - errs() << "exploring tainted arg " << *currVal << "\n"; - #endif - Instruction* caller = dyn_cast(toExplore.front()); - - //promoted_inputs.push_back(caller); + } else if (isa(curVal)) { +#if DEBUG + errs() << "[Loop toExplore] cur inst = Argument (tainted arg)\n"; +#endif + + auto* caller = dyn_cast(toExplore.front()); toExplore.pop(); - Instruction* innerSrcOp = dyn_cast(toExplore.front()); +#if DEBUG + errs() << "[Loop toExplore] Caller: " << *caller << "\n"; +#endif + // promoted_inputs.push_back(caller); + + auto* innerInputInst = dyn_cast(toExplore.front()); toExplore.pop(); - interProcFlows = traverseLocal(currVal, innerSrcOp, &taintedDecl, caller); - - for (Value* vipf : interProcFlows) { - if(Instruction* iipf = dyn_cast(vipf)) { - if (CallInst* anno_check = dyn_cast(iipf)){ - //we delete these later... creates problems - if (anno_check->getName().contains("Fresh") || - anno_check->getName().contains("Consistent") ) { - continue; - } - } - taintedDecl[iipf].insert(innerSrcOp); - } - } - }//end elsif chain - #if DEBUG - errs() << "Finished iteration\n"; - #endif - for (Value* item : interProcFlows) { - if(item != NULL) { - //errs() <<"pushing item " << *item <<"\n"; +#if DEBUG + errs() << "[Loop toExplore] orig input: " << *innerInputInst << "\n"; + errs() << "[Loop toExplore] Call traverseLocal with cur inst (tainted), orig input (srcInput), caller\n"; +#endif + + interProcFlows = traverseLocal(curVal, innerInputInst, &taintedInsts, caller); + +#if DEBUG + errs() << "[Loop toExplore] Inspect interProcFlows:\n"; +#endif + for (auto* vipf : interProcFlows) { + if (auto* iipf = dyn_cast(vipf)) { + if (auto* anno_check = dyn_cast(iipf)) { + // We delete these later... 
creates problems + if (isAnnot(anno_check->getName())) continue; + } + taintedInsts[iipf].insert(innerInputInst); +#if DEBUG + errs() << "Adding innerInputInst (" << *innerInputInst << ") to set at " << *iipf << "\n"; +#endif + } + } + } // end elsif chain + + for (auto* item : interProcFlows) { + if (item != NULL) { +#if DEBUG + errs() << "Add to toExplore: " << *item << "\n"; +#endif toExplore.push(item); } else { errs() << "ERROR: encountered null interproc item\n"; } } - }//end while queue not empty - }//end for all iOp - - return taintedDecl; + +#if DEBUG + errs() << "*** Loop toExplore ***\n"; +#endif + } // end while queue not empty + } // end for all inputInsts + +#if DEBUG + errs() << "*** buildInputs ***\n"; +#endif + return taintedInsts; } -val_vec traverseLocal(Value* tainted, Instruction* srcOp, inst_insts_map* iInfo, Instruction* caller) -{ +val_vec traverseLocal(Value* tainted, Instruction* srcInput, inst_insts_map* taintedInsts, Instruction* caller) { +#if DEBUG + errs() << "=== traverseLocal ===\n"; +#endif + val_vec interProcSinks; - queue localDeps; + std::queue localDeps; +#if DEBUG + errs() << "Add cur inst to localDeps\n"; +#endif localDeps.push(tainted); - while(!localDeps.empty()) { - Value* currVal = localDeps.front(); + while (!localDeps.empty()) { +#if DEBUG + errs() << "=== Loop localDeps ===\n"; +#endif + auto* curVal = localDeps.front(); localDeps.pop(); - val_vec customUsers; - if (StoreInst* si = dyn_cast(currVal)) { - //add the pointer to deps, as stores have no uses - //Add info on the store to the map - if(iInfo->find(si)!=iInfo->end()) { - if (find(iInfo->at(si).begin(), iInfo->at(si).end(), srcOp)!=iInfo->at(si).end()) { - continue; - } else { - iInfo->at(si).insert(srcOp); - } +#if DEBUG + errs() << "[Loop localDeps] cur inst: " << *curVal << "\n"; +#endif + val_vec customUsers; + if (auto* si = dyn_cast(curVal)) { +#if DEBUG + errs() << "[Loop localDeps] cur inst = StoreInst\n"; +#endif + // Add the pointer to deps, as stores 
have no uses + // Add info on the store to the map + if (taintedInsts->find(si) != taintedInsts->end()) { + auto insts = taintedInsts->at(si); + if (std::find(insts.begin(), insts.end(), srcInput) != insts.end()) continue; + taintedInsts->at(si).insert(srcInput); } else { - set seti; - seti.insert(srcOp); - iInfo->emplace(si, seti); + std::set seti; + seti.insert(srcInput); + taintedInsts->emplace(si, seti); } - #if DEBUG - errs() << " adding to map " << *srcOp << " for " << *si << "\n"; - #endif - //See if it is (or aliases?) one of the function arguments - for (Argument& arg : si->getFunction()->args()) { - Value* to_comp = si->getPointerOperand()->stripPointerCasts(); - #if DEBUG - errs() << " PBRef comp: " << *to_comp << " and " << arg << "\n"; - #endif - if (to_comp== &arg) { - //if taint came from inside any callsite is potentially tainted +#if DEBUG + errs() << "[Loop localDeps] Adding orig input (" << *srcInput << ") to set at cur inst (" << *si << ")\n"; +#endif + // See if it is (or aliases?) 
one of the function arguments (PBRef comp) + for (auto& arg : si->getFunction()->args()) { + auto* storePtr = si->getPointerOperand()->stripPointerCasts(); +#if DEBUG + errs() << "[Loop localDeps] Is ptr being stored to (" << *storePtr << ") = fun arg (" << arg << ")\n"; +#endif + if (storePtr == &arg) { + // if taint came from inside any callsite is potentially tainted if (caller == nullptr) { - for(Value* calls : si->getFunction()->users()) { + for (auto calls : si->getFunction()->users()) { interProcSinks.push_back(calls); interProcSinks.push_back(dyn_cast(&arg)); - if (Instruction* key = dyn_cast(calls)) { - //check to make sure not already visited - // iInfo->at(key).insert(srcOp); - + if (auto key = dyn_cast(calls)) { + // check to make sure not already visited + // taintedInsts->at(key).insert(srcOp); } } } else { - //otherwise, just the caller's + // otherwise, just the caller's interProcSinks.push_back(caller); interProcSinks.push_back(dyn_cast(&arg)); - if (Instruction* key = dyn_cast(caller)) { - - - //check to make sure not already visited - // iInfo->at(key).insert(srcOp); - + if (auto key = dyn_cast(caller)) { + // check to make sure not already visited + // taintedInsts->at(key).insert(srcOp); } } } } - //construct "users" of the store - #if DEBUG - errs() << "DEBUG: Store users\n"; - #endif - //add in loads that are reachable from the tainted store. - Value* ptr = si->getPointerOperand(); - //if bci, get the operand, as that's the useful ptr - if (BitCastInst* bciptr = dyn_cast(ptr) ){ - ptr = bciptr->getOperand(0); - } - for(Value* use : ptr->users()){ - if (Instruction* useOfStore = dyn_cast(use)) { - #if DEBUG - errs() << "DEBUG: checking use " << *useOfStore << "\n"; - #endif + // Construct "users" of the store +#if DEBUG + errs() << "[Loop localDeps] Add users (loads) of store to customUsers:\n"; +#endif + // Add in loads that are reachable from the tainted store. 
+ auto* ptr = si->getPointerOperand(); + // If bci, get the operand, as that's the useful ptr + if (auto bciptr = dyn_cast(ptr)) ptr = bciptr->getOperand(0); + for (auto* use : ptr->users()) { + if (auto* useOfStore = dyn_cast(use)) { if (storePrecedesUse(useOfStore, si)) { +#if DEBUG + errs() << "[Loop Store Users] store precedes this use, add:" << *useOfStore << "\n"; +#endif customUsers.push_back(useOfStore); } } } - //update currVal to be the pointer - currVal = si->getPointerOperand(); + // Update curVal to be the pointer + curVal = si->getPointerOperand(); - //if it's a gepi, see if there are others that occur afterwards + // If it's a gepi, see if there are others that occur afterwards if (isa(si->getPointerOperand())) { inst_vec matching = couldMatchGEPI(dyn_cast(si->getPointerOperand())); - for (Instruction* item : matching) { + for (auto item : matching) { localDeps.push(item); } - //check pbref, need to compare op of the gepi, not gepi itself - for (Argument& arg : si->getFunction()->args()) { - #if DEBUG - errs() << " PBRef comp: " << *dyn_cast(currVal)->getOperand(0) << " and " << arg << "\n"; - #endif - if (dyn_cast(currVal)->getOperand(0) == &arg) { - //if taint came from inside any callsite is potentially tainted + // check pbref, need to compare op of the gepi, not gepi itself + for (auto& arg : si->getFunction()->args()) { +#if DEBUG + errs() << " PBRef comp: " << *dyn_cast(curVal)->getOperand(0) << " and " << arg << "\n"; +#endif + if (dyn_cast(curVal)->getOperand(0) == &arg) { + // if taint came from inside any callsite is potentially tainted if (caller == nullptr) { - for(Value* calls : si->getFunction()->users()) { + for (Value* calls : si->getFunction()->users()) { interProcSinks.push_back(calls); interProcSinks.push_back(dyn_cast(&arg)); if (Instruction* key = dyn_cast(calls)) { - - // iInfo->at(key).insert(srcOp); + // taintedInsts->at(key).insert(srcOp); } } } else { - //otherwise, just the caller's + // otherwise, just the caller's 
interProcSinks.push_back(caller); interProcSinks.push_back(dyn_cast(&arg)); if (Instruction* key = dyn_cast(caller)) { - - // iInfo->at(key).insert(srcOp); + // taintedInsts->at(key).insert(srcOp); } } } - } + } } - + } else { - //if not a store, do normal users of currval - customUsers.insert(customUsers.end(), currVal->user_begin(), currVal->user_end()); +#if DEBUG + errs() << "[Loop localDeps] cur inst != StoreInst\n"; + errs() << "[Loop localDeps] Add users of cur inst to customUsers:\n"; + for (auto* use : curVal->users()) errs() << *use << "\n"; +#endif + // If not a store, do normal users of curVal + customUsers.insert(customUsers.end(), curVal->user_begin(), curVal->user_end()); } - - - - for (Value* use : customUsers) { - - //check that the use of a tainted pointer is really tainted - - //this is checking if the use is a tainted store - - if (ReturnInst* ri = dyn_cast(use)) { - #if DEBUG - errs() << "DEBUG: in return case\n"; - #endif +#if DEBUG + errs() << "[Loop localDeps] Go over uses\n"; +#endif + //* Here we may cross over to another procedure + for (auto* use : customUsers) { + // Check that the use of a tainted pointer is really tainted + + // This is checking if the use is a tainted store + + if (auto ri = dyn_cast(use)) { +#if DEBUG + errs() << "[Loop customUsers] use = ReturnInst\n"; +#endif if (caller == nullptr) { - for(Value* calls : ri->getFunction()->users()) { - if(CallInst* ci = dyn_cast(calls)) { +#if DEBUG + errs() << "[Loop customUsers] No caller\n"; +#endif + for (auto calls : ri->getFunction()->users()) { + if (auto ci = dyn_cast(calls)) { interProcSinks.push_back(calls); - //extra for bookkeeping + // extra for bookkeeping interProcSinks.push_back(use); } } } else { - //otherwise, just the caller's +#if DEBUG + errs() << "[Loop customUsers] Some caller\n"; +#endif + // otherwise, just the caller's interProcSinks.push_back(caller); - //extra for bookkeeping + // extra for bookkeeping interProcSinks.push_back(use); } - - } else if 
(isa(use)) { - #if DEBUG - errs() << "DEBUG: in call case\n"; - #endif - //Add the right argument to the list - CallInst* ci = dyn_cast(use); - Function* calledFunc = ci ->getCalledFunction(); - if (calledFunc == NULL || calledFunc->empty()) { - //special case for llvm.memcpy - //See if it is (or aliases?) one of the function arguments - if (calledFunc!=NULL && calledFunc->hasName() && - calledFunc->getName().contains("llvm.memcpy")) { - //errs() << "DEBUG: memcpy " << *ci << "\n"; + } else if (auto* ci = dyn_cast(use)) { +#if DEBUG + errs() << "[Loop customUsers] use = CallInst\n"; +#endif + // Add the right argument to the list + auto* calledFun = ci->getCalledFunction(); + if (calledFun == NULL || calledFun->empty()) { + // special case for llvm.memcpy + // See if it is (or aliases?) one of the function arguments + if (calledFun != NULL && calledFun->hasName() && + calledFun->getName().contains("llvm.memcpy")) { + // errs() << "memcpy " << *ci << "\n"; Value* src = ci->getOperand(1)->stripPointerCasts(); Value* dest = ci->getOperand(0); - // errs() << "DEBUG: with dest " << *dest << "\n"; + // errs() << "with dest " << *dest << "\n"; if (BitCastInst* bci = dyn_cast(dest)) { dest = bci->getOperand(0); - } + } if (GetElementPtrInst* gepi = dyn_cast(dest)) { dest = gepi->getOperand(0); - // errs() << "DEBUG: and gepi dest " << *dest << "\n"; + // errs() << "and gepi dest " << *dest << "\n"; } bool found = false; for (Argument& arg : ci->getFunction()->args()) { - //Value* to_comp = - #if DEBUG +// Value* to_comp = +#if DEBUG errs() << " PBRef comp: " << *dest << " and " << arg << "\n"; - #endif - if (dest== &arg) { +#endif + if (dest == &arg) { found = true; - //if taint came from inside any callsite is potentially tainted + // if taint came from inside any callsite is potentially tainted if (caller == nullptr) { - for(Value* calls : ci->getFunction()->users()) { + for (Value* calls : ci->getFunction()->users()) { interProcSinks.push_back(calls); 
interProcSinks.push_back(dyn_cast(&arg)); if (Instruction* key = dyn_cast(calls)) { - - // iInfo->at(key).insert(srcOp); + // taintedInsts->at(key).insert(srcOp); } } } else { - //otherwise, just the caller's + // otherwise, just the caller's interProcSinks.push_back(caller); interProcSinks.push_back(dyn_cast(&arg)); if (Instruction* key = dyn_cast(caller)) { - // iInfo->at(key).insert(srcOp); + // taintedInsts->at(key).insert(srcOp); } } } } - //it wasn't pbref, just "store", so find fst ptr after call - //and also put in iInfo + // it wasn't pbref, just "store", so find fst ptr after call + // and also put in taintedInsts if (!found) { - Value* destFst = ptrAfterCall(dest,ci); - - - //in case of loop - if (destFst !=ci->getOperand(0)) { - // errs () << "found a memcpy store " << *destFst <<"\n"; - if(iInfo->find(ci)!=iInfo->end()) { - if (find(iInfo->at(ci).begin(), iInfo->at(ci).end(), srcOp)!=iInfo->at(ci).end()) { + Value* destFst = ptrAfterCall(dest, ci); + + // in case of loop + if (destFst != ci->getOperand(0)) { + // errs () << "found a memcpy store " << *destFst <<"\n"; + if (taintedInsts->find(ci) != taintedInsts->end()) { + if (find(taintedInsts->at(ci).begin(), taintedInsts->at(ci).end(), srcInput) != taintedInsts->at(ci).end()) { continue; } else { - iInfo->at(ci).insert(srcOp); + taintedInsts->at(ci).insert(srcInput); } } else { - set seti; - seti.insert(srcOp); - iInfo->emplace(ci, seti); + std::set seti; + seti.insert(srcInput); + taintedInsts->emplace(ci, seti); } localDeps.push(destFst); } - } - } //end memcpy check - - //conservative tainting decision - if (calledFunc->empty()) { - - //if it's empty but declared in our mod (one of the passed in C ones) - //and it returns a value, then consider the taint passed to the - //return - if (!calledFunc->getName().contains("llvm") && - !calledFunc->getName().contains("core")) { - #if DEBUG - errs() << "DEBUG: pushing presumed c lib func " << calledFunc->getName() << "\n"; - #endif - localDeps.push(ci); 
- } - + } + } // end memcpy check + + // conservative tainting decision + if (calledFun->empty()) { + // if it's empty but declared in our mod (one of the passed in C ones) + // and it returns a value, then consider the taint passed to the + // return + if (!calledFun->getName().contains("llvm") && + !calledFun->getName().contains("core")) { +#if DEBUG + errs() << "pushing presumed c lib func " << calledFun->getName() << "\n"; +#endif + localDeps.push(ci); + } } continue; - } - unsigned int arg_num = ci->getNumArgOperands(); - - // Find the index of the tainted argument - for (unsigned int i = 0; i < arg_num; i++){ - #if DEBUG - errs() << "DEBUG: comparing "<< *currVal <<" and " << *(ci->getArgOperand(i))<<"\n"; - #endif - if(ci->getArgOperand(i)==currVal) { - #if DEBUG - errs() << "DEBUG: pushing arg of "<< calledFunc->getName() <<"\n"; - #endif - interProcSinks.push_back((calledFunc->arg_begin() + i)); - //MUST also push back the call inst. + + unsigned int arg_num = ci->arg_size(); +#if DEBUG + errs() << "[Loop customUsers] Find tainted arg of " << calledFun->getName() << "\n"; +#endif + // Find the index of the tainted argument + for (unsigned int i = 0; i < arg_num; i++) { + auto* arg = ci->getArgOperand(i); + if (arg == curVal) { + auto funArg = calledFun->arg_begin() + i; +#if DEBUG + errs() << "Found tainted arg: " << *arg << ", add fun arg (" << *funArg << "), the use (" << *ci << "), and orig input (" << *srcInput << ") to interProcFlows\n"; +#endif + interProcSinks.push_back(funArg); + // MUST also push back the call inst. 
interProcSinks.push_back(ci); - //MUST also push back the current srcOp - interProcSinks.push_back(srcOp); - if (Instruction* key = dyn_cast(ci)) { - // iInfo->at(key).insert(srcOp); + // MUST also push back the current srcInput + interProcSinks.push_back(srcInput); + if (auto* key = dyn_cast(ci)) { + // taintedInsts->at(key).insert(srcOp); } - break; - } - } - - } else if (Instruction* iUse = dyn_cast(use)) { + break; + } + } + } else if (auto* iUse = dyn_cast(use)) { +#if DEBUG + errs() << "[Loop customUsers] use != ReturnInst & use != CallInst\n"; +#endif if (iUse->isTerminator()) { if (iUse->getNumSuccessors() > 1) { - //Add control deps off of a branch. - #if DEBUG - errs() << "DEBUG: adding condeps case\n"; - #endif +// Add control deps off of a branch. +#if DEBUG + errs() << "adding condeps case\n"; +#endif val_vec controlDeps = getControlDeps(iUse); - //for all condep, add any reached loads, and add the store to the map - for (Value* item : controlDeps) { - if (StoreInst* siCon = dyn_cast(item)) { + // for all condep, add any reached loads, and add the store to the map + for (auto* item : controlDeps) { + if (auto* siCon = dyn_cast(item)) { localDeps.push(siCon); } - }//end for vals in condep + } } - }//end terminator check - #if DEBUG - //errs() << "DEBUG: pushing "<< *iUse<<"\n"; - #endif + } + +#if DEBUG + errs() << "[Loop customUsers] Add use to localDeps\n"; +#endif + //* Here we may push inst from another procedure, crossing boundaries localDeps.push(iUse); } } +#if DEBUG + errs() << "*** Loop localDeps ***\n"; +#endif } +#if DEBUG + errs() << "*** traverseLocal ***\n"; +#endif return interProcSinks; } - - -inst_vec findInputInsts(Module* M) -{ - inst_vec sources; - func_vec io_name; - //Find io name annotations - for(GlobalVariable& gv : M->globals()) { - if(gv.getName().contains("IO_NAME")) { - - if( Function* fp = dyn_cast(gv.getInitializer()->getOperand(0)->stripPointerCasts())) { - #if DEBUG - errs() << "Found io inst "<< fp->getName() <<"\n"; - 
#endif - io_name.push_back(fp); +inst_vec findInputInsts(Module* M) { +#if DEBUG + errs() << "findInputInsts\n"; +#endif + inst_vec inputInsts; + + // Find IO_NAME annotations + for (auto& gv : M->globals()) { + if (gv.getName().starts_with("IO_NAME")) { + if (auto* fp = dyn_cast(gv.getInitializer())) { +#if DEBUG + errs() << "Found IO fun: " << fp->getName() << "\n"; +#endif + // Now, search for calls to those functions + for (auto& F : *M) { + for (auto& B : F) { + for (auto& I : B) { + if (auto* ci = dyn_cast(&I)) { + if (fp == ci->getCalledFunction()) { +#if DEBUG + errs() << "Found IO call: " << I << "\n"; +#endif + inputInsts.push_back(&I); + break; + } + } + } + } + } } else { - errs() << "ERROR: could not unwrap function pointer from annotation\n"; + // TODO: Say something else + errs() << "[ERROR] Could not unwrap function pointer from annotation\n"; } } - } - - //now, search for calls to those functions - for (Function& func : * M) { - for (BasicBlock& bb : func) { - for(Instruction& inst : bb) { - if(CallInst* ci = dyn_cast(&inst)) { - if(find(io_name.begin(), io_name.end(),ci->getCalledFunction())!=io_name.end()) { - sources.push_back(&inst); - } - } - } - - } } - return sources; -} + return inputInsts; +} -/*See if a particular store is exposed to a use -- possibly replace couldLoadTainted*/ +// See if a particular store is exposed to a use -- possibly replace couldLoadTainted bool storePrecedesUse(Instruction* use, StoreInst* toMatch) { - queue to_visit; - vector visited; + std::queue to_visit; + std::vector visited; BasicBlock* current; - vector possible; + std::vector possible; int found = 0; int skip = 1; - + to_visit.push(use->getParent()); - while(!to_visit.empty()) { + while (!to_visit.empty()) { current = to_visit.front(); to_visit.pop(); - - for(BasicBlock::reverse_iterator i = current->rbegin(), e = current->rend(); i!=e;++i) { + + for (BasicBlock::reverse_iterator i = current->rbegin(), e = current->rend(); i != e; ++i) { Instruction* inst = 
&*i; - //don't look at li block before li - if((current == use->getParent())&&(skip)) { - //errs() << "skipping" << *inst <<"\n"; - if(use==inst){ - skip = 0; - } - continue; + // don't look at li block before li + if ((current == use->getParent()) && (skip)) { + // errs() << "skipping" << *inst <<"\n"; + if (use == inst) { + skip = 0; + } + continue; + } + // if(BI!=nullptr) { + // errs() << "looking at" << *BI <<"\n"; + if (StoreInst* si = dyn_cast(inst)) { + // errs() << "found a store" << *si <<"\n"; + if (si->getPointerOperand() == toMatch->getPointerOperand()) { + possible.push_back(si); + found = 1; + break; + } } - //if(BI!=nullptr) { - //errs() << "looking at" << *BI <<"\n"; - if (StoreInst* si = dyn_cast(inst)) { - //errs() << "found a store" << *si <<"\n"; - if (si->getPointerOperand() == toMatch->getPointerOperand()) { - possible.push_back(si); - found = 1; - break; - } - } } - //we found a store in this node - if(found) { + // we found a store in this node + if (found) { found = 0; continue; } /*add pred. 
blocks to our queue*/ for (auto PI = pred_begin(current); PI != pred_end(current); ++PI) { - //if it's new - if(!(find(visited.begin(), visited.end(), *PI) != visited.end())){ - visited.push_back(*PI); - to_visit.push(*PI); + // if it's new + if (!(find(visited.begin(), visited.end(), *PI) != visited.end())) { + visited.push_back(*PI); + to_visit.push(*PI); } } } /*Was one of the preceding writes the store in question?*/ - for(Value* poss : possible) { - if(poss == toMatch) { - return true; + for (Value* poss : possible) { + if (poss == toMatch) { + return true; } - } - //this use does not consume the tainted store + // this use does not consume the tainted store return false; } - /*See if the same EP is used in multiple GEPI, check if exposed*/ inst_vec couldMatchGEPI(GetElementPtrInst* tGEPI) { - queue to_visit; - vector visited; + std::queue to_visit; + std::vector visited; BasicBlock* current; - vector possible; + std::vector possible; inst_vec matching; int found = 0; int skip = 1; - + to_visit.push(tGEPI->getParent()); - while(!to_visit.empty()) { + while (!to_visit.empty()) { current = to_visit.front(); to_visit.pop(); - - //forwards exploration - for(Instruction& i : *current) { + + // forwards exploration + for (Instruction& i : *current) { Instruction* inst = &i; - //don't look at gepi block before gepi - if((current == tGEPI->getParent())&&(skip)) { - //errs() << "skipping" << *inst <<"\n"; - if(tGEPI==inst){ - skip = 0; - } - continue; + // don't look at gepi block before gepi + if ((current == tGEPI->getParent()) && (skip)) { + // errs() << "skipping" << *inst <<"\n"; + if (tGEPI == inst) { + skip = 0; + } + continue; } - //if(BI!=nullptr) { - //errs() << "looking at" << *BI <<"\n"; - if (GetElementPtrInst* another = dyn_cast(inst)) { - //errs() << "found a store" << *si <<"\n"; - //check if the ops match - if (another->getPointerOperand() == tGEPI->getPointerOperand()) { - //check if used in load or store - for (Value* pUse : another->users()) { + // 
if(BI!=nullptr) { + // errs() << "looking at" << *BI <<"\n"; + if (GetElementPtrInst* another = dyn_cast(inst)) { + // errs() << "found a store" << *si <<"\n"; + // check if the ops match + if (another->getPointerOperand() == tGEPI->getPointerOperand()) { + // check if used in load or store + for (Value* pUse : another->users()) { if (isa(pUse)) { found = 1; break; } } - //no store + // no store if (!found) { - #if DEBUG - errs() << "matching GEPS: " << *another<<" and " << *tGEPI <<"\n"; - #endif +#if DEBUG + errs() << "matching GEPS: " << *another << " and " << *tGEPI << "\n"; +#endif matching.push_back(another); } - } - } + } + } } - //we found a store in this node - if(found) { + // we found a store in this node + if (found) { found = 0; continue; } /*add succ. blocks to our queue*/ for (auto SI = succ_begin(current); SI != succ_end(current); ++SI) { - //if it's new - if(!(find(visited.begin(), visited.end(), *SI) != visited.end())){ - visited.push_back(*SI); - to_visit.push(*SI); + // if it's new + if (!(find(visited.begin(), visited.end(), *SI) != visited.end())) { + visited.push_back(*SI); + to_visit.push(*SI); } } } - + return matching; } /*Find first use of a pointer after a callInst, for pass-by-ref*/ Instruction* ptrAfterCall(Value* ptr, CallInst* ci) { - queue to_visit; - vector visited; + std::queue to_visit; + std::vector visited; BasicBlock* current; - + int found = 0; int skip = 1; - + to_visit.push(ci->getParent()); - while(!to_visit.empty()) { + while (!to_visit.empty()) { current = to_visit.front(); to_visit.pop(); - - //forwards exploration - for(Instruction& i : *current) { + + // forwards exploration + for (Instruction& i : *current) { Instruction* inst = &i; - //don't look at gepi block before gepi - if((current == ci->getParent())&&(skip)) { - //errs() << "skipping" << *inst <<"\n"; - if(ci==inst){ - skip = 0; - } - continue; + // don't look at gepi block before gepi + if ((current == ci->getParent()) && (skip)) { + // errs() << "skipping" 
<< *inst <<"\n"; + if (ci == inst) { + skip = 0; + } + continue; } - //if the inst is a use of the pointer - if (find(ptr->user_begin(),ptr->user_end(), inst)!=ptr->user_end()) { + // if the inst is a use of the pointer + if (std::find(ptr->user_begin(), ptr->user_end(), inst) != ptr->user_end()) { return inst; } - } /*add succ. blocks to our queue*/ for (auto SI = succ_begin(current); SI != succ_end(current); ++SI) { - //if it's new - if(!(find(visited.begin(), visited.end(), *SI) != visited.end())){ - visited.push_back(*SI); - to_visit.push(*SI); + // if it's new + if (!(find(visited.begin(), visited.end(), *SI) != visited.end())) { + visited.push_back(*SI); + to_visit.push(*SI); } } } return nullptr; } - -/*This is a function to return all the control dependent stores off of a control inst -Input -- ti, the (formerly) terminator inst +/*This is a function to return all the control dependent stores off of a control inst +Input -- ti, the (formerly) terminator inst Output -- list of deps */ -val_vec getControlDeps(Instruction* ti) -{ +val_vec getControlDeps(Instruction* ti) { val_vec deps; int succ_i = 0; while (succ_i < ti->getNumSuccessors()) { BasicBlock* bb = ti->getSuccessor(succ_i); succ_i++; - for(Instruction& inst : *bb) { - //if we encounter a store, add to deps - if(isa(&inst)) { - deps.push_back(&inst); - } //if we encounter a multi succ branch, recursive call, if we encouter a join, continue to next succ - else if(inst.isTerminator()) { - - if(ti->getNumSuccessors() > 1) { - vector intermed = getControlDeps(&inst); - for(Value* item : intermed) { - deps.push_back(item); - } - } else { - break; - } + for (Instruction& inst : *bb) { + // if we encounter a store, add to deps + if (isa(&inst)) { + deps.push_back(&inst); + } // if we encounter a multi succ branch, recursive call, if we encouter a join, continue to next succ + else if (inst.isTerminator()) { + if (ti->getNumSuccessors() > 1) { + std::vector intermed = getControlDeps(&inst); + for (Value* 
item : intermed) { + deps.push_back(item); + } + } else { + break; + } } } } return deps; } - -/*Get direct uses (at src level, not IR) of a fresh var*/ -inst_vec traverseDirectUses(Instruction* root) -{ +// Get direct uses (at src level, not IR) of a fresh var +inst_vec traverseDirectUses(Instruction* root) { inst_vec uses; - queue localDeps; + std::queue localDeps; localDeps.push(root); - - //Edge case: check if return is an internally allocated stack var + + // Edge case: check if return is an internally allocated stack var Value* retPtr; Instruction* last = &(root->getFunction()->back().back()); if (ReturnInst* ri = dyn_cast(last)) { for (Use& op : ri->operands()) { - if(LoadInst* li = dyn_cast(op.get())) { + if (LoadInst* li = dyn_cast(op.get())) { retPtr = li->getPointerOperand(); } } - } - while(!localDeps.empty()) { + while (!localDeps.empty()) { Instruction* currVal = localDeps.front(); uses.push_back(currVal); localDeps.pop(); for (Value* use : currVal->users()) { - //if it's a gepi, see if there are others that occur afterwards - // errs() << *use <<" is a direct use of " << *currVal<<"\n"; + // if it's a gepi, see if there are others that occur afterwards + // errs() << *use <<" is a direct use of " << *currVal<<"\n"; if (isa(use)) { inst_vec matching = couldMatchGEPI(dyn_cast(use)); for (Instruction* item : matching) { - // errs() << "pushing to local deps " << *item <<"\n"; + // errs() << "pushing to local deps " << *item <<"\n"; localDeps.push(item); } - } - else if (ReturnInst* ri = dyn_cast(use)) { - for(Value* calls : ri->getFunction()->users()) { - if(isa(calls)) { + } else if (ReturnInst* ri = dyn_cast(use)) { + for (Value* calls : ri->getFunction()->users()) { + if (isa(calls)) { uses.push_back(dyn_cast(calls)); - } } } else if (StoreInst* si = dyn_cast(use)) { - //if stores into ret pointer, treat as above + // if stores into ret pointer, treat as above if (si->getPointerOperand() == retPtr) { - for(Value* calls : si->getFunction()->users()) { 
- if(isa(calls)) { - uses.push_back(dyn_cast(calls)); - + for (Value* calls : si->getFunction()->users()) { + if (isa(calls)) { + uses.push_back(dyn_cast(calls)); } - } + } } } else if (BranchInst* bi = dyn_cast(use)) { - //if a use is a branch inst the atomic region needs to - //dominate the successors + // if a use is a branch inst the atomic region needs to + // dominate the successors for (BasicBlock* bbInterior : bi->successors()) { - //skip panic blocks, otherwise there will be no post dom + // skip panic blocks, otherwise there will be no post dom if (bbInterior->getName().equals("panic")) { continue; } uses.push_back(&(bbInterior->front())); } } else if (CallInst* ci = dyn_cast(use)) { - if(ci->hasName() && ci->getName().startswith("_")) { - //fall through + if (ci->hasName() && ci->getName().startswith("_")) { + // fall through } else { uses.push_back(ci); continue; } } if (Instruction* iUse = dyn_cast(use)) { - //see if load is to another var or just internal ssa + // see if load is to another var or just internal ssa if (LoadInst* li = dyn_cast(iUse)) { - if(li->hasName()) { - //Hacky --verify that this is always true - if(!li->getName().startswith("_")) { + if (li->hasName()) { + // Hacky --verify that this is always true + if (!li->getName().startswith("_")) { continue; } } @@ -853,5 +911,3 @@ inst_vec traverseDirectUses(Instruction* root) return uses; } - - diff --git a/ocelot/AtomicRegionInference/src/include/ConsistentInference.h b/ocelot/AtomicRegionInference/src/include/ConsistentInference.h deleted file mode 100644 index 1f7a429..0000000 --- a/ocelot/AtomicRegionInference/src/include/ConsistentInference.h +++ /dev/null @@ -1,38 +0,0 @@ -#ifndef __CONSISTENTINFERENCE__ -#define __CONSISTENTINFERENCE__ - -#include "HelperTypes.h" -using namespace llvm; -using namespace std; - -class ConsistentInference { -public: - ConsistentInference(Pass* _pass, Module* _m, Function* _as, Function* _ae) { - pass = _pass; - m = _m; - atomStart = _as; - atomEnd = 
_ae; - } - void inferConsistent(map allSets); - void inferFresh(inst_vec_vec allSets); - void addRegion(inst_vec conSet, int regType); - Function* commonPredecessor(map blocks, Function* root); - Instruction* insertRegionInst(int regInst, Instruction* insertBefore); - bool sameFunction(map blockMap); - Instruction* truncate(BasicBlock* bb, bool forwards, inst_vec conSet, set nested); - vector deepCaller(Function* root, vector funcList, Function** goal); - inst_inst_pair findSmallest(vectorregionsFound); - BasicBlock* getLoopEnd(BasicBlock* bb); - bool loopCheck(BasicBlock* bb); - int getSubLength(BasicBlock* bb, Instruction* end, vector visited); - - - -private: - Pass* pass; - Module* m; - Function* atomStart; - Function* atomEnd; -}; - -#endif diff --git a/ocelot/AtomicRegionInference/src/include/HelperTypes.h b/ocelot/AtomicRegionInference/src/include/HelperTypes.h index 843c498..9565b1f 100644 --- a/ocelot/AtomicRegionInference/src/include/HelperTypes.h +++ b/ocelot/AtomicRegionInference/src/include/HelperTypes.h @@ -1,52 +1,41 @@ -#ifndef __HELPERTYPES__ -#define __HELPERTYPES__ - -#include "llvm/Pass.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Function.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/IR/InstrTypes.h" -#include "llvm/IR/InstIterator.h" -#include "llvm/Transforms/IPO/PassManagerBuilder.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/ADT/ilist.h" -#include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/SymbolTableListTraits.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/BasicAliasAnalysis.h" -#include "llvm/Analysis/CallGraph.h" -#include -#include -#include -#include -//#include - -#define DEBUG 0 - -using namespace llvm; - -typedef std::vector val_vec; -typedef std::vector bb_vec; -typedef std::vector inst_vec; -typedef std::map val_insts_map; -typedef std::vector gv_vec; -typedef std::vector> val_inst_vec; 
-typedef std::vector> inst_inst_vec; -typedef std::map inst_vals_map; -typedef std::map> inst_insts_map; -typedef std::vector func_vec; -typedef std::vector inst_vec_vec; -typedef std::pair inst_inst_pair; - -extern gv_vec gv_list; - -/*bool isArray(Value* v); -bool isTask(Function* F); -bool isMemcpy(Instruction* I); -uint64_t getSize(Value* val); -int is_atomic_boundary(Instruction* ci); -#define OVERHEAD 0 -*/ -#endif +#ifndef __HELPERTYPES__ +#define __HELPERTYPES__ + +#include +#include +#include +#include + +#include "llvm/ADT/ilist.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/SymbolTableListTraits.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" + +#define DEBUG 1 + +using namespace llvm; + +typedef std::vector val_vec; +typedef std::vector bb_vec; +typedef std::vector inst_vec; +typedef std::map val_insts_map; +typedef std::vector gv_vec; +typedef std::vector> val_inst_vec; +typedef std::pair inst_inst_pair; +typedef std::vector inst_inst_vec; +typedef std::map inst_vals_map; +typedef std::map> inst_insts_map; +typedef std::vector func_vec; +typedef std::vector inst_vec_vec; + +extern gv_vec gv_list; + +#endif diff --git a/ocelot/AtomicRegionInference/src/include/Helpers.h b/ocelot/AtomicRegionInference/src/include/Helpers.h new file mode 100644 index 0000000..8e940f0 --- /dev/null +++ b/ocelot/AtomicRegionInference/src/include/Helpers.h @@ -0,0 +1,15 @@ +#ifndef __HELPERS__ +#define __HELPERS__ + +#include + +#include "HelperTypes.h" + +using namespace llvm; + +std::string getSimpleNodeLabel(const Value* Node); +bool isAnnot(const StringRef annotName); +void printInstInsts(const inst_insts_map& iim, bool onlyCalls = false); +void printInsts(const 
inst_vec& iv); + +#endif \ No newline at end of file diff --git a/ocelot/AtomicRegionInference/src/include/InferAtomicPass.h b/ocelot/AtomicRegionInference/src/include/InferAtomicPass.h deleted file mode 100644 index bd0036a..0000000 --- a/ocelot/AtomicRegionInference/src/include/InferAtomicPass.h +++ /dev/null @@ -1,68 +0,0 @@ -#ifndef __INFERPASS__ -#define __INFERPASS__ - -#include "HelperTypes.h" -#include "ConsistentInference.h" -#include "llvm/ADT/APInt.h" -#include "llvm/IR/Verifier.h" -#include "llvm/ExecutionEngine/ExecutionEngine.h" -#include "llvm/ExecutionEngine/GenericValue.h" -#include "llvm/ExecutionEngine/MCJIT.h" -#include "llvm/IR/Argument.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Type.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/TargetSelect.h" -#include "llvm/Support/raw_ostream.h" -#include -#include -#include -#include -#include - -using namespace llvm; - -class InferAtomicModulePass : public ModulePass { - public: - static char ID; - InferAtomicModulePass() : ModulePass(ID) {} - - virtual bool runOnModule(Module &M); - int getMaxCost(Function* f); - void mergeRegions(Function* f); - void getAnnotations(map* conSets, inst_vec_vec* freshVars, inst_insts_map inputs, inst_vec* toDelete); - inst_vec_vec collectFresh(inst_vec_vec startingPoints, inst_insts_map info); - map collectCon(map startingPointa, inst_insts_map inputMap); - void removeAnnotations(inst_vec* toDelete); - - - virtual void getAnalysisUsage(AnalysisUsage& AU) const { - AU.setPreservesAll(); - //AU.addRequired(); - //AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - } - Module* getModule() { - return m; - } - Module* setModule(Module* _m) { - return m = _m; - } - private: - Module* m; - int capacitorSize; - Function* 
atomStart; - Function* atomEnd; - - -}; - -#endif diff --git a/ocelot/AtomicRegionInference/src/include/InferAtoms.h b/ocelot/AtomicRegionInference/src/include/InferAtoms.h new file mode 100644 index 0000000..19701e0 --- /dev/null +++ b/ocelot/AtomicRegionInference/src/include/InferAtoms.h @@ -0,0 +1,54 @@ +#ifndef __INFERATOMS__ +#define __INFERATOMS__ + +#include +#include +#include +#include +#include +#include + +#include "Helpers.h" +#include "InferFreshCons.h" +#include "TaintTracker.h" +#include "llvm/ADT/APInt.h" +#include "llvm/Analysis/PostDominators.h" +#include "llvm/Pass.h" +#include "llvm/Passes/PassBuilder.h" +#include "llvm/Passes/PassPlugin.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +struct InferAtomsPass : public PassInfoMixin { + public: + InferAtomsPass() {} + PreservedAnalyses run(Module& M, ModuleAnalysisManager& AM); + + void getAnnotations(std::map* consVars, inst_vec_vec* freshVars, inst_insts_map inputMap, inst_vec* toDelete); + inst_vec_vec collectFresh(inst_vec_vec startingPoints, inst_insts_map info); + std::map collectCons(std::map startingPointa, inst_insts_map inputMap); + void removeAnnotations(inst_vec* toDelete); + void setModule(Module* _M) { M = _M; } + + private: + Module* M; + Function* atomStart; + Function* atomEnd; +}; + +extern "C" LLVM_ATTRIBUTE_WEAK ::llvm::PassPluginLibraryInfo +llvmGetPassPluginInfo() { + return { + .APIVersion = LLVM_PLUGIN_API_VERSION, + .PluginName = "Atomic Region Inference Pass", + .PluginVersion = "v0.1", + .RegisterPassBuilderCallbacks = [](PassBuilder& PB) { + PB.registerPipelineStartEPCallback( + [](ModulePassManager& MPM, OptimizationLevel Level) { + MPM.addPass(InferAtomsPass()); + }); + }}; +} + +#endif diff --git a/ocelot/AtomicRegionInference/src/include/InferFreshCons.h b/ocelot/AtomicRegionInference/src/include/InferFreshCons.h new file mode 100644 index 0000000..e9defee --- /dev/null +++ b/ocelot/AtomicRegionInference/src/include/InferFreshCons.h @@ -0,0 +1,36 
@@ +#ifndef __INFERFRESHCONS__ +#define __INFERFRESHCONS__ + +#include "Helpers.h" + +using namespace llvm; + +struct InferFreshCons { + public: + InferFreshCons(FunctionAnalysisManager* _FAM, Module* _m, Function* _as, Function* _ae) { + FAM = _FAM; + m = _m; + atomStart = _as; + atomEnd = _ae; + } + void inferConsistent(std::map allSets); + void inferFresh(inst_vec_vec allSets); + void addRegion(inst_vec conSet, int regType); + Function* findCandidate(std::map blocks, Function* root); + Instruction* insertRegionInst(int regInst, Instruction* insertBefore); + bool sameFunction(std::map blockMap); + Instruction* truncate(BasicBlock* bb, bool forwards, inst_vec conSet, std::set nested); + std::vector deepCaller(Function* root, std::vector& funcList, Function** goal); + inst_inst_pair findShortest(inst_inst_vec regionsFound); + BasicBlock* getLoopEnd(BasicBlock* bb); + bool loopCheck(BasicBlock* bb); + int getSubLength(BasicBlock* bb, Instruction* end, std::vector visited); + + private: + FunctionAnalysisManager* FAM; + Module* m; + Function* atomStart; + Function* atomEnd; +}; + +#endif diff --git a/ocelot/AtomicRegionInference/src/include/TaintTracker.h b/ocelot/AtomicRegionInference/src/include/TaintTracker.h index ffd90ef..1d7eaf7 100644 --- a/ocelot/AtomicRegionInference/src/include/TaintTracker.h +++ b/ocelot/AtomicRegionInference/src/include/TaintTracker.h @@ -1,40 +1,17 @@ -#include "llvm/Pass.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/AliasSetTracker.h" -#include "llvm/Analysis/CFLSteensAliasAnalysis.h" -#include "llvm/Analysis/MemoryLocation.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/CFG.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/Operator.h" -#include 
"llvm/IR/Metadata.h" -#include "llvm/IR/DebugInfoMetadata.h" -#include -#include -#include -#include -#include -#include -#include -#include "HelperTypes.h" +#ifndef __TAINTTRACKER__ +#define __TAINTTRACKER__ -using namespace llvm; -using namespace std; +#include "Helpers.h" +using namespace llvm; inst_insts_map buildInputs(Module* m); val_vec traverseLocal(Value* tainted, Instruction* srcOp, inst_insts_map* buildMap, Instruction* caller); - -inst_vec findInputInsts(Module* M); -Instruction* ptrAfterCall(Value* ptr, CallInst* ci); +inst_vec findInputInsts(Module* M); +Instruction* ptrAfterCall(Value* ptr, CallInst* ci); bool storePrecedesUse(Instruction* use, StoreInst* toMatch); inst_vec couldMatchGEPI(GetElementPtrInst* tGEPI); val_vec getControlDeps(Instruction* ti); inst_vec traverseDirectUses(Instruction* root); + +#endif From c7fd8d0757eb1dbc08e17ffe55e356116a7cacb6 Mon Sep 17 00:00:00 2001 From: Robert Zhang Date: Sat, 16 Dec 2023 23:22:31 -0500 Subject: [PATCH 02/18] [InferAtomsPass] Makefile to simplify testing Useful extensible shortcuts to running tests. 
--- .gitignore | 3 ++- benchmarks/ctests/example01.c | 3 --- ocelot/AtomicRegionInference/Makefile | 20 ++++++++++++++++++++ 3 files changed, 22 insertions(+), 4 deletions(-) create mode 100644 ocelot/AtomicRegionInference/Makefile diff --git a/.gitignore b/.gitignore index 5326aab..225f44b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ .vscode -ocelot/AtomicRegionInference/build \ No newline at end of file +ocelot/AtomicRegionInference/build +benchmarks/ctests/*.ll \ No newline at end of file diff --git a/benchmarks/ctests/example01.c b/benchmarks/ctests/example01.c index 0e61a67..4b5b66f 100644 --- a/benchmarks/ctests/example01.c +++ b/benchmarks/ctests/example01.c @@ -1,8 +1,5 @@ #include -// int x; -// int y; - void Fresh(int x) { printf("Fresh\n"); } void Consistent(int x, int id) { printf("Consistent\n"); } diff --git a/ocelot/AtomicRegionInference/Makefile b/ocelot/AtomicRegionInference/Makefile new file mode 100644 index 0000000..41a8cf5 --- /dev/null +++ b/ocelot/AtomicRegionInference/Makefile @@ -0,0 +1,20 @@ +.PHONY: clean_tests clean eg1 eg2 + +eg1: + TEST=example01 make test +eg2: + TEST=example02 make test + +test: + $(MAKE) -C build all + clang -S -emit-llvm\ + -fpass-plugin=build/src/InferAtomsPass.dylib\ + -fno-discard-value-names\ + ../../benchmarks/ctests/$(TEST).c\ + -o ../../benchmarks/ctests/$(TEST).ll + +clean_tests: + find ../../benchmarks/ctests -name "*.ll" -exec rm -rf {} \; + +clean: + rm -rf build From 58855d2a5cc9c3f6fbb64a5b6cf28e85ba312d1e Mon Sep 17 00:00:00 2001 From: Robert Zhang Date: Sun, 17 Dec 2023 23:08:35 -0500 Subject: [PATCH 03/18] [WIP][InferAtomsPass] Instruction scheduling Step 1 of optimizing atomic regions for (smaller) size. In essence, it's now necessary to have a complete picture of which instructions are tainted (whereas before we really only needed to know the boundaries of a region). Test plan: `make eg3` for an example where the freshness atomic region size is reduced thanks to the optimization. 
--- .gitignore | 4 +- benchmarks/ctests/example03.c | 19 ++ ocelot/AtomicRegionInference/Makefile | 6 + ocelot/AtomicRegionInference/README.md | 3 + .../AtomicRegionInference/src/InferAtoms.cpp | 11 +- .../src/InferFreshCons.cpp | 200 ++++++++++++------ .../src/TaintTracker.cpp | 140 ++++++++++-- .../src/include/HelperTypes.h | 5 +- .../src/include/Helpers.h | 3 + .../src/include/InferFreshCons.h | 11 +- .../src/include/TaintTracker.h | 1 + 11 files changed, 312 insertions(+), 91 deletions(-) create mode 100644 benchmarks/ctests/example03.c diff --git a/.gitignore b/.gitignore index 225f44b..17712eb 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ .vscode ocelot/AtomicRegionInference/build -benchmarks/ctests/*.ll \ No newline at end of file +benchmarks/ctests/*.ll + +.DS_Store \ No newline at end of file diff --git a/benchmarks/ctests/example03.c b/benchmarks/ctests/example03.c new file mode 100644 index 0000000..98b9d0d --- /dev/null +++ b/benchmarks/ctests/example03.c @@ -0,0 +1,19 @@ +void Fresh(int x) {} +void Consistent(int x, int id) {} + +void atomic_start() {} +void atomic_end() {} + +int input() { return 0; } +int (*IO_NAME)() = input; + +void log(int x) {} + +void app() { + int x = input(); + int y = 1; + int z = y + 1; + log(z); + log(x); + Fresh(x); +} \ No newline at end of file diff --git a/ocelot/AtomicRegionInference/Makefile b/ocelot/AtomicRegionInference/Makefile index 41a8cf5..9ab940c 100644 --- a/ocelot/AtomicRegionInference/Makefile +++ b/ocelot/AtomicRegionInference/Makefile @@ -4,9 +4,15 @@ eg1: TEST=example01 make test eg2: TEST=example02 make test +eg3: + TEST=example03 make test test: $(MAKE) -C build all + clang -S -emit-llvm\ + -fno-discard-value-names\ + ../../benchmarks/ctests/$(TEST).c\ + -o ../../benchmarks/ctests/$(TEST).orig.ll clang -S -emit-llvm\ -fpass-plugin=build/src/InferAtomsPass.dylib\ -fno-discard-value-names\ diff --git a/ocelot/AtomicRegionInference/README.md b/ocelot/AtomicRegionInference/README.md index 
38c61e6..5895b8c 100644 --- a/ocelot/AtomicRegionInference/README.md +++ b/ocelot/AtomicRegionInference/README.md @@ -16,3 +16,6 @@ You may bootstrap Clang to use the pass to compile a C file like so: ```sh clang -S -emit-llvm -fpass-plugin=src/InferAtomsPass.dylib -fno-discard-value-names ../../../benchmarks/ctests/example01.c ``` + +Or, when testing, use the shortcuts provided in the Makefile (e.g., `make eg1`), +which produce two LLVM IRs with and without the pass enabled. diff --git a/ocelot/AtomicRegionInference/src/InferAtoms.cpp b/ocelot/AtomicRegionInference/src/InferAtoms.cpp index 3843383..b0219cd 100644 --- a/ocelot/AtomicRegionInference/src/InferAtoms.cpp +++ b/ocelot/AtomicRegionInference/src/InferAtoms.cpp @@ -219,6 +219,7 @@ void InferAtomsPass::getAnnotations(std::map* consVars, inst_vec_ #if DEBUG errs() << "[Loop Inst] Fresh arg: " << *arg << "\n"; #endif + if (auto* inst = dyn_cast(arg)) { #if DEBUG errs() << "[Loop Inst] arg = Instruction, add to v\n"; @@ -240,7 +241,7 @@ void InferAtomsPass::getAnnotations(std::map* consVars, inst_vec_ #endif if (ptrUse != inst) { if (auto* liUse = dyn_cast(ptrUse)) { - errs() << "[Loop ptr users] Diff LoadInst ptrUse, add to v\n"; + errs() << "[Loop ptr users] ptrUse diff from Fresh arg, add to v\n"; v.emplace(liUse); } } @@ -443,23 +444,23 @@ inst_vec_vec InferAtomsPass::collectFresh(inst_vec_vec freshVars, inst_insts_map errs() << "[Loop freshVars] Go over varSet:\n"; printInsts(varSet); #endif - std::set unique, callChain; + inst_set unique, callChain; for (auto* var : varSet) { #if DEBUG errs() << "[Loop varSet] Cur var: " << *var << "\n"; #endif // Uses (forwards) are direct only (might need a little chaining for direct in rs to be direct in IR) - inst_vec uses = traverseDirectUses(var); + inst_vec uses = traverseUses(var); #if DEBUG errs() << "[Loop varSet] Go over uses of var\n"; #endif for (auto* use : uses) { #if DEBUG - errs() << "[Loop uses] Cur use: " << *use << "\n"; - errs() << "[Loop uses] Add 
use to unique\n"; + errs() << "[Loop uses] Add use: " << *use << "\n"; #endif unique.insert(use); + for (auto* input : inputMap[use]) { #if DEBUG errs() << "[Loop inputMap[use]] Add src input of use to unique: " << *input << "\n"; diff --git a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp index 6258661..0e1f93b 100644 --- a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp +++ b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp @@ -2,24 +2,23 @@ #include "llvm/Analysis/PostDominators.h" -Instruction* InferFreshCons::insertRegionInst(int toInsertType, Instruction* insertBefore) { +Instruction* InferFreshCons::insertRegionInst(InsertKind insertKind, Instruction* insertBefore) { #if DEBUG errs() << "=== insertRegionInst ===\n"; #endif Instruction* call; IRBuilder<> builder(insertBefore); - // Insert a region start inst - if (toInsertType == 0) { + + if (insertKind == Start) { #if DEBUG errs() << "Insert start before: " << *insertBefore << "\n"; #endif call = builder.CreateCall(this->atomStart); } else { - // Insert a region end inst #if DEBUG errs() << "Insert end before: " << *insertBefore << "\n"; #endif - call = builder.CreateCall(atomEnd); + call = builder.CreateCall(this->atomEnd); } #if DEBUG @@ -31,10 +30,11 @@ Instruction* InferFreshCons::insertRegionInst(int toInsertType, Instruction* ins // If a direct pred is also a successor, then it's a for loop block bool InferFreshCons::loopCheck(BasicBlock* B) { auto BName = getSimpleNodeLabel(B); + if (!B->hasNPredecessors(1)) { for (auto it = pred_begin(B), et = pred_end(B); it != et; ++it) { - BasicBlock* predecessor = *it; - StringRef pname = predecessor->getName().drop_front(2); + auto* predecessor = *it; + auto pname = predecessor->getName().drop_front(2); // errs() << "comparing " << pname<< " and " < 0) { // errs() << "comparison is true\n"; @@ -42,13 +42,14 @@ bool InferFreshCons::loopCheck(BasicBlock* B) { } } } + return false; } // Find the 
first block after a for loop BasicBlock* InferFreshCons::getLoopEnd(BasicBlock* bb) { - Instruction* ti = bb->getTerminator(); - BasicBlock* end = ti->getSuccessor(0); + auto* ti = bb->getTerminator(); + auto* end = ti->getSuccessor(0); ti = end->getTerminator(); // errs() << "end is " << end->getName() << "\n"; // for switch inst, succ 0 is the fall through @@ -64,7 +65,7 @@ void InferFreshCons::inferConsistent(std::map consSets) { #if DEBUG errs() << "[InferConsistent] starting set " << id << "\n"; #endif - addRegion(set, 0); + addRegion(set, Consistent); } } @@ -74,31 +75,31 @@ void InferFreshCons::inferFresh(inst_vec_vec freshSets) { errs() << "=== inferFresh ===\n"; #endif // TODO: start with pseudo code structure from design doc - for (auto set : freshSets) addRegion(set, 1); + for (auto freshSet : freshSets) addRegion(freshSet, Fresh); #if DEBUG errs() << "*** inferFresh ***\n"; #endif } -// Region type: 0 for Consistent, 1 for Fresh -void InferFreshCons::addRegion(inst_vec set, int regionType) { +void InferFreshCons::addRegion(inst_vec targetInsts, RegionKind regionKind) { #if DEBUG errs() << "=== addRegion ===\n"; #endif // A map from set item to bb - std::map blocks; + std::map targetBlocks; // A queue of regions that still need to be processed std::queue> regionsNeeded; #if DEBUG errs() << "Build map from inst to bb\n"; #endif - for (auto* item : set) blocks[item] = item->getParent(); + for (auto* targetInst : targetInsts) + targetBlocks[targetInst] = targetInst->getParent(); #if DEBUG errs() << "Add map to regionsNeeded\n"; #endif - regionsNeeded.push(blocks); + regionsNeeded.push(targetBlocks); auto* root = m->getFunction("app"); @@ -106,56 +107,53 @@ void InferFreshCons::addRegion(inst_vec set, int regionType) { inst_inst_vec regionsFound; while (!regionsNeeded.empty()) { // Need to raise all blocks in the map until they are the same - auto blockMap = regionsNeeded.front(); + auto blocks = regionsNeeded.front(); regionsNeeded.pop(); - // Record which 
functions have been travelled through - std::set nested; + // Record which functions have been traveled through + std::set seenFuns; #if DEBUG - errs() << "[Loop regionsNeeded] Check if blocks are in diff functions\n"; + errs() << "[Loop regionsNeeded] While blocks are in diff functions\n"; #endif - while (!sameFunction(blockMap)) { + while (!sameFunction(blocks)) { // To think on: does this change? - auto* goal = findCandidate(blockMap, root); + auto* goal = findCandidate(blocks, root); #if DEBUG - errs() << "[Loop !sameFunction] Go over each item in set\n"; + errs() << "[Loop !sameFunction] Go over each targetInst\n"; #endif - for (auto* item : set) { + for (auto* targetInst : targetInsts) { // not all blocks need to be moved up - Function* currFunc = blockMap[item]->getParent(); - nested.insert(currFunc); - if (currFunc != goal) { + auto* curFun = blocks[targetInst]->getParent(); + seenFuns.insert(curFun); + if (curFun != goal) { // if more than one call: // callChain info is already in the starting set // so only explore a caller if it's in conSet bool first = true; - for (User* use : currFunc->users()) { - // if (regionType == 1) { - if (!(find(set.begin(), set.end(), use) != set.end())) { + for (auto* use : curFun->users()) { + // if (regionKind == 1) { + if (!(find(targetInsts.begin(), targetInsts.end(), use) != targetInsts.end())) continue; - } // errs() << "Use: "<< *use << " is in call chain\n"; //} - Instruction* inst = dyn_cast(use); + auto* inst = dyn_cast(use); #if DEBUGINFER errs() << "DEBUGINFER: examining use: " << *inst << "\n"; #endif if (inst == NULL) { - // errs () <<"ERROR: use " << *use << "not an instruction\n"; + // errs () << "ERROR: use " << *use << "not an instruction\n"; break; } // update the original map if (first) { - blockMap[item] = inst->getParent(); + blocks[targetInst] = inst->getParent(); first = false; } else { // copy the blockmap, update, add to queue - Instruction* inst = dyn_cast(use); + auto* inst = dyn_cast(use); 
std::map copy; - for (auto map : blockMap) { - copy[map.first] = map.second; - } - copy[item] = inst->getParent(); + for (auto map : blocks) copy[map.first] = map.second; + copy[targetInst] = inst->getParent(); regionsNeeded.push(copy); } } // end forall uses @@ -168,34 +166,114 @@ void InferFreshCons::addRegion(inst_vec set, int regionType) { #if DEBUG errs() << "[Loop regionsNeeded] Start dom tree analysis\n"; #endif - auto* home = blockMap.begin()->second->getParent(); - if (home == nullptr) { + + auto* homeFun = blocks.begin()->second->getParent(); + if (homeFun == nullptr) { #if DEBUG - errs() << "[Loop regionsNeeded] No function found\n"; + errs() << "[regionsNeeded] No function found\n"; #endif continue; } #if DEBUG - errs() << "[Loop regionsNeeded] Found home fun: " << home->getName() << "\n"; + errs() << "[regionsNeeded] Found home fun: " << homeFun->getName() << "\n"; +#endif + +#if OPT +#if DEBUG + errs() << "[regionsNeeded] Go over all block insts\n"; +#endif + // auto* B = blocks.begin()->second; + std::set seenBlocks; + for (auto& [_, B] : blocks) { + if (seenBlocks.find(B) == seenBlocks.end()) { + seenBlocks.emplace(B); + + std::vector toDelay; + std::vector toDelete; + + for (auto& I : *B) { +#if DEBUG + errs() << I << "\n"; +#endif + if (!isa(I) && find(targetInsts.begin(), targetInsts.end(), &I) == targetInsts.end()) { +#if DEBUG + errs() << "Should be delayed\n"; +#endif + Instruction *prev, *clone; + if (isa(I)) { + if (I.getOpcode() == Instruction::Add) + clone = BinaryOperator::Create(Instruction::Add, prev, I.getOperand(1)); + else + clone = I.clone(); + } else if (isa(I)) { + clone = I.clone(); + } else if (auto* ci = dyn_cast(&I)) { + clone = CallInst::Create(ci->getCalledFunction(), prev); + } else if (auto* si = dyn_cast(&I)) { + if (prev != nullptr && find(targetInsts.begin(), targetInsts.end(), prev) == targetInsts.end()) { + clone = I.clone(); + clone->setOperand(0, prev); + errs() << "yo\n"; + } else + clone = I.clone(); + } else + 
clone = I.clone(); + prev = clone; + + toDelete.push_back(&I); + toDelay.push_back(clone); + } + } + + IRBuilder builder(B); + for (auto* d : toDelay) { + // #if DEBUG + // errs() << "Delayed: " << *d << "\n"; + // #endif + builder.Insert(d); + } + + auto I = B->begin(); + for (; I != B->end();) { +#if DEBUG + errs() << *I << "\n"; +#endif + if (find(toDelete.begin(), toDelete.end(), &*I) != toDelete.end()) { +#if DEBUG + errs() << "Delete\n"; +#endif + I = I->eraseFromParent(); + } else + I++; + } + +#if DEBUG + errs() << "After: " << *B << "\n"; #endif - auto& domTree = FAM->getResult(*home); + } + } +#endif + + auto& domTree = FAM->getResult(*homeFun); // Find the closest point that dominates - auto* startDom = blockMap.begin()->second; - for (auto& [_, B] : blockMap) { + auto* startDom = blocks.begin()->second; + for (auto& [_, B] : blocks) startDom = domTree.findNearestCommonDominator(B, startDom); - } #if DEBUG errs() << "[Loop regionsNeeded] startDom: " << *startDom << "\n"; #endif -// TODO: if an inst in the set is in the bb, we can truncate? + + // TODO: if an inst in the set is in the bb, we can truncate? 
+ #if DEBUG errs() << "Start post dom tree analysis\n"; #endif + // Flip directions for the region end - auto& postDomTree = FAM->getResult(*home); + auto& postDomTree = FAM->getResult(*homeFun); // Find the closest point that dominates - auto* endDom = blockMap.begin()->second; - for (auto map : blockMap) { + auto* endDom = blocks.begin()->second; + for (auto& [_, block] : blocks) { #if DEBUGINFER if (endDom != nullptr) { errs() << "Finding post dom of: " << getSimpleNodeLabel(map.second) << " and " << getSimpleNodeLabel(endDom) << "\n"; @@ -203,8 +281,9 @@ void InferFreshCons::addRegion(inst_vec set, int regionType) { errs() << "endDom is null\n"; } #endif - endDom = postDomTree.findNearestCommonDominator(map.second, endDom); + endDom = postDomTree.findNearestCommonDominator(block, endDom); } + #if DEBUG errs() << "[Loop regionsNeeded] endDom: " << *endDom << "\n"; #endif @@ -214,9 +293,11 @@ void InferFreshCons::addRegion(inst_vec set, int regionType) { } else if (endDom == nullptr) { errs() << "[Error] Null endDom\n"; } + // Need to make the start and end dominate each other as well. startDom = domTree.findNearestCommonDominator(startDom, endDom); endDom = postDomTree.findNearestCommonDominator(startDom, endDom); + #if DEBUG errs() << "[Loop regionsNeeded] After matching scope\n"; errs() << "[Loop regionsNeeded] startDom: " << *startDom << "\n"; @@ -241,8 +322,8 @@ void InferFreshCons::addRegion(inst_vec set, int regionType) { #endif // TODO: fallback if endDom is null? Need hyper-blocks, I think // possibly can do a truncation check, to lessen the size a little, but could that interfere with compiler optimizations? 
- auto* regionStart = truncate(startDom, true, set, nested); - auto* regionEnd = truncate(endDom, false, set, nested); + auto* regionStart = truncate(startDom, true, targetInsts, seenFuns); + auto* regionEnd = truncate(endDom, false, targetInsts, seenFuns); if (regionStart == nullptr) { errs() << "[Error] Null startDom after truncation\n"; } else if (regionEnd == nullptr) { @@ -262,8 +343,8 @@ void InferFreshCons::addRegion(inst_vec set, int regionType) { // each other, so there's no possibility of not running into the start from // the end auto [regionStart, regionEnd] = findShortest(regionsFound); - insertRegionInst(0, regionStart); - insertRegionInst(1, regionEnd); + insertRegionInst(Start, regionStart); + insertRegionInst(End, regionEnd); //}//end while regions needed #if DEBUG @@ -355,7 +436,6 @@ Instruction* InferFreshCons::truncate(BasicBlock* B, bool forwards, inst_vec set return &B->front(); } -// findCandidate Function* InferFreshCons::findCandidate(std::map blockMap, Function* root) { #if DEBUG errs() << "== findCandidate ===\n"; @@ -374,8 +454,8 @@ Function* InferFreshCons::findCandidate(std::map bloc // Easy case: everything is already in the same function if (funList.size() == 1) return funList.at(0); - /* Algo Goal: get the deepest function that still calls (or is) all funcs in funcList. - * Consider: multiple calls? Should be dealt with in the add region function -- eventually each caller + /* Algo goal: get the deepest function that still calls (or is) all funcs in funcList. + * Consider: multiple calls? Should be dealt with in the addRegion -- eventually each caller * gets its own region */ Function* goal = nullptr; @@ -393,7 +473,7 @@ Function* InferFreshCons::findCandidate(std::map bloc return goal; } -/*Recursive: from a root, returns list of called funcs. */ +// From a root, returns list of called functions. 
std::vector InferFreshCons::deepCaller(Function* root, std::vector& funList, Function** goal) { std::vector calledFuncs; bool mustIncludeSelf = false; diff --git a/ocelot/AtomicRegionInference/src/TaintTracker.cpp b/ocelot/AtomicRegionInference/src/TaintTracker.cpp index ee22ad8..f80f7ce 100644 --- a/ocelot/AtomicRegionInference/src/TaintTracker.cpp +++ b/ocelot/AtomicRegionInference/src/TaintTracker.cpp @@ -833,31 +833,50 @@ val_vec getControlDeps(Instruction* ti) { // Get direct uses (at src level, not IR) of a fresh var inst_vec traverseDirectUses(Instruction* root) { +#if DEBUG + errs() << "=== traverseDirectUses ===\n"; +#endif inst_vec uses; std::queue localDeps; +#if DEBUG + errs() << "Add root to localDeps: " << *root << "\n"; +#endif localDeps.push(root); // Edge case: check if return is an internally allocated stack var Value* retPtr; - Instruction* last = &(root->getFunction()->back().back()); - if (ReturnInst* ri = dyn_cast(last)) { - for (Use& op : ri->operands()) { - if (LoadInst* li = dyn_cast(op.get())) { + auto* last = &(root->getFunction()->back().back()); + if (auto* ri = dyn_cast(last)) { + for (auto& op : ri->operands()) { + if (auto* li = dyn_cast(op.get())) { retPtr = li->getPointerOperand(); +#if DEBUG + errs() << "retPtr: " << *retPtr << "\n"; +#endif } } } while (!localDeps.empty()) { - Instruction* currVal = localDeps.front(); - uses.push_back(currVal); + auto* curVal = localDeps.front(); +#if DEBUG + errs() << "[Loop localDeps] Add curVal to uses: " << *curVal << "\n"; +#endif + uses.push_back(curVal); localDeps.pop(); - for (Value* use : currVal->users()) { - // if it's a gepi, see if there are others that occur afterwards + +#if DEBUG + errs() << "[Loop localDeps] Go over curVal users\n"; +#endif + for (auto* use : curVal->users()) { +#if DEBUG + errs() << "[Loop users] use: " << *use << "\n"; +#endif + // If it's a gepi, see if there are others that occur afterwards // errs() << *use <<" is a direct use of " << *currVal<<"\n"; if 
(isa(use)) { - inst_vec matching = couldMatchGEPI(dyn_cast(use)); - for (Instruction* item : matching) { + auto matching = couldMatchGEPI(dyn_cast(use)); + for (auto* item : matching) { // errs() << "pushing to local deps " << *item <<"\n"; localDeps.push(item); } @@ -868,8 +887,14 @@ inst_vec traverseDirectUses(Instruction* root) { } } } else if (StoreInst* si = dyn_cast(use)) { - // if stores into ret pointer, treat as above +#if DEBUG + errs() << "[Loop users] use = StoreInst\n"; +#endif + // If stores into ret pointer, treat as above if (si->getPointerOperand() == retPtr) { +#if DEBUG + errs() << "[Loop users] ptr operand = retPtr\n"; +#endif for (Value* calls : si->getFunction()->users()) { if (isa(calls)) { uses.push_back(dyn_cast(calls)); @@ -877,37 +902,112 @@ inst_vec traverseDirectUses(Instruction* root) { } } } else if (BranchInst* bi = dyn_cast(use)) { - // if a use is a branch inst the atomic region needs to + // If a use is a branch inst the atomic region needs to // dominate the successors for (BasicBlock* bbInterior : bi->successors()) { - // skip panic blocks, otherwise there will be no post dom + // Skip panic blocks, otherwise there will be no post dom if (bbInterior->getName().equals("panic")) { continue; } uses.push_back(&(bbInterior->front())); } } else if (CallInst* ci = dyn_cast(use)) { +#if DEBUG + errs() << "[Loop users] use = CallInst\n"; +#endif if (ci->hasName() && ci->getName().startswith("_")) { - // fall through + // Fall through } else { +#if DEBUG + errs() << "[Loop users] Add CallInst to uses\n"; +#endif uses.push_back(ci); continue; } } - if (Instruction* iUse = dyn_cast(use)) { - // see if load is to another var or just internal ssa - if (LoadInst* li = dyn_cast(iUse)) { + + if (auto* iUse = dyn_cast(use)) { + // See if load is to another var or just internal ssa + if (auto* li = dyn_cast(iUse)) { if (li->hasName()) { - // Hacky --verify that this is always true - if (!li->getName().startswith("_")) { + // Hacky -- verify that 
this is always true + if (!li->getName().startswith("_")) continue; - } } } + +#if DEBUG + errs() << "[Loop users] Add use to localDeps\n"; +#endif localDeps.push(iUse); } } } +#if DEBUG + errs() << "*** traverseDirectUses ***\n"; +#endif return uses; } + +inst_vec traverseUses(Instruction* root) { +#if DEBUG + errs() << "=== traverseUses ===\n"; +#endif + auto directUses = traverseDirectUses(root); + inst_set uses(directUses.begin(), directUses.end()); + + for (auto* directUse : directUses) { +#if DEBUG + errs() << "[directUses] directUse: " << *directUse << "\n"; +#endif + + if (auto* si = dyn_cast(directUse)) { +#if DEBUG + errs() << "[directUses] directUse = StoreInst\n"; +#endif + + auto* ptr = si->getPointerOperand(); +#if DEBUG + errs() << "[directUses] ptr operand: " << *ptr << "\n"; +#endif + + for (auto* ptrUse : ptr->users()) { + if (auto* li = dyn_cast(ptrUse)) { +#if DEBUG + errs() << "[ptrUsers] Add ptrUse (LoadInst) to uses: " << *ptrUse << "\n"; +#endif + uses.emplace(li); + + for (auto* liUse : li->users()) { + if (auto* ci = dyn_cast(liUse)) { +#if DEBUG + errs() << "[liUsers] Add liUse (CallInst) to uses: " << *liUse << "\n"; +#endif + uses.emplace(ci); + } + } + } + } + } else if (auto* li = dyn_cast(directUse)) { +#if DEBUG + errs() << "[directUses] directUse = LoadInst\n"; +#endif + auto* ptr = li->getPointerOperand(); + for (auto* ptrUse : ptr->users()) { + if (auto* si = dyn_cast(ptrUse)) { +#if DEBUG + errs() << "[ptrUses] Add ptrUse (StoreInst) to uses: " << *si << "\n"; +#endif + uses.emplace(si); + } + } + } + } + +#if DEBUG + errs() << "=== traverseUses ===\n"; +#endif + inst_vec uses_vec(uses.begin(), uses.end()); + return uses_vec; +} diff --git a/ocelot/AtomicRegionInference/src/include/HelperTypes.h b/ocelot/AtomicRegionInference/src/include/HelperTypes.h index 9565b1f..29efcc0 100644 --- a/ocelot/AtomicRegionInference/src/include/HelperTypes.h +++ b/ocelot/AtomicRegionInference/src/include/HelperTypes.h @@ -19,20 +19,19 @@ #include 
"llvm/IR/SymbolTableListTraits.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#define DEBUG 1 - using namespace llvm; typedef std::vector val_vec; typedef std::vector bb_vec; typedef std::vector inst_vec; +typedef std::set inst_set; typedef std::map val_insts_map; typedef std::vector gv_vec; typedef std::vector> val_inst_vec; typedef std::pair inst_inst_pair; typedef std::vector inst_inst_vec; typedef std::map inst_vals_map; -typedef std::map> inst_insts_map; +typedef std::map inst_insts_map; typedef std::vector func_vec; typedef std::vector inst_vec_vec; diff --git a/ocelot/AtomicRegionInference/src/include/Helpers.h b/ocelot/AtomicRegionInference/src/include/Helpers.h index 8e940f0..bfb6901 100644 --- a/ocelot/AtomicRegionInference/src/include/Helpers.h +++ b/ocelot/AtomicRegionInference/src/include/Helpers.h @@ -7,6 +7,9 @@ using namespace llvm; +#define DEBUG 1 +#define OPT 1 + std::string getSimpleNodeLabel(const Value* Node); bool isAnnot(const StringRef annotName); void printInstInsts(const inst_insts_map& iim, bool onlyCalls = false); diff --git a/ocelot/AtomicRegionInference/src/include/InferFreshCons.h b/ocelot/AtomicRegionInference/src/include/InferFreshCons.h index e9defee..b3fcd10 100644 --- a/ocelot/AtomicRegionInference/src/include/InferFreshCons.h +++ b/ocelot/AtomicRegionInference/src/include/InferFreshCons.h @@ -13,11 +13,18 @@ struct InferFreshCons { atomStart = _as; atomEnd = _ae; } + + enum RegionKind { Fresh, + Consistent }; + + enum InsertKind { Start, + End }; + void inferConsistent(std::map allSets); void inferFresh(inst_vec_vec allSets); - void addRegion(inst_vec conSet, int regType); + void addRegion(inst_vec conSet, RegionKind regionKind); Function* findCandidate(std::map blocks, Function* root); - Instruction* insertRegionInst(int regInst, Instruction* insertBefore); + Instruction* insertRegionInst(InsertKind insertKind, Instruction* insertBefore); bool sameFunction(std::map blockMap); Instruction* truncate(BasicBlock* bb, bool 
forwards, inst_vec conSet, std::set nested); std::vector deepCaller(Function* root, std::vector& funcList, Function** goal); diff --git a/ocelot/AtomicRegionInference/src/include/TaintTracker.h b/ocelot/AtomicRegionInference/src/include/TaintTracker.h index 1d7eaf7..ea3ce03 100644 --- a/ocelot/AtomicRegionInference/src/include/TaintTracker.h +++ b/ocelot/AtomicRegionInference/src/include/TaintTracker.h @@ -13,5 +13,6 @@ bool storePrecedesUse(Instruction* use, StoreInst* toMatch); inst_vec couldMatchGEPI(GetElementPtrInst* tGEPI); val_vec getControlDeps(Instruction* ti); inst_vec traverseDirectUses(Instruction* root); +inst_vec traverseUses(Instruction* root); #endif From cde1b66aad5c00785863b7e1804f278e3938417d Mon Sep 17 00:00:00 2001 From: Robert Zhang Date: Mon, 18 Dec 2023 22:30:46 -0500 Subject: [PATCH 04/18] [InferAtomsPass] Unignore .ll files in ctests and add more comments --- .gitignore | 1 - benchmarks/ctests/example01.ll | 61 ++++++++++++ benchmarks/ctests/example02.ll | 79 +++++++++++++++ benchmarks/ctests/example02.orig.ll | 97 +++++++++++++++++++ benchmarks/ctests/example03.ll | 64 ++++++++++++ benchmarks/ctests/example03.orig.ll | 82 ++++++++++++++++ .../src/InferFreshCons.cpp | 30 +++--- 7 files changed, 398 insertions(+), 16 deletions(-) create mode 100644 benchmarks/ctests/example01.ll create mode 100644 benchmarks/ctests/example02.ll create mode 100644 benchmarks/ctests/example02.orig.ll create mode 100644 benchmarks/ctests/example03.ll create mode 100644 benchmarks/ctests/example03.orig.ll diff --git a/.gitignore b/.gitignore index 17712eb..6f9ba50 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,4 @@ .vscode ocelot/AtomicRegionInference/build -benchmarks/ctests/*.ll .DS_Store \ No newline at end of file diff --git a/benchmarks/ctests/example01.ll b/benchmarks/ctests/example01.ll new file mode 100644 index 0000000..c4e8656 --- /dev/null +++ b/benchmarks/ctests/example01.ll @@ -0,0 +1,61 @@ +; ModuleID = '../../benchmarks/ctests/example01.c' 
+source_filename = "../../benchmarks/ctests/example01.c" +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-macosx12.0.0" + +@.str = private unnamed_addr constant [7 x i8] c"Fresh\0A\00", align 1 +@.str.1 = private unnamed_addr constant [12 x i8] c"Consistent\0A\00", align 1 +@IO_NAME1 = global ptr @tmp, align 8 + +declare i32 @printf(ptr noundef, ...) #0 + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @atomic_start() #1 { +entry: + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @atomic_end() #1 { +entry: + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @tmp() #1 { +entry: + ret i32 0 +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @log(i32 noundef %x) #1 { +entry: + %x.addr = alloca i32, align 4 + store i32 %x, ptr %x.addr, align 4 + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @app() #1 { +entry: + %x = alloca i32, align 4 + call void @atomic_start() + %call = call i32 @tmp() + store i32 %call, ptr %x, align 4 + %0 = load i32, ptr %x, align 4 + call void @log(i32 noundef %0) + call void @atomic_end() + ret i32 0 +} + +attributes #0 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } +attributes #1 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } + +!llvm.module.flags = !{!0, !1, !2, !3} +!llvm.ident = !{!4} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 
8, !"PIC Level", i32 2} +!2 = !{i32 7, !"uwtable", i32 1} +!3 = !{i32 7, !"frame-pointer", i32 1} +!4 = !{!"Homebrew clang version 17.0.2"} diff --git a/benchmarks/ctests/example02.ll b/benchmarks/ctests/example02.ll new file mode 100644 index 0000000..06281bb --- /dev/null +++ b/benchmarks/ctests/example02.ll @@ -0,0 +1,79 @@ +; ModuleID = '../../benchmarks/ctests/example02.c' +source_filename = "../../benchmarks/ctests/example02.c" +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-macosx12.0.0" + +@IO_NAME = global ptr @sense, align 8 + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @atomic_start() #0 { +entry: + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @atomic_end() #0 { +entry: + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @sense() #0 { +entry: + ret i32 0 +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @norm(i32 noundef %t) #0 { +entry: + %t.addr = alloca i32, align 4 + store i32 %t, ptr %t.addr, align 4 + %0 = load i32, ptr %t.addr, align 4 + ret i32 %0 +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @log(i32 noundef %x) #0 { +entry: + %x.addr = alloca i32, align 4 + store i32 %x, ptr %x.addr, align 4 + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @tmp() #0 { +entry: + %t = alloca i32, align 4 + %t_norm = alloca i32, align 4 + %call = call i32 @sense() + store i32 %call, ptr %t, align 4 + %0 = load i32, ptr %t, align 4 + %call1 = call i32 @norm(i32 noundef %0) + store i32 %call1, ptr %t_norm, align 4 + %1 = load i32, ptr %t_norm, align 4 + ret i32 %1 +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @app() #0 { +entry: + %x = alloca i32, align 4 + call void @atomic_start() + %call = call i32 @tmp() + store i32 %call, ptr %x, align 4 + %0 = load i32, ptr %x, align 4 
+ call void @log(i32 noundef %0) + call void @atomic_end() + ret void +} + +attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } + +!llvm.module.flags = !{!0, !1, !2, !3} +!llvm.ident = !{!4} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 8, !"PIC Level", i32 2} +!2 = !{i32 7, !"uwtable", i32 1} +!3 = !{i32 7, !"frame-pointer", i32 1} +!4 = !{!"Homebrew clang version 17.0.2"} diff --git a/benchmarks/ctests/example02.orig.ll b/benchmarks/ctests/example02.orig.ll new file mode 100644 index 0000000..8eccea6 --- /dev/null +++ b/benchmarks/ctests/example02.orig.ll @@ -0,0 +1,97 @@ +; ModuleID = '../../benchmarks/ctests/example02.c' +source_filename = "../../benchmarks/ctests/example02.c" +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-macosx12.0.0" + +@IO_NAME = global ptr @sense, align 8 + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @Fresh(i32 noundef %x) #0 { +entry: + %x.addr = alloca i32, align 4 + store i32 %x, ptr %x.addr, align 4 + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @Consistent(i32 noundef %x, i32 noundef %id) #0 { +entry: + %x.addr = alloca i32, align 4 + %id.addr = alloca i32, align 4 + store i32 %x, ptr %x.addr, align 4 + store i32 %id, ptr %id.addr, align 4 + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @atomic_start() #0 { +entry: + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @atomic_end() #0 { +entry: + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @sense() #0 { +entry: + ret i32 0 +} + +; Function Attrs: noinline nounwind 
optnone ssp uwtable(sync) +define i32 @norm(i32 noundef %t) #0 { +entry: + %t.addr = alloca i32, align 4 + store i32 %t, ptr %t.addr, align 4 + %0 = load i32, ptr %t.addr, align 4 + ret i32 %0 +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @log(i32 noundef %x) #0 { +entry: + %x.addr = alloca i32, align 4 + store i32 %x, ptr %x.addr, align 4 + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @tmp() #0 { +entry: + %t = alloca i32, align 4 + %t_norm = alloca i32, align 4 + %call = call i32 @sense() + store i32 %call, ptr %t, align 4 + %0 = load i32, ptr %t, align 4 + %call1 = call i32 @norm(i32 noundef %0) + store i32 %call1, ptr %t_norm, align 4 + %1 = load i32, ptr %t_norm, align 4 + ret i32 %1 +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @app() #0 { +entry: + %x = alloca i32, align 4 + %call = call i32 @tmp() + store i32 %call, ptr %x, align 4 + %0 = load i32, ptr %x, align 4 + call void @Fresh(i32 noundef %0) + %1 = load i32, ptr %x, align 4 + call void @log(i32 noundef %1) + ret void +} + +attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } + +!llvm.module.flags = !{!0, !1, !2, !3} +!llvm.ident = !{!4} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 8, !"PIC Level", i32 2} +!2 = !{i32 7, !"uwtable", i32 1} +!3 = !{i32 7, !"frame-pointer", i32 1} +!4 = !{!"Homebrew clang version 17.0.2"} diff --git a/benchmarks/ctests/example03.ll b/benchmarks/ctests/example03.ll new file mode 100644 index 0000000..f47c6b7 --- /dev/null +++ b/benchmarks/ctests/example03.ll @@ -0,0 +1,64 @@ +; ModuleID = '../../benchmarks/ctests/example03.c' +source_filename = 
"../../benchmarks/ctests/example03.c" +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-macosx12.0.0" + +@IO_NAME = global ptr @input, align 8 + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @atomic_start() #0 { +entry: + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @atomic_end() #0 { +entry: + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @input() #0 { +entry: + ret i32 0 +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @log(i32 noundef %x) #0 { +entry: + %x.addr = alloca i32, align 4 + store i32 %x, ptr %x.addr, align 4 + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @app() #0 { +entry: + %x = alloca i32, align 4 + %y = alloca i32, align 4 + %z = alloca i32, align 4 + call void @atomic_start() + %call = call i32 @input() + store i32 %call, ptr %x, align 4 + %0 = load i32, ptr %x, align 4 + call void @log(i32 noundef %0) + call void @atomic_end() + store i32 1, ptr %y, align 4 + %1 = load i32, ptr %y, align 4 + %2 = add i32 %1, 1 + store i32 %2, ptr %z, align 4 + %3 = load i32, ptr %z, align 4 + call void @log(i32 %3) + ret void +} + +attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } + +!llvm.module.flags = !{!0, !1, !2, !3} +!llvm.ident = !{!4} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 8, !"PIC Level", i32 2} +!2 = !{i32 7, !"uwtable", i32 1} +!3 = !{i32 7, !"frame-pointer", i32 1} +!4 = !{!"Homebrew clang version 17.0.2"} diff --git a/benchmarks/ctests/example03.orig.ll b/benchmarks/ctests/example03.orig.ll new file mode 100644 index 0000000..89676a7 
--- /dev/null +++ b/benchmarks/ctests/example03.orig.ll @@ -0,0 +1,82 @@ +; ModuleID = '../../benchmarks/ctests/example03.c' +source_filename = "../../benchmarks/ctests/example03.c" +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-macosx12.0.0" + +@IO_NAME = global ptr @input, align 8 + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @Fresh(i32 noundef %x) #0 { +entry: + %x.addr = alloca i32, align 4 + store i32 %x, ptr %x.addr, align 4 + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @Consistent(i32 noundef %x, i32 noundef %id) #0 { +entry: + %x.addr = alloca i32, align 4 + %id.addr = alloca i32, align 4 + store i32 %x, ptr %x.addr, align 4 + store i32 %id, ptr %id.addr, align 4 + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @atomic_start() #0 { +entry: + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @atomic_end() #0 { +entry: + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @input() #0 { +entry: + ret i32 0 +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @log(i32 noundef %x) #0 { +entry: + %x.addr = alloca i32, align 4 + store i32 %x, ptr %x.addr, align 4 + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @app() #0 { +entry: + %x = alloca i32, align 4 + %y = alloca i32, align 4 + %z = alloca i32, align 4 + %call = call i32 @input() + store i32 %call, ptr %x, align 4 + store i32 1, ptr %y, align 4 + %0 = load i32, ptr %y, align 4 + %add = add nsw i32 %0, 1 + store i32 %add, ptr %z, align 4 + %1 = load i32, ptr %z, align 4 + call void @log(i32 noundef %1) + %2 = load i32, ptr %x, align 4 + call void @log(i32 noundef %2) + %3 = load i32, ptr %x, align 4 + call void @Fresh(i32 noundef %3) + ret void +} + +attributes #0 = { noinline nounwind optnone ssp 
uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } + +!llvm.module.flags = !{!0, !1, !2, !3} +!llvm.ident = !{!4} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 8, !"PIC Level", i32 2} +!2 = !{i32 7, !"uwtable", i32 1} +!3 = !{i32 7, !"frame-pointer", i32 1} +!4 = !{!"Homebrew clang version 17.0.2"} diff --git a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp index 0e1f93b..727a6d1 100644 --- a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp +++ b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp @@ -182,14 +182,13 @@ void InferFreshCons::addRegion(inst_vec targetInsts, RegionKind regionKind) { #if DEBUG errs() << "[regionsNeeded] Go over all block insts\n"; #endif - // auto* B = blocks.begin()->second; std::set seenBlocks; for (auto& [_, B] : blocks) { if (seenBlocks.find(B) == seenBlocks.end()) { seenBlocks.emplace(B); - std::vector toDelay; - std::vector toDelete; + inst_vec toDelay; + inst_set toDelete; for (auto& I : *B) { #if DEBUG @@ -200,45 +199,46 @@ void InferFreshCons::addRegion(inst_vec targetInsts, RegionKind regionKind) { errs() << "Should be delayed\n"; #endif Instruction *prev, *clone; + + // Clone each untainted instruction to be inserted to + // the end of the basic block if (isa(I)) { if (I.getOpcode() == Instruction::Add) clone = BinaryOperator::Create(Instruction::Add, prev, I.getOperand(1)); else clone = I.clone(); - } else if (isa(I)) { - clone = I.clone(); } else if (auto* ci = dyn_cast(&I)) { - clone = CallInst::Create(ci->getCalledFunction(), prev); + if (prev != nullptr) + clone = CallInst::Create(ci->getCalledFunction(), prev); } else if (auto* si = dyn_cast(&I)) { if (prev != nullptr && find(targetInsts.begin(), targetInsts.end(), prev) == 
targetInsts.end()) { clone = I.clone(); clone->setOperand(0, prev); - errs() << "yo\n"; } else clone = I.clone(); } else clone = I.clone(); + + // Keep track of the previous instruction to allow LLVM + // to remap virtual registers (avoiding 's) prev = clone; - toDelete.push_back(&I); + toDelete.emplace(&I); toDelay.push_back(clone); } } IRBuilder builder(B); - for (auto* d : toDelay) { - // #if DEBUG - // errs() << "Delayed: " << *d << "\n"; - // #endif - builder.Insert(d); - } + // Insert each delayed instruction to the end of the block + for (auto* d : toDelay) builder.Insert(d); auto I = B->begin(); + // Delete their duplicates earlier in the block for (; I != B->end();) { #if DEBUG errs() << *I << "\n"; #endif - if (find(toDelete.begin(), toDelete.end(), &*I) != toDelete.end()) { + if (toDelete.find(&*I) != toDelete.end()) { #if DEBUG errs() << "Delete\n"; #endif From c772992cd18d8cb3ded330afa18a27ac0d807df5 Mon Sep 17 00:00:00 2001 From: Robert Zhang Date: Tue, 30 Jan 2024 22:09:36 -0500 Subject: [PATCH 05/18] [InferAtomsPass] Generalize instruction scheduling The optimization is now much more robust against general source programs. Freshness annotations now work pretty well! The main fix to the previous setup involves a mapping from old instructions to cloned ones. Since cloning an instruction (e.g., BinaryOperator) doesn't automatically clone its operands, this mapping is required to help replace the operands of cloned instructions with the clones of those operands. Cloning is the only approach to such replacements due to the LLVM IR being in SSA form. Test plan: Run examples01/02/03 to see the transformations. 
For example, ```sh make eg3 ``` Before optimization: ```llvm define void @app() #0 { entry: %x = alloca i32, align 4 %y = alloca i32, align 4 %z = alloca i32, align 4 call void @atomic_start() ; <--- START %call = call i32 @input() store i32 %call, ptr %x, align 4 store i32 1, ptr %y, align 4 %0 = load i32, ptr %y, align 4 %add = add nsw i32 %0, 1 store i32 %add, ptr %z, align 4 %1 = load i32, ptr %z, align 4 call void @log(i32 noundef %1) %2 = load i32, ptr %x, align 4 call void @log(i32 noundef %2) call void @atomic_end() ; <--- END ret void } ``` After optimization: ```llvm define void @app() #0 { entry: %x = alloca i32, align 4 %y = alloca i32, align 4 %z = alloca i32, align 4 call void @atomic_start() ; <--- START %call = call i32 @input() store i32 %call, ptr %x, align 4 %0 = load i32, ptr %x, align 4 call void @log(i32 noundef %0) call void @atomic_end() ; <--- END store i32 1, ptr %y, align 4 %1 = load i32, ptr %y, align 4 %2 = add nsw i32 %1, 1 store i32 %2, ptr %z, align 4 %3 = load i32, ptr %z, align 4 call void @log(i32 noundef %3) ret void } ``` You may also link, build, and run an executable via: ```sh make run_eg3 && ../../benchmarks/ctests/example03.out ``` --- .gitignore | 1 + benchmarks/ctests/example01.c | 9 +- benchmarks/ctests/example01.ll | 10 ++ benchmarks/ctests/example01.orig.ll | 91 +++++++++++++++++++ benchmarks/ctests/example02.c | 10 +- benchmarks/ctests/example02.ll | 13 +++ benchmarks/ctests/example02.orig.ll | 13 +++ benchmarks/ctests/example03.c | 10 +- benchmarks/ctests/example03.ll | 17 +++- benchmarks/ctests/example03.orig.ll | 13 +++ ocelot/AtomicRegionInference/Makefile | 20 +++- ocelot/AtomicRegionInference/README.md | 19 +++- .../src/InferFreshCons.cpp | 67 +++++++++----- .../src/include/HelperTypes.h | 1 + .../src/include/InferAtoms.h | 6 ++ 15 files changed, 267 insertions(+), 33 deletions(-) create mode 100644 benchmarks/ctests/example01.orig.ll diff --git a/.gitignore b/.gitignore index 6f9ba50..fa78942 100644 --- 
a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ .vscode ocelot/AtomicRegionInference/build +benchmarks/ctests/*.out .DS_Store \ No newline at end of file diff --git a/benchmarks/ctests/example01.c b/benchmarks/ctests/example01.c index 4b5b66f..3bad3e9 100644 --- a/benchmarks/ctests/example01.c +++ b/benchmarks/ctests/example01.c @@ -8,11 +8,18 @@ void atomic_end() {} int tmp() { return 0; } int (*IO_NAME1)() = tmp; -void log(int x) {} + +void log(int x) { + printf("%d\n", x); +} int app() { int x = tmp(); Fresh(x); log(x); return 0; +} + +int main() { + app(); } \ No newline at end of file diff --git a/benchmarks/ctests/example01.ll b/benchmarks/ctests/example01.ll index c4e8656..c38981e 100644 --- a/benchmarks/ctests/example01.ll +++ b/benchmarks/ctests/example01.ll @@ -6,6 +6,7 @@ target triple = "arm64-apple-macosx12.0.0" @.str = private unnamed_addr constant [7 x i8] c"Fresh\0A\00", align 1 @.str.1 = private unnamed_addr constant [12 x i8] c"Consistent\0A\00", align 1 @IO_NAME1 = global ptr @tmp, align 8 +@.str.2 = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 declare i32 @printf(ptr noundef, ...) #0 @@ -32,6 +33,8 @@ define void @log(i32 noundef %x) #1 { entry: %x.addr = alloca i32, align 4 store i32 %x, ptr %x.addr, align 4 + %0 = load i32, ptr %x.addr, align 4 + %call = call i32 (ptr, ...) 
@printf(ptr noundef @.str.2, i32 noundef %0) ret void } @@ -48,6 +51,13 @@ entry: ret i32 0 } +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @main() #1 { +entry: + %call = call i32 @app() + ret i32 0 +} + attributes #0 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } attributes #1 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } diff --git a/benchmarks/ctests/example01.orig.ll b/benchmarks/ctests/example01.orig.ll new file mode 100644 index 0000000..68b2445 --- /dev/null +++ b/benchmarks/ctests/example01.orig.ll @@ -0,0 +1,91 @@ +; ModuleID = '../../benchmarks/ctests/example01.c' +source_filename = "../../benchmarks/ctests/example01.c" +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-macosx12.0.0" + +@.str = private unnamed_addr constant [7 x i8] c"Fresh\0A\00", align 1 +@.str.1 = private unnamed_addr constant [12 x i8] c"Consistent\0A\00", align 1 +@IO_NAME1 = global ptr @tmp, align 8 +@.str.2 = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @Fresh(i32 noundef %x) #0 { +entry: + %x.addr = alloca i32, align 4 + store i32 %x, ptr %x.addr, align 4 + %call = call i32 (ptr, ...) @printf(ptr noundef @.str) + ret void +} + +declare i32 @printf(ptr noundef, ...) 
#1 + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @Consistent(i32 noundef %x, i32 noundef %id) #0 { +entry: + %x.addr = alloca i32, align 4 + %id.addr = alloca i32, align 4 + store i32 %x, ptr %x.addr, align 4 + store i32 %id, ptr %id.addr, align 4 + %call = call i32 (ptr, ...) @printf(ptr noundef @.str.1) + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @atomic_start() #0 { +entry: + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @atomic_end() #0 { +entry: + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @tmp() #0 { +entry: + ret i32 0 +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @log(i32 noundef %x) #0 { +entry: + %x.addr = alloca i32, align 4 + store i32 %x, ptr %x.addr, align 4 + %0 = load i32, ptr %x.addr, align 4 + %call = call i32 (ptr, ...) @printf(ptr noundef @.str.2, i32 noundef %0) + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @app() #0 { +entry: + %x = alloca i32, align 4 + %call = call i32 @tmp() + store i32 %call, ptr %x, align 4 + %0 = load i32, ptr %x, align 4 + call void @Fresh(i32 noundef %0) + %1 = load i32, ptr %x, align 4 + call void @log(i32 noundef %1) + ret i32 0 +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @main() #0 { +entry: + %call = call i32 @app() + ret i32 0 +} + +attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } +attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" 
"target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } + +!llvm.module.flags = !{!0, !1, !2, !3} +!llvm.ident = !{!4} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 8, !"PIC Level", i32 2} +!2 = !{i32 7, !"uwtable", i32 1} +!3 = !{i32 7, !"frame-pointer", i32 1} +!4 = !{!"Homebrew clang version 17.0.2"} diff --git a/benchmarks/ctests/example02.c b/benchmarks/ctests/example02.c index 1047d9e..75bced9 100644 --- a/benchmarks/ctests/example02.c +++ b/benchmarks/ctests/example02.c @@ -1,3 +1,5 @@ +#include + void Fresh(int x) {} void Consistent(int x, int id) {} @@ -9,7 +11,9 @@ int (*IO_NAME)() = sense; int norm(int t) { return t; } -void log(int x) {} +void log(int x) { + printf("%d\n", x); +} int tmp() { int t = sense(); @@ -21,4 +25,8 @@ void app() { int x = tmp(); Fresh(x); log(x); +} + +int main() { + app(); } \ No newline at end of file diff --git a/benchmarks/ctests/example02.ll b/benchmarks/ctests/example02.ll index 06281bb..5a557b7 100644 --- a/benchmarks/ctests/example02.ll +++ b/benchmarks/ctests/example02.ll @@ -4,6 +4,7 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-macosx12.0.0" @IO_NAME = global ptr @sense, align 8 +@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 ; Function Attrs: noinline nounwind optnone ssp uwtable(sync) define void @atomic_start() #0 { @@ -37,9 +38,13 @@ define void @log(i32 noundef %x) #0 { entry: %x.addr = alloca i32, align 4 store i32 %x, ptr %x.addr, align 4 + %0 = load i32, ptr %x.addr, align 4 + %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %0) ret void } +declare i32 @printf(ptr noundef, ...) 
#1 + ; Function Attrs: noinline nounwind optnone ssp uwtable(sync) define i32 @tmp() #0 { entry: @@ -67,7 +72,15 @@ entry: ret void } +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @main() #0 { +entry: + call void @app() + ret i32 0 +} + attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } +attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } !llvm.module.flags = !{!0, !1, !2, !3} !llvm.ident = !{!4} diff --git a/benchmarks/ctests/example02.orig.ll b/benchmarks/ctests/example02.orig.ll index 8eccea6..550dc07 100644 --- a/benchmarks/ctests/example02.orig.ll +++ b/benchmarks/ctests/example02.orig.ll @@ -4,6 +4,7 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-macosx12.0.0" @IO_NAME = global ptr @sense, align 8 +@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 ; Function Attrs: noinline nounwind optnone ssp uwtable(sync) define void @Fresh(i32 noundef %x) #0 { @@ -55,9 +56,13 @@ define void @log(i32 noundef %x) #0 { entry: %x.addr = alloca i32, align 4 store i32 %x, ptr %x.addr, align 4 + %0 = load i32, ptr %x.addr, align 4 + %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %0) ret void } +declare i32 @printf(ptr noundef, ...) 
#1 + ; Function Attrs: noinline nounwind optnone ssp uwtable(sync) define i32 @tmp() #0 { entry: @@ -85,7 +90,15 @@ entry: ret void } +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @main() #0 { +entry: + call void @app() + ret i32 0 +} + attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } +attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } !llvm.module.flags = !{!0, !1, !2, !3} !llvm.ident = !{!4} diff --git a/benchmarks/ctests/example03.c b/benchmarks/ctests/example03.c index 98b9d0d..06d59f8 100644 --- a/benchmarks/ctests/example03.c +++ b/benchmarks/ctests/example03.c @@ -1,3 +1,5 @@ +#include + void Fresh(int x) {} void Consistent(int x, int id) {} @@ -7,7 +9,9 @@ void atomic_end() {} int input() { return 0; } int (*IO_NAME)() = input; -void log(int x) {} +void log(int x) { + printf("%d\n", x); +} void app() { int x = input(); @@ -16,4 +20,8 @@ void app() { log(z); log(x); Fresh(x); +} + +int main() { + app(); } \ No newline at end of file diff --git a/benchmarks/ctests/example03.ll b/benchmarks/ctests/example03.ll index f47c6b7..f642b6b 100644 --- a/benchmarks/ctests/example03.ll +++ b/benchmarks/ctests/example03.ll @@ -4,6 +4,7 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-macosx12.0.0" @IO_NAME = global ptr @input, align 8 +@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 ; Function Attrs: noinline nounwind optnone ssp uwtable(sync) define void @atomic_start() #0 
{ @@ -28,9 +29,13 @@ define void @log(i32 noundef %x) #0 { entry: %x.addr = alloca i32, align 4 store i32 %x, ptr %x.addr, align 4 + %0 = load i32, ptr %x.addr, align 4 + %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %0) ret void } +declare i32 @printf(ptr noundef, ...) #1 + ; Function Attrs: noinline nounwind optnone ssp uwtable(sync) define void @app() #0 { entry: @@ -45,14 +50,22 @@ entry: call void @atomic_end() store i32 1, ptr %y, align 4 %1 = load i32, ptr %y, align 4 - %2 = add i32 %1, 1 + %2 = add nsw i32 %1, 1 store i32 %2, ptr %z, align 4 %3 = load i32, ptr %z, align 4 - call void @log(i32 %3) + call void @log(i32 noundef %3) ret void } +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @main() #0 { +entry: + call void @app() + ret i32 0 +} + attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } +attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } !llvm.module.flags = !{!0, !1, !2, !3} !llvm.ident = !{!4} diff --git a/benchmarks/ctests/example03.orig.ll b/benchmarks/ctests/example03.orig.ll index 89676a7..89a0869 100644 --- a/benchmarks/ctests/example03.orig.ll +++ b/benchmarks/ctests/example03.orig.ll @@ -4,6 +4,7 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-macosx12.0.0" @IO_NAME = global ptr @input, align 8 +@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 ; Function Attrs: noinline nounwind optnone ssp uwtable(sync) define void @Fresh(i32 
noundef %x) #0 { @@ -46,9 +47,13 @@ define void @log(i32 noundef %x) #0 { entry: %x.addr = alloca i32, align 4 store i32 %x, ptr %x.addr, align 4 + %0 = load i32, ptr %x.addr, align 4 + %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %0) ret void } +declare i32 @printf(ptr noundef, ...) #1 + ; Function Attrs: noinline nounwind optnone ssp uwtable(sync) define void @app() #0 { entry: @@ -70,7 +75,15 @@ entry: ret void } +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @main() #0 { +entry: + call void @app() + ret i32 0 +} + attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } +attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } !llvm.module.flags = !{!0, !1, !2, !3} !llvm.ident = !{!4} diff --git a/ocelot/AtomicRegionInference/Makefile b/ocelot/AtomicRegionInference/Makefile index 9ab940c..306f019 100644 --- a/ocelot/AtomicRegionInference/Makefile +++ b/ocelot/AtomicRegionInference/Makefile @@ -1,5 +1,10 @@ .PHONY: clean_tests clean eg1 eg2 +all: + make eg1 + make eg2 + make eg3 + eg1: TEST=example01 make test eg2: @@ -7,8 +12,15 @@ eg2: eg3: TEST=example03 make test +run_eg1: + TEST=example01 make run +run_eg2: + TEST=example02 make run +run_eg3: + TEST=example03 make run + test: - $(MAKE) -C build all + $(MAKE) -C build clang -S -emit-llvm\ -fno-discard-value-names\ ../../benchmarks/ctests/$(TEST).c\ @@ -19,6 +31,12 @@ test: ../../benchmarks/ctests/$(TEST).c\ -o ../../benchmarks/ctests/$(TEST).ll +run: + $(MAKE) -C 
build + clang -fpass-plugin=build/src/InferAtomsPass.dylib\ + ../../benchmarks/ctests/$(TEST).c\ + -o ../../benchmarks/ctests/$(TEST).out + clean_tests: find ../../benchmarks/ctests -name "*.ll" -exec rm -rf {} \; diff --git a/ocelot/AtomicRegionInference/README.md b/ocelot/AtomicRegionInference/README.md index 5895b8c..2f9aed8 100644 --- a/ocelot/AtomicRegionInference/README.md +++ b/ocelot/AtomicRegionInference/README.md @@ -11,11 +11,22 @@ cmake .. make ``` -You may bootstrap Clang to use the pass to compile a C file like so: +You may bootstrap Clang to use the pass to compile a C file like so (run in the +same directory as this README): ```sh -clang -S -emit-llvm -fpass-plugin=src/InferAtomsPass.dylib -fno-discard-value-names ../../../benchmarks/ctests/example01.c +clang -S -emit-llvm -fpass-plugin=build/src/InferAtomsPass.dylib -fno-discard-value-names ../../benchmarks/ctests/example03.c ``` -Or, when testing, use the shortcuts provided in the Makefile (e.g., `make eg1`), -which produce two LLVM IRs with and without the pass enabled. +Or, use the shortcuts provided in the Makefile (e.g., `make eg3`), which produce +two LLVM IRs with and without the pass enabled. + +To link the program and produce an executable, run: + +```sh +clang -fpass-plugin=build/src/InferAtomsPass.dylib ../../benchmarks/ctests/example03.c -o ../../benchmarks/ctests/example03.out + +../../benchmarks/ctests/example03.out +``` + +Or, use the equivalent shortcut `make run_eg3 && ../../benchmarks/ctests/example03.out`. 
diff --git a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp index 727a6d1..2802f76 100644 --- a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp +++ b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp @@ -187,7 +187,11 @@ void InferFreshCons::addRegion(inst_vec targetInsts, RegionKind regionKind) { if (seenBlocks.find(B) == seenBlocks.end()) { seenBlocks.emplace(B); + // A mapping from original instructions to their clones + inst_inst_map clonedInsts; + // Instructions to be delayed till the end of the block inst_vec toDelay; + // (The original) instructions to be deleted inst_set toDelete; for (auto& I : *B) { @@ -198,49 +202,66 @@ void InferFreshCons::addRegion(inst_vec targetInsts, RegionKind regionKind) { #if DEBUG errs() << "Should be delayed\n"; #endif - Instruction *prev, *clone; - // Clone each untainted instruction to be inserted to - // the end of the basic block + Instruction* clone; + + // Clone each untainted instruction to be appended to + // the end of the basic block, in the original order if (isa(I)) { - if (I.getOpcode() == Instruction::Add) - clone = BinaryOperator::Create(Instruction::Add, prev, I.getOperand(1)); - else - clone = I.clone(); + clone = I.clone(); + + for (int i = 0; i < 2; i++) { + if (auto* op = dyn_cast(I.getOperand(i))) { + // Since operands don't get cloned along the way, + // look up the clone of each operand... 
+ inst_inst_map::iterator it = clonedInsts.find(op); + assert(it != clonedInsts.end()); + // ...and overwrite the original operand with it + clone->setOperand(i, it->second); + } + } } else if (auto* ci = dyn_cast(&I)) { - if (prev != nullptr) - clone = CallInst::Create(ci->getCalledFunction(), prev); - } else if (auto* si = dyn_cast(&I)) { - if (prev != nullptr && find(targetInsts.begin(), targetInsts.end(), prev) == targetInsts.end()) { - clone = I.clone(); - clone->setOperand(0, prev); - } else - clone = I.clone(); - } else clone = I.clone(); - // Keep track of the previous instruction to allow LLVM - // to remap virtual registers (avoiding 's) - prev = clone; + if (auto* op = dyn_cast(I.getOperand(0))) { + inst_inst_map::iterator it = clonedInsts.find(op); + assert(it != clonedInsts.end()); + clone->setOperand(0, it->second); + } + } else if (isa(&I)) { + clone = I.clone(); + + if (auto* op = dyn_cast(I.getOperand(0))) { + inst_inst_map::iterator it = clonedInsts.find(op); + assert(it != clonedInsts.end()); + clone->setOperand(0, it->second); + } + } + // e.g., LoadInst + else { + clone = I.clone(); + } + clonedInsts.emplace(&I, clone); toDelete.emplace(&I); toDelay.push_back(clone); } } IRBuilder builder(B); - // Insert each delayed instruction to the end of the block - for (auto* d : toDelay) builder.Insert(d); + // Append each delayed instruction to the end of the block, + // in the original order + for (auto* I : toDelay) builder.Insert(I); auto I = B->begin(); - // Delete their duplicates earlier in the block + // Delete the originals for (; I != B->end();) { #if DEBUG errs() << *I << "\n"; #endif if (toDelete.find(&*I) != toDelete.end()) { #if DEBUG - errs() << "Delete\n"; + errs() << "Deleted\n"; #endif I = I->eraseFromParent(); } else diff --git a/ocelot/AtomicRegionInference/src/include/HelperTypes.h b/ocelot/AtomicRegionInference/src/include/HelperTypes.h index 29efcc0..4a9414e 100644 --- a/ocelot/AtomicRegionInference/src/include/HelperTypes.h +++ 
b/ocelot/AtomicRegionInference/src/include/HelperTypes.h @@ -34,6 +34,7 @@ typedef std::map inst_vals_map; typedef std::map inst_insts_map; typedef std::vector func_vec; typedef std::vector inst_vec_vec; +typedef std::map inst_inst_map; extern gv_vec gv_list; diff --git a/ocelot/AtomicRegionInference/src/include/InferAtoms.h b/ocelot/AtomicRegionInference/src/include/InferAtoms.h index 19701e0..217f92b 100644 --- a/ocelot/AtomicRegionInference/src/include/InferAtoms.h +++ b/ocelot/AtomicRegionInference/src/include/InferAtoms.h @@ -17,6 +17,7 @@ #include "llvm/Passes/PassBuilder.h" #include "llvm/Passes/PassPlugin.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/InstructionNamer.h" using namespace llvm; @@ -44,6 +45,11 @@ llvmGetPassPluginInfo() { .PluginName = "Atomic Region Inference Pass", .PluginVersion = "v0.1", .RegisterPassBuilderCallbacks = [](PassBuilder& PB) { + // PB.registerPipelineParsingCallback( + // [](StringRef PassName, FunctionPassManager& FPM, ...) 
{ + // FPM.addPass(InstructionNamerPass()); + // return true; + // }); PB.registerPipelineStartEPCallback( [](ModulePassManager& MPM, OptimizationLevel Level) { MPM.addPass(InferAtomsPass()); From 7d71c889bd800bf74f5a88953990c9ae3754cdf2 Mon Sep 17 00:00:00 2001 From: Robert Zhang Date: Sat, 3 Feb 2024 18:33:52 -0500 Subject: [PATCH 06/18] [InferAtomsPass] More code cleanup & debug logging --- benchmarks/ctests/example.bc | Bin 0 -> 5984 bytes benchmarks/ctests/example.rs | 24 ++ benchmarks/ctests/example03.ll | 16 +- benchmarks/ctests/example04.c | 27 ++ benchmarks/ctests/example04.orig.ll | 102 +++++++ ocelot/AtomicRegionInference/Makefile | 5 + ocelot/AtomicRegionInference/src/Helpers.cpp | 8 + .../AtomicRegionInference/src/InferAtoms.cpp | 262 ++++++++++++------ .../src/InferFreshCons.cpp | 27 +- .../src/TaintTracker.cpp | 161 ++++++----- .../src/include/Helpers.h | 1 + 11 files changed, 457 insertions(+), 176 deletions(-) create mode 100644 benchmarks/ctests/example.bc create mode 100644 benchmarks/ctests/example.rs create mode 100644 benchmarks/ctests/example04.c create mode 100644 benchmarks/ctests/example04.orig.ll diff --git a/benchmarks/ctests/example.bc b/benchmarks/ctests/example.bc new file mode 100644 index 0000000000000000000000000000000000000000..4163fd996b61aa6750aed7c63c25855384623a88 GIT binary patch literal 5984 zcmcgv4NzNGcD@fi^qxUHJ&e2tTaurExY!Ns(+~PFFx8LkV230!VOyGbS3Lb0*8B)b z#z3;o17U=BaR<9}LNiVai8q_1$u1!tkF)i3kVG8TC0>$EG9K6UK){K!V>`q@yGe&> zd#(gFj^mwl`*TO{y?eiR?sv{T_ndQ|E{?CwM9>s)RjoznKcU$j;}=T-U;uN=c%|)~ zjMwTiUuUWPGt$6}G&n82?v&rK%9~iKIY5N~oR-3HmSe6s8M}ge9Oh|c2>nQcP-dD2 zrNQ_+a8X?ahi#MO4k@d%93gsS52Vw1YVs#;QL!z}ek9X=jMXX3bne3$y6V2lz=Wn? 
z>*{DfYdezWouuDTaE|SL+^+~-bx-c*qRj)^r$=`m=H8wJ-h&)MU^>e8lkAwidq;LP z+zz+Kos}7@Hf}qeZm-@*KlogMb#?x^G#ZInoQ#Ys=QYanBg*qgeNm&j(4{_OQ=|Ru)c$%2s1ERyI2;WO zL_iwAN@`!St2t@Y>qQ<&tkfuV2?xd*>b2K;{HRe+7FH@XyhERhm&JU|TMra54kOCj( zbl2IrDpu;Y^HsS$@gY{3gYPW7LOG*gS=*>&OMN6DQtiN7A8Dhbt?ki{@u;~^ViMsO z$r8X*(@f_A(=@=eS*gxCDm*RyjHQAi)tS6E%`hQu34b$*?3B~lM{K?Qiaf$#A3ECU zd-`P7o@c)=H0;1IZxrtr#Lvn(UQ2$65u-r4bS_L?be{`jvy~ zT*PA{0xlNO{IUGC6;4#g!4UFAEJAEu^>(Dq45NH8HZhYX?-bR5CyI=@>Kxh>; zA8z?uLV_YNykpqVx^>A(BA_Xa?|zWWBOXt>t#RxhV?@%-+f?8C!Qrm2QN4Qcxd=sz zU=xSAV0v#2Z?svH`Y{#o^T*cd^V85c?qdK*4`!tuxFJPo%$lkrI`xnuFS}}>f7Ql^ zPLqzE>AT2F&iGeRg4~WO3wx5Y@Oo0sD#;9IYPRyV=naUOQvm8|Q8_WI^ec#pK8Lv{ z(Qg;2j&>?E)GGS~$*;1^O_9;d?CN+-1?lGJ)01M6^;43HNk>J$q++VQA2ReQh|kJs z08%v|X={%LxM~^DL)k{ z?~#;EEhyURsHf^F6V83gB;sre)InZTZC|DX?S3)pf|$*X28W^@LtPJETVSruFt^5; zE4;)#30X6uoJ!s=2+A`Wav|1pMXC!iTz}rzD4_^km{HEp zs(wqT{kgl$qmC^{BV|+0!li}OMWH^l5ACYDIUvU#Q2$VUMo?bXs4m7-e?_RV!5YT!8}RWNz>UvG;B2 z`PsGSW>pu#fXckG(MZMd$oigyx!+-#N|c|BStb*(#`3Akz%@HA{?SC7pylK|IMz)nqBCx)RCs%C8Ji!r6Y(8Dvccz@+Ua*20nBKvZw zP-WCL9DQ1Zxo->-<^75B!Gw9*u?50u3OpvO)U2@!YB2R|OjX^b@GJN5^6NZvWr3MP z9UMZNA?}0fO3u+I%Uc!HRdNMg{f7^~w{@5;@|J6L_P-v`yl7@e(2g-Fa_(?&RlYV~ zqjSf$R42GE6!vg!zF?@KrJ2)~nDiw)SLE>+je^%;G;=y$r!Us>#k_tK=qaEN8oCFn z3g}g(2`l-wETU29$%IVoCpQ)q7>)mAeD;KCY|9DVTK$R9H{aSt&Q%^b^zP5!ec;e* zQ-_)d5%CJ-X)*9*W(12M{;AH|z!3NxyUQ}2=>9}`dL-*I93L^fHL1Azj$y8j`P3<& z<>d1?Xyu~NsQ?gSg#0EapLNQiCS>-dHX{AkW^nsCNG>=-BV`bEP!lF4WneOT59%hA zDnw0{AB&V7g`|^I3`fc#0S6Me!U-Il`|!*mZ4crSi)G2vdyk^ZMPUOzXJ=LOnhpP+ zEQVk3$@-0QdXAlE9|XTj(@}R_U2VLNAVe8nZrH;eL|a>^JMRg z41Xu#CUB{OyQ~wF5AK9Im~H*|aJ*{baQiX7(MnT9e{LZ~cgG4=33ZBzQ9?tK*0^SU zj|3**54+!K7EiUZR&UKmt(ARWaB?Bv=37TfxI%Yb3ouO2Vo}tMZPWM`CJDeGHJAR2zF=zsZ&>DE&ySB&24y0u*s|lv3mVwei_Oxd4Lw%>kyH@EzL+{>E~flUAzY!2pKVJ;vk`*1AhkXw`cjrVJCXB{bCu7@7pgPJkC z2ZN0xd=62#ev@oH8?&L`C}}!BG8`td#qbsdWw+(OnKg-~jHI34!;&|ok~QeXRRsSW zM?TN=&*3>%^gQ#fBrDl_lZ=(jTG1-tH}RycX+yl_$-af(Nb?tOty0GADC^elahCl3 z@^0a^W4oU0ec|B~vWK3!-C1B)_GYeg(6@kY9BlUz(XYemVqeAQVA 
z$lx}@A@p5%zDWMcNpPgyt# zXRm&O&G+{7e872oycc-;1Az?5%OvWuWr=49QbY?*@@G!Sdb+SCwh8JCN%=sck3?40 z(X{jzK*Hvve+Dm5O?(rM-Jb?Bz-()=XorDQmU<;A0ts1WhU`)x0}Qu0wjq*5-3gHx zPTOU5igkqO97U*=Tly{oGkek|NHhDWNJi3Y#lIA#zesOKyG8o*^dZqDW<;bX$LmGe zv(j_uha&R}Y3|{@pzbFbrUM_|q@Sw&aDVLPryE|E+$i4lm2zsVc}|CX@ZawM$gGwO z7GXQF-~I${*vu-pu&-<3!VN!H!$pIwxHW(r93==oLy;PucrS5VevkWD9=Cr087Dk( zzT^L9_`ek55&9hPKZWrR;KFAp#t8#L>%f>DFz$n^atX)s9>AZ3@ibgdtib;naC|^) z27|Y(z`t6;yrFNE>m0A8?+gZ;af7U zSx|~=K9lY&+8){Fdg4)iDD2hpp^lKt)6&$`(rj@Hp1t9q;PL6qE*Ji) zak+ha8k$|9`j#N{m)~kl^;)f^ROj&V!&GnJU#<3xs8I~;5WPdf*URI{pvZbk|Iy9z1C98oh3>(V#Q0&>08^&86O8OTYz= z4>ZDN6js#~!cLWx1j8jIcYKTC!YXHBVKrP2)fASL+*#^94#Waru-9M^EP9>S<1Z~0 z3_24wxYXdfi)x>aFBC$B_>t5a2)1~9p-@RljW5{L&@5m#V>)Bd7l5z;R$gcHYTbIj zx71`Y@uuX~n0)O*Q=rjTx;^L%)nnBbU8&z|F12V47PrSJYzsF9K>MA! zliTaI_)T7~U~s#)34xVTh9;q*8N>v=-oy(YScl(W_Lzi~L1nq)B5+~Y7i<<9@f5G% z=S?QT==Jhi1Ml9JjBj7PD*)f^Eg;hn?ntqh@Flup{I)ptDPIhs44K1u4ZOkQ77RYU z#p^G%l$x}>*=Vtt&0e3^iws9mcCPJYQFB?3&- z8~h%f*{d}PW@E8u&%P4g^}n5vOMKrZBfnYnk4Q-%x^L$E4|DPlQQwtX_a^1Ho_cRq z-k$pZoR*;X{~|Aeb~)khpO`M3nJXmkmzpk|n=9n*pPV3s&l((x-!?l_$IspA>B9M0 R(_var: T) -> () {} + +fn Consistent(_var: T, _id: u16) -> () {} + +#[no_mangle] +pub static IO_NAME: fn() -> i32 = tmp; + +#[no_mangle] +fn tmp() -> i32 { + 0 +} + +fn log(i: i32) -> () {} + +#[no_mangle] +fn app() -> () { + let x = tmp(); + Fresh(x); + log(x) +} + +fn main() -> () { + app() +} diff --git a/benchmarks/ctests/example03.ll b/benchmarks/ctests/example03.ll index f642b6b..df86c33 100644 --- a/benchmarks/ctests/example03.ll +++ b/benchmarks/ctests/example03.ll @@ -45,15 +45,15 @@ entry: call void @atomic_start() %call = call i32 @input() store i32 %call, ptr %x, align 4 - %0 = load i32, ptr %x, align 4 - call void @log(i32 noundef %0) - call void @atomic_end() store i32 1, ptr %y, align 4 - %1 = load i32, ptr %y, align 4 - %2 = add nsw i32 %1, 1 - store i32 %2, ptr %z, align 4 - %3 = load i32, ptr %z, align 4 - call void @log(i32 noundef %3) + %0 = load i32, ptr %y, 
align 4 + %add = add nsw i32 %0, 1 + store i32 %add, ptr %z, align 4 + %1 = load i32, ptr %z, align 4 + call void @log(i32 noundef %1) + %2 = load i32, ptr %x, align 4 + call void @log(i32 noundef %2) + call void @atomic_end() ret void } diff --git a/benchmarks/ctests/example04.c b/benchmarks/ctests/example04.c new file mode 100644 index 0000000..5cbb707 --- /dev/null +++ b/benchmarks/ctests/example04.c @@ -0,0 +1,27 @@ +#include + +void Fresh(int x) {} +void Consistent(int x, int id) {} +void FreshConsistent(int x, int id) {} + +void atomic_start() {} +void atomic_end() {} + +int input() { return 0; } +int (*IO_NAME)() = input; + +void log(int x) { + printf("%d\n", x); +} + +void app() { + int x = input(); + int y = input(); + log(y); + Consistent(x, 1); + FreshConsistent(y, 1); +} + +int main() { + app(); +} \ No newline at end of file diff --git a/benchmarks/ctests/example04.orig.ll b/benchmarks/ctests/example04.orig.ll new file mode 100644 index 0000000..8491e4d --- /dev/null +++ b/benchmarks/ctests/example04.orig.ll @@ -0,0 +1,102 @@ +; ModuleID = '../../benchmarks/ctests/example04.c' +source_filename = "../../benchmarks/ctests/example04.c" +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-macosx12.0.0" + +@IO_NAME = global ptr @input, align 8 +@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @Fresh(i32 noundef %x) #0 { +entry: + %x.addr = alloca i32, align 4 + store i32 %x, ptr %x.addr, align 4 + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @Consistent(i32 noundef %x, i32 noundef %id) #0 { +entry: + %x.addr = alloca i32, align 4 + %id.addr = alloca i32, align 4 + store i32 %x, ptr %x.addr, align 4 + store i32 %id, ptr %id.addr, align 4 + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @FreshConsistent(i32 noundef %x, i32 noundef %id) #0 { 
+entry: + %x.addr = alloca i32, align 4 + %id.addr = alloca i32, align 4 + store i32 %x, ptr %x.addr, align 4 + store i32 %id, ptr %id.addr, align 4 + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @atomic_start() #0 { +entry: + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @atomic_end() #0 { +entry: + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @input() #0 { +entry: + ret i32 0 +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @log(i32 noundef %x) #0 { +entry: + %x.addr = alloca i32, align 4 + store i32 %x, ptr %x.addr, align 4 + %0 = load i32, ptr %x.addr, align 4 + %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %0) + ret void +} + +declare i32 @printf(ptr noundef, ...) #1 + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @app() #0 { +entry: + %x = alloca i32, align 4 + %y = alloca i32, align 4 + %call = call i32 @input() + store i32 %call, ptr %x, align 4 + %call1 = call i32 @input() + store i32 %call1, ptr %y, align 4 + %0 = load i32, ptr %y, align 4 + call void @log(i32 noundef %0) + %1 = load i32, ptr %x, align 4 + call void @Consistent(i32 noundef %1, i32 noundef 1) + %2 = load i32, ptr %y, align 4 + call void @FreshConsistent(i32 noundef %2, i32 noundef 1) + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @main() #0 { +entry: + call void @app() + ret i32 0 +} + +attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } +attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } + +!llvm.module.flags = !{!0, !1, !2, !3} +!llvm.ident = !{!4} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 8, !"PIC Level", i32 2} +!2 = !{i32 7, !"uwtable", i32 1} +!3 = !{i32 7, !"frame-pointer", i32 1} +!4 = !{!"Homebrew clang version 17.0.2"} diff --git a/ocelot/AtomicRegionInference/Makefile b/ocelot/AtomicRegionInference/Makefile index 306f019..360ec68 100644 --- a/ocelot/AtomicRegionInference/Makefile +++ b/ocelot/AtomicRegionInference/Makefile @@ -4,6 +4,7 @@ all: make eg1 make eg2 make eg3 + make eg4 eg1: TEST=example01 make test @@ -11,6 +12,8 @@ eg2: TEST=example02 make test eg3: TEST=example03 make test +eg4: + TEST=example04 make test run_eg1: TEST=example01 make run @@ -18,6 +21,8 @@ run_eg2: TEST=example02 make run run_eg3: TEST=example03 make run +run_eg4: + TEST=example04 make run test: $(MAKE) -C build diff --git a/ocelot/AtomicRegionInference/src/Helpers.cpp b/ocelot/AtomicRegionInference/src/Helpers.cpp index a0f62cf..5ca398e 100644 --- a/ocelot/AtomicRegionInference/src/Helpers.cpp +++ b/ocelot/AtomicRegionInference/src/Helpers.cpp @@ -34,3 +34,11 @@ void printInsts(const inst_vec& iv) { errs() << *inst << "\n"; } } + +void printIntInsts(const std::map& iim) { + for (auto& [id, insts] : iim) { + errs() << id << " ->\n"; + printInsts(insts); + errs() << "\n"; + } +} diff --git a/ocelot/AtomicRegionInference/src/InferAtoms.cpp b/ocelot/AtomicRegionInference/src/InferAtoms.cpp index b0219cd..be7f108 100644 --- a/ocelot/AtomicRegionInference/src/InferAtoms.cpp +++ b/ocelot/AtomicRegionInference/src/InferAtoms.cpp @@ -30,6 +30,8 @@ PreservedAnalyses InferAtomsPass::run(Module& M, ModuleAnalysisManager& AM) { std::map consVars; inst_vec_vec freshVars; inst_insts_map inputMap = buildInputs(this->M); + errs() << "inputMap:\n"; + printInstInsts(inputMap); inst_vec 
toDelete; getAnnotations(&consVars, &freshVars, inputMap, &toDelete); // TODO: need to add unique point of call chain prefix to cons set @@ -90,47 +92,60 @@ void InferAtomsPass::getAnnotations(std::map* consVars, inst_vec_ for (auto& I : B) { if (auto* ci = dyn_cast(&I)) { #if DEBUG - errs() << "[Loop Inst] cur inst = CallInst\n"; + errs() << "[Loop Inst] Found call: " << *ci << "\n"; #endif auto* fun = ci->getCalledFunction(); // Various empty or null checks if (fun == NULL || fun->empty() || !fun->hasName()) continue; // Consistent and FreshConsistent + // TODO: Fix FreshConsistent if (isAnnot(fun->getName()) && !fun->getName().equals("Fresh")) { -#if DEBUG - errs() << "[Loop Inst] Calls Consistent/FreshConsistent\n"; -#endif toDelete->push_back(ci); - // First para is var, second is id int setID; // Bit cast use of x, then value operand of store - Instruction* var = dyn_cast(ci->getOperand(0)); - + auto* var = dyn_cast(ci->getOperand(0)); if (var == NULL) continue; - // errs() << "New consistent annot. with " << *var<<"\n"; - Value* id = ci->getOperand(1); - if (ConstantInt* cint = dyn_cast(id)) { +#if DEBUG + errs() << "Cons. annot. for: " << *var << "\n"; +#endif + + auto* id = ci->getOperand(1); + if (auto* cint = dyn_cast(id)) { setID = cint->getSExtValue(); +#if DEBUG + errs() << "In set with label: " << setID << "\n"; +#endif } + std::queue customUsers; std::set v; // v.emplace(ci); // in case var itself is iOp - for (Instruction* iOp : inputMap[var]) { - v.emplace(iOp); +#if DEBUG + errs() << "Add to v inputs assoc. w/ Cons. var:\n"; +#endif + for (auto* input : inputMap[var]) { +#if DEBUG + errs() << "Input: " << *input << "\n"; +#endif + v.emplace(input); } // customUsers.push(var); - for (Value* use : var->users()) { - // don't push the annotation - if (use == ci) { - continue; - } - // errs() << "DEBUG: pushing use of var: " << *use << "\n"; +#if DEBUG + errs() << "Collect uses of Cons. 
var:\n"; +#endif + for (auto* use : var->users()) { + // Don't push the annotation + if (use == ci) continue; +#if DEBUG + errs() << "Use: " << *use << "\n"; +#endif customUsers.push(use); } + while (!customUsers.empty()) { - Value* use = customUsers.front(); + auto* use = customUsers.front(); customUsers.pop(); // errs() << "DEBUG: use is " << *use << " of var " << *var<<"\n"; if (Instruction* instUse = dyn_cast(use)) { @@ -168,34 +183,67 @@ void InferAtomsPass::getAnnotations(std::map* consVars, inst_vec_ } } } - // last case - if (v.empty()) { - // some entries have a first link with ci, not var - for (Instruction* iOp : inputMap[ci]) { + // Last case + if (v.empty()) { +#if DEBUG + errs() << "v empty, go over inputs assoc. w/ Cons. annot.:\n"; +#endif + // Some entries have a first link with ci, not var + for (auto* input : inputMap[ci]) { +#if DEBUG + errs() << "Input: " << *input << "\n"; +#endif if (inputMap[ci].size() == 1) { - for (Instruction* origLink : inputMap[iOp]) { +#if DEBUG + errs() << "Set of assoc. inputs is a singleton\n"; +#endif + for (auto* origLink : inputMap[input]) { +#if DEBUG + errs() << "Add to v the original input: " << *origLink << "\n"; +#endif v.emplace(origLink); } } else { - v.emplace(iOp); +#if DEBUG + errs() << "Set of assoc. input isn't a singleton, add to v the input\n"; +#endif + v.emplace(input); } } } - // for later deletion purposes + + // For later deletion purposes +#if DEBUG + errs() << "Remove inputs assoc. w/ Cons. 
annot.\n"; +#endif inputMap.erase(ci); if (!v.empty()) { - inst_vec temp; - for (Instruction* item : v) { - temp.push_back(item); +#if DEBUG + errs() << "v not empty\n"; +#endif + inst_vec tmp; +#if DEBUG + errs() << "Add each item in v to tmp:\n"; +#endif + for (auto* item : v) { +#if DEBUG + errs() << "Item: " << *item << "\n"; +#endif + tmp.push_back(item); } - // add the collected list to the map + + // Add the collected list to the map if (consVars->find(setID) != consVars->end()) { - consVars->at(setID).insert(consVars->at(setID).end(), temp.begin(), temp.end()); + consVars->at(setID).insert(consVars->at(setID).end(), tmp.begin(), tmp.end()); } else { - consVars->emplace(setID, temp); + consVars->emplace(setID, tmp); } +#if DEBUG + errs() << "Add tmp items to consVars: \n"; + printIntInsts(*consVars); +#endif } } else if (fun->getName().equals("Fresh")) { #if DEBUG @@ -247,10 +295,7 @@ void InferAtomsPass::getAnnotations(std::map* consVars, inst_vec_ } } } - } else { - // errs() << "error casting\n"; } - // errs() << "New Fresh annot. with " << *var<<"\n"; // v.push_back(ci); #if DEBUG @@ -341,33 +386,71 @@ void InferAtomsPass::removeAnnotations(inst_vec* toDelete) { } } -/*Given the starting point annotations of conSets, find the -deepest unique point of the call chain*/ +// Given the starting point annotations of conSets, find the +// deepest unique point of the call chain std::map InferAtomsPass::collectCons(std::map startingPoints, inst_insts_map inputMap) { +#if DEBUG + errs() << "=== collectCons ===\n"; +#endif std::map toReturn; - for (std::pair iv : startingPoints) { + +#if DEBUG + errs() << "Go over all cons. sets\n"; +#endif + for (auto& [id, starts] : startingPoints) { +#if DEBUG + errs() << "Go over cons. 
set " << id << "\n"; +#endif std::set unique; std::map> callChains; - // each item should be the starting point from a different annot - for (Instruction* item : iv.second) { + + // Each item should be the starting point from a different annot + for (auto* start : starts) { +#if DEBUG + errs() << "Starting point: " << *start << "\n"; +#endif + // Add self to call chain #if DEBUG - errs() << "Starting point: " << *item << "\n"; + errs() << "Add starting point to call chain\n"; #endif - // add self to call chain - callChains[item].insert(item); + callChains[start].insert(start); - for (Instruction* iOp : inputMap[item]) { +#if DEBUG + errs() << "Go over inputs assoc. w/ starting point:\n"; +#endif + for (auto* input : inputMap[start]) { // unique.insert(iOp); - callChains[item].insert(iOp); +#if DEBUG + errs() << "Input: " << *input << "\n"; + errs() << "Add input to call chain\n"; +#endif + callChains[start].insert(input); + std::queue toExplore; - toExplore.push(iOp); +#if DEBUG + errs() << "Add input to toExplore, go over toExplore\n"; +#endif + toExplore.push(input); + while (!toExplore.empty()) { - Instruction* curr = toExplore.front(); + auto* cur = toExplore.front(); toExplore.pop(); - for (Instruction* intermed : inputMap[curr]) { - if (!(find(callChains[item].begin(), callChains[item].end(), intermed) != callChains[item].end())) { - callChains[item].insert(intermed); +#if DEBUG + errs() << "Exploring cur: " << *cur << "\n"; + errs() << "Go over inputs assoc. 
w/ cur: " << *cur << "\n"; +#endif + + for (auto* intermed : inputMap[cur]) { +#if DEBUG + errs() << "intermed: " << *intermed << "\n"; +#endif + if (find(callChains[start].begin(), callChains[start].end(), intermed) == callChains[start].end()) { + callChains[start].insert(intermed); toExplore.push(intermed); + } else { +#if DEBUG + errs() << "intermed already in call chain\n"; +#endif } } } @@ -375,57 +458,78 @@ std::map InferAtomsPass::collectCons(std::map star } // finish constructing call chain for one annot. in the set } // constructed call chains for ALL annot. in the set. - // now check the call chain + // now check the call chain // int index = 0; // map foundUniquePoint; // clean up the call chains - for (auto ccmap : callChains) { - for (Instruction* possibility : ccmap.second) { - // if the link is in the same function, then continue - // errs() << "examining possibility: " << *possibility << "\n"; - bool sf = false; - for (Instruction* link : inputMap[possibility]) { - // errs() << "next link is" << *link << "\n"; - if ((link != possibility) && link->getFunction() == possibility->getFunction()) { - sf = true; - } - } - if (sf) { +#if DEBUG + errs() << "Finished building call chains, go over them\n"; +#endif + for (auto callChain : callChains) { +#if DEBUG + errs() << "Next chain\n"; +#endif + auto& [id, chain] = callChain; + for (auto* inst : chain) { +#if DEBUG + errs() << "Cur point along chain: " << *inst << "\n"; +#endif + bool isSameFun = false; + for (auto* link : inputMap[inst]) + isSameFun = ((link != inst) && link->getFunction() == inst->getFunction()); + if (isSameFun) { +#if DEBUG + errs() << "Continue if the link is in the same function\n"; +#endif continue; } + bool isUnique = true; - for (auto ccmapNest : callChains) { - // if self then skip - if (ccmapNest == ccmap) { - continue; - } - // otherwise check if this map also contains the possibility - if (find(ccmapNest.second.begin(), ccmapNest.second.end(), possibility) != 
ccmapNest.second.end()) { + for (auto otherCallChain : callChains) { + // Skip if self + if (otherCallChain == callChain) continue; + auto& [_, otherChain] = otherCallChain; + // Otherwise check if this map also contains inst + if (find(otherChain.begin(), otherChain.end(), inst) != otherChain.end()) { isUnique = false; break; } } + if (isUnique) { - unique.insert(possibility); - // errs() << "Found unique!" << *possibility << "\n"; - } else { - // try another poss. - continue; + unique.insert(inst); +#if DEBUG + errs() << "Found unique point along chain: " << *inst << "\n"; +#endif } } } inst_vec v; - for (Instruction* item2 : unique) { - if (!isa(item2)) { - v.push_back(item2); +#if DEBUG + errs() << "Go over unique insts\n"; +#endif + for (auto* inst : unique) { + if (!isa(inst)) { +#if DEBUG + errs() << "Unique inst != AllocaInst, add to v: " << *inst << "\n"; +#endif + v.push_back(inst); } } - toReturn[iv.first] = v; + +#if DEBUG + errs() << "Add v to toReturn at ID " << id << ": \n"; + printInsts(v); +#endif + toReturn[id] = v; } // end starting point check +#if DEBUG + errs() << "*** collectCons ***\n"; +#endif return toReturn; } diff --git a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp index 2802f76..8dc4375 100644 --- a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp +++ b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp @@ -60,10 +60,9 @@ BasicBlock* InferFreshCons::getLoopEnd(BasicBlock* bb) { // Top level region inference function -- could flatten later void InferFreshCons::inferConsistent(std::map consSets) { - // TODO: start with pseudo code structure from design doc - for (auto [id, set] : consSets) { + for (auto& [id, set] : consSets) { #if DEBUG - errs() << "[InferConsistent] starting set " << id << "\n"; + errs() << "[InferConsistent] Adding region for set " << id << "\n"; #endif addRegion(set, Consistent); } @@ -74,7 +73,6 @@ void InferFreshCons::inferFresh(inst_vec_vec 
freshSets) { #if DEBUG errs() << "=== inferFresh ===\n"; #endif - // TODO: start with pseudo code structure from design doc for (auto freshSet : freshSets) addRegion(freshSet, Fresh); #if DEBUG errs() << "*** inferFresh ***\n"; @@ -220,7 +218,7 @@ void InferFreshCons::addRegion(inst_vec targetInsts, RegionKind regionKind) { clone->setOperand(i, it->second); } } - } else if (auto* ci = dyn_cast(&I)) { + } else if (isa(&I)) { clone = I.clone(); if (auto* op = dyn_cast(I.getOperand(0))) { @@ -338,9 +336,7 @@ void InferFreshCons::addRegion(inst_vec targetInsts, RegionKind regionKind) { } else if (endDom == nullptr) { errs() << "[Error] Null endDom after scope merge\n"; } -#if DEBUG - errs() << "[Loop regionsNeeded] Insert insts\n"; -#endif + // TODO: fallback if endDom is null? Need hyper-blocks, I think // possibly can do a truncation check, to lessen the size a little, but could that interfere with compiler optimizations? auto* regionStart = truncate(startDom, true, targetInsts, seenFuns); @@ -381,15 +377,13 @@ Instruction* InferFreshCons::truncate(BasicBlock* B, bool forwards, inst_vec set #if DEBUG errs() << "Set:\n"; - for (auto& inst : set) - errs() << *inst << "\n"; + printInsts(set); #endif // Truncate the front if (forwards) { #if DEBUG - errs() << "Truncate startDom\n"; - errs() << "Go over each inst\n"; + errs() << "Truncate startDom, go over each inst\n"; #endif for (auto& I : *B) { // Stop at first inst in bb that is in the set. 
@@ -414,11 +408,10 @@ Instruction* InferFreshCons::truncate(BasicBlock* B, bool forwards, inst_vec set } #if DEBUG - errs() << "Truncate endDom\n"; - errs() << "Go over each inst in reverse\n"; + errs() << "Truncate endDom, go over each inst in reverse\n"; #endif // Reverse directions if not forwards - Instruction* prev = NULL; + Instruction* prev; for (auto I = B->rbegin(), rend = B->rend(); I != rend; I++) { auto* inst = &*I; if (find(set.begin(), set.end(), inst) != set.end()) { @@ -427,7 +420,7 @@ Instruction* InferFreshCons::truncate(BasicBlock* B, bool forwards, inst_vec set #endif // Need to return the previous inst (next in forwards), // as it should be inserted before the returned inst - if (prev == NULL) { + if (prev == nullptr) { // Only happens if use is a ret inst, which is a scope use to make the branching // work, not an actual one, so this is safe return inst; @@ -585,7 +578,7 @@ inst_inst_pair InferFreshCons::findShortest(inst_inst_vec regionsFound) { // Get the max length from the bb to the end instruction std::vector v; int endLength = getSubLength(startParent, end, v); - // Substract the prefix before the start inst + // Subtract the prefix before the start inst endLength -= prefixLength; #if DEBUG errs() << "[Loop regionsFound] Region length " << endLength << "\n"; diff --git a/ocelot/AtomicRegionInference/src/TaintTracker.cpp b/ocelot/AtomicRegionInference/src/TaintTracker.cpp index f80f7ce..1c400d9 100644 --- a/ocelot/AtomicRegionInference/src/TaintTracker.cpp +++ b/ocelot/AtomicRegionInference/src/TaintTracker.cpp @@ -12,7 +12,7 @@ inst_insts_map buildInputs(Module* M) { for (auto inputInst : inputInsts) { #if DEBUG - errs() << "[Loop inputInst] orig input: " << *inputInst << "\n"; + errs() << "[Loop inputInst] inputInst: " << *inputInst << "\n"; #endif // Add self to map @@ -32,24 +32,24 @@ inst_insts_map buildInputs(Module* M) { #if DEBUG errs() << "=== Loop toExplore ===\n"; #endif - auto* curVal = toExplore.front(); + auto* curInst = 
toExplore.front(); toExplore.pop(); - if (curVal == NULL) continue; + if (curInst == NULL) continue; #if DEBUG - errs() << "[Loop toExplore] cur inst: " << *curVal << "\n"; + errs() << "[Loop toExplore] curInst: " << *curInst << "\n"; #endif val_vec interProcFlows; - if (curVal == inputInst) { + if (curInst == inputInst) { #if DEBUG - errs() << "[Loop toExplore] cur inst = orig input\n"; - errs() << "[Loop toExplore] Call traverseLocal with cur inst (tainted), orig input (srcInput), caller (none)\n"; + errs() << "[Loop toExplore] curInst = inputInst\n"; + errs() << "[Loop toExplore] Call traverseLocal with curInst (tainted), origInput (srcInput), caller (none)\n"; #endif - interProcFlows = traverseLocal(curVal, inputInst, &taintedInsts, nullptr); + interProcFlows = traverseLocal(curInst, inputInst, &taintedInsts, nullptr); #if DEBUG - errs() << "[Loop toExplore] [cur inst = orig input] Inspect interProcFlows:\n"; + errs() << "[Loop toExplore][curInst = inputInst] Inspect interProcFlows:\n"; #endif for (auto* vipf : interProcFlows) { if (auto* iipf = dyn_cast(vipf)) { @@ -59,19 +59,19 @@ inst_insts_map buildInputs(Module* M) { } #if DEBUG - errs() << "Adding orig input (" << *inputInst << ") to set at " << *iipf << "\n"; + errs() << "Add inputInst (" << *inputInst << ") to set at " << *iipf << "\n"; #endif taintedInsts[iipf].insert(inputInst); } } - } else if (isa(curVal)) { + } else if (isa(curInst)) { #if DEBUG errs() << "[Loop toExplore] cur inst = CallInst\n"; #endif // Note it will not be iop, even though iop is a call // This case handles both returns and pbref - promotedInputs.push_back(dyn_cast(curVal)); + promotedInputs.push_back(dyn_cast(curInst)); auto* next = toExplore.front(); toExplore.pop(); // If the next is a return, this was a return flow @@ -81,11 +81,11 @@ inst_insts_map buildInputs(Module* M) { #if DEBUG errs() << "[Loop toExplore] cur inst next = Return inst (return flow)\n"; #endif - interProcFlows = traverseLocal(curVal, dyn_cast(curVal), 
&taintedInsts, nullptr); + interProcFlows = traverseLocal(curInst, dyn_cast(curInst), &taintedInsts, nullptr); for (Value* vipf : interProcFlows) { if (Instruction* iipf = dyn_cast(vipf)) { // don't add self - if (curVal == vipf) { + if (curInst == vipf) { continue; } if (CallInst* anno_check = dyn_cast(iipf)) { @@ -95,7 +95,7 @@ inst_insts_map buildInputs(Module* M) { continue; } } - taintedInsts[iipf].insert(dyn_cast(curVal)); + taintedInsts[iipf].insert(dyn_cast(curInst)); } } } else if (isa(next)) { @@ -105,7 +105,7 @@ inst_insts_map buildInputs(Module* M) { // Grab the para corresponding to the argument int index = -1; int i = 0; - CallInst* ci = dyn_cast(curVal); + CallInst* ci = dyn_cast(curInst); if (ci->getCalledFunction() == NULL) continue; if (ci->getCalledFunction()->empty()) continue; @@ -200,7 +200,7 @@ inst_insts_map buildInputs(Module* M) { } // re nullptr check if (fstUse != nullptr) { - interProcFlows = traverseLocal(fstUse, dyn_cast(curVal), &taintedInsts, nullptr); + interProcFlows = traverseLocal(fstUse, dyn_cast(curInst), &taintedInsts, nullptr); for (Value* vipf : interProcFlows) { if (Instruction* iipf = dyn_cast(vipf)) { if (CallInst* anno_check = dyn_cast(iipf)) { @@ -210,15 +210,15 @@ inst_insts_map buildInputs(Module* M) { continue; } } - taintedInsts[iipf].insert(dyn_cast(curVal)); + taintedInsts[iipf].insert(dyn_cast(curInst)); } } } } } - } else if (isa(curVal)) { + } else if (isa(curInst)) { #if DEBUG - errs() << "[Loop toExplore] cur inst = Argument (tainted arg)\n"; + errs() << "[Loop toExplore] curInst = Argument (tainted arg)\n"; #endif auto* caller = dyn_cast(toExplore.front()); @@ -231,11 +231,11 @@ inst_insts_map buildInputs(Module* M) { auto* innerInputInst = dyn_cast(toExplore.front()); toExplore.pop(); #if DEBUG - errs() << "[Loop toExplore] orig input: " << *innerInputInst << "\n"; - errs() << "[Loop toExplore] Call traverseLocal with cur inst (tainted), orig input (srcInput), caller\n"; + errs() << "[Loop toExplore] 
inputInst: " << *innerInputInst << "\n"; + errs() << "[Loop toExplore] Call traverseLocal with curInst (tainted), inputInst, caller\n"; #endif - interProcFlows = traverseLocal(curVal, innerInputInst, &taintedInsts, caller); + interProcFlows = traverseLocal(curInst, innerInputInst, &taintedInsts, caller); #if DEBUG errs() << "[Loop toExplore] Inspect interProcFlows:\n"; @@ -282,26 +282,26 @@ val_vec traverseLocal(Value* tainted, Instruction* srcInput, inst_insts_map* tai errs() << "=== traverseLocal ===\n"; #endif - val_vec interProcSinks; + val_vec interProcFlows; std::queue localDeps; #if DEBUG - errs() << "Add cur inst to localDeps\n"; + errs() << "Add tainted inst to localDeps\n"; #endif localDeps.push(tainted); while (!localDeps.empty()) { #if DEBUG errs() << "=== Loop localDeps ===\n"; #endif - auto* curVal = localDeps.front(); + auto* curInst = localDeps.front(); localDeps.pop(); #if DEBUG - errs() << "[Loop localDeps] cur inst: " << *curVal << "\n"; + errs() << "[Loop localDeps] curInst: " << *curInst << "\n"; #endif val_vec customUsers; - if (auto* si = dyn_cast(curVal)) { + if (auto* si = dyn_cast(curInst)) { #if DEBUG - errs() << "[Loop localDeps] cur inst = StoreInst\n"; + errs() << "[Loop localDeps] curInst = StoreInst\n"; #endif // Add the pointer to deps, as stores have no uses // Add info on the store to the map @@ -314,32 +314,46 @@ val_vec traverseLocal(Value* tainted, Instruction* srcInput, inst_insts_map* tai seti.insert(srcInput); taintedInsts->emplace(si, seti); } + + // See if it is (or aliases?) one of the function arguments (PBRef comp) + auto* storePtr = si->getPointerOperand()->stripPointerCasts(); + errs() << "[Loop args] storePtr: " << *storePtr << "\n"; #if DEBUG - errs() << "[Loop localDeps] Adding orig input (" << *srcInput << ") to set at cur inst (" << *si << ")\n"; + errs() << "[Loop localDeps] Go over fun args\n"; #endif - // See if it is (or aliases?) 
one of the function arguments (PBRef comp) for (auto& arg : si->getFunction()->args()) { - auto* storePtr = si->getPointerOperand()->stripPointerCasts(); #if DEBUG - errs() << "[Loop localDeps] Is ptr being stored to (" << *storePtr << ") = fun arg (" << arg << ")\n"; + errs() << "[Loop args] arg: " << arg << "\n"; + // errs() << "[Loop localDeps] Is ptr being stored to (" << *storePtr << ") = fun arg (" << arg << ")\n"; #endif if (storePtr == &arg) { - // if taint came from inside any callsite is potentially tainted + // storePtr: _x_ = input(); + // arg: Consistent(_x_, 1); +#if DEBUG + errs() << "[Loop args] storePtr = arg\n"; +#endif + // If taint came from inside any callsite is potentially tainted if (caller == nullptr) { +#if DEBUG + errs() << "[Loop args] Caller = nullptr"; +#endif for (auto calls : si->getFunction()->users()) { - interProcSinks.push_back(calls); - interProcSinks.push_back(dyn_cast(&arg)); + interProcFlows.push_back(calls); + interProcFlows.push_back(dyn_cast(&arg)); if (auto key = dyn_cast(calls)) { - // check to make sure not already visited + // Check to make sure not already visited // taintedInsts->at(key).insert(srcOp); } } } else { - // otherwise, just the caller's - interProcSinks.push_back(caller); - interProcSinks.push_back(dyn_cast(&arg)); +#if DEBUG + errs() << "[Loop args] Caller: " << *caller << "\n"; +#endif + // Otherwise, just the caller's + interProcFlows.push_back(caller); + interProcFlows.push_back(dyn_cast(&arg)); if (auto key = dyn_cast(caller)) { - // check to make sure not already visited + // Check to make sure not already visited // taintedInsts->at(key).insert(srcOp); } } @@ -357,14 +371,14 @@ val_vec traverseLocal(Value* tainted, Instruction* srcInput, inst_insts_map* tai if (auto* useOfStore = dyn_cast(use)) { if (storePrecedesUse(useOfStore, si)) { #if DEBUG - errs() << "[Loop Store Users] store precedes this use, add:" << *useOfStore << "\n"; + errs() << "[Loop Store Users] Store precedes this use, add:" << 
*useOfStore << "to customUsers\n"; #endif customUsers.push_back(useOfStore); } } } // Update curVal to be the pointer - curVal = si->getPointerOperand(); + curInst = si->getPointerOperand(); // If it's a gepi, see if there are others that occur afterwards if (isa(si->getPointerOperand())) { @@ -375,22 +389,22 @@ val_vec traverseLocal(Value* tainted, Instruction* srcInput, inst_insts_map* tai // check pbref, need to compare op of the gepi, not gepi itself for (auto& arg : si->getFunction()->args()) { #if DEBUG - errs() << " PBRef comp: " << *dyn_cast(curVal)->getOperand(0) << " and " << arg << "\n"; + errs() << " PBRef comp: " << *dyn_cast(curInst)->getOperand(0) << " and " << arg << "\n"; #endif - if (dyn_cast(curVal)->getOperand(0) == &arg) { + if (dyn_cast(curInst)->getOperand(0) == &arg) { // if taint came from inside any callsite is potentially tainted if (caller == nullptr) { for (Value* calls : si->getFunction()->users()) { - interProcSinks.push_back(calls); - interProcSinks.push_back(dyn_cast(&arg)); + interProcFlows.push_back(calls); + interProcFlows.push_back(dyn_cast(&arg)); if (Instruction* key = dyn_cast(calls)) { // taintedInsts->at(key).insert(srcOp); } } } else { // otherwise, just the caller's - interProcSinks.push_back(caller); - interProcSinks.push_back(dyn_cast(&arg)); + interProcFlows.push_back(caller); + interProcFlows.push_back(dyn_cast(&arg)); if (Instruction* key = dyn_cast(caller)) { // taintedInsts->at(key).insert(srcOp); } @@ -401,12 +415,12 @@ val_vec traverseLocal(Value* tainted, Instruction* srcInput, inst_insts_map* tai } else { #if DEBUG - errs() << "[Loop localDeps] cur inst != StoreInst\n"; - errs() << "[Loop localDeps] Add users of cur inst to customUsers:\n"; - for (auto* use : curVal->users()) errs() << *use << "\n"; + errs() << "[Loop localDeps] curInst != StoreInst\n"; + errs() << "[Loop localDeps] Add users of curInst to customUsers:\n"; + for (auto* use : curInst->users()) errs() << *use << "\n"; #endif // If not a store, do 
normal users of curVal - customUsers.insert(customUsers.end(), curVal->user_begin(), curVal->user_end()); + customUsers.insert(customUsers.end(), curInst->user_begin(), curInst->user_end()); } #if DEBUG @@ -428,9 +442,9 @@ val_vec traverseLocal(Value* tainted, Instruction* srcInput, inst_insts_map* tai #endif for (auto calls : ri->getFunction()->users()) { if (auto ci = dyn_cast(calls)) { - interProcSinks.push_back(calls); + interProcFlows.push_back(calls); // extra for bookkeeping - interProcSinks.push_back(use); + interProcFlows.push_back(use); } } } else { @@ -438,9 +452,9 @@ val_vec traverseLocal(Value* tainted, Instruction* srcInput, inst_insts_map* tai errs() << "[Loop customUsers] Some caller\n"; #endif // otherwise, just the caller's - interProcSinks.push_back(caller); + interProcFlows.push_back(caller); // extra for bookkeeping - interProcSinks.push_back(use); + interProcFlows.push_back(use); } } else if (auto* ci = dyn_cast(use)) { #if DEBUG @@ -475,16 +489,16 @@ val_vec traverseLocal(Value* tainted, Instruction* srcInput, inst_insts_map* tai // if taint came from inside any callsite is potentially tainted if (caller == nullptr) { for (Value* calls : ci->getFunction()->users()) { - interProcSinks.push_back(calls); - interProcSinks.push_back(dyn_cast(&arg)); + interProcFlows.push_back(calls); + interProcFlows.push_back(dyn_cast(&arg)); if (Instruction* key = dyn_cast(calls)) { // taintedInsts->at(key).insert(srcOp); } } } else { // otherwise, just the caller's - interProcSinks.push_back(caller); - interProcSinks.push_back(dyn_cast(&arg)); + interProcFlows.push_back(caller); + interProcFlows.push_back(dyn_cast(&arg)); if (Instruction* key = dyn_cast(caller)) { // taintedInsts->at(key).insert(srcOp); } @@ -532,22 +546,24 @@ val_vec traverseLocal(Value* tainted, Instruction* srcInput, inst_insts_map* tai } unsigned int arg_num = ci->arg_size(); + auto funName = calledFun->getName(); #if DEBUG - errs() << "[Loop customUsers] Find tainted arg of " << 
calledFun->getName() << "\n"; + errs() << "[Loop customUsers] Find tainted arg of " << funName << "\n"; #endif - // Find the index of the tainted argument + // Find the param index of the tainted argument for (unsigned int i = 0; i < arg_num; i++) { auto* arg = ci->getArgOperand(i); - if (arg == curVal) { - auto funArg = calledFun->arg_begin() + i; + if (arg == curInst) { + auto param = calledFun->arg_begin() + i; #if DEBUG - errs() << "Found tainted arg: " << *arg << ", add fun arg (" << *funArg << "), the use (" << *ci << "), and orig input (" << *srcInput << ") to interProcFlows\n"; + errs() << "[Loop customUsers] Found tainted arg of " << funName << ": " << *arg << "\n"; + errs() << "[Loop customUsers] Add to interProcFlows the corresp. param " << *param << ", the call " << *ci << ", and srcInput " << *srcInput << "\n"; #endif - interProcSinks.push_back(funArg); + interProcFlows.push_back(param); // MUST also push back the call inst. - interProcSinks.push_back(ci); + interProcFlows.push_back(ci); // MUST also push back the current srcInput - interProcSinks.push_back(srcInput); + interProcFlows.push_back(srcInput); if (auto* key = dyn_cast(ci)) { // taintedInsts->at(key).insert(srcOp); } @@ -556,8 +572,10 @@ val_vec traverseLocal(Value* tainted, Instruction* srcInput, inst_insts_map* tai } } else if (auto* iUse = dyn_cast(use)) { #if DEBUG - errs() << "[Loop customUsers] use != ReturnInst & use != CallInst\n"; + errs() << "[Loop customUsers] use != ReturnInst & use != CallInst:\n"; + errs() << *iUse << "\n"; #endif + if (iUse->isTerminator()) { if (iUse->getNumSuccessors() > 1) { // Add control deps off of a branch. 
@@ -589,7 +607,7 @@ val_vec traverseLocal(Value* tainted, Instruction* srcInput, inst_insts_map* tai #if DEBUG errs() << "*** traverseLocal ***\n"; #endif - return interProcSinks; + return interProcFlows; } inst_vec findInputInsts(Module* M) { @@ -615,7 +633,6 @@ inst_vec findInputInsts(Module* M) { errs() << "Found IO call: " << I << "\n"; #endif inputInsts.push_back(&I); - break; } } } diff --git a/ocelot/AtomicRegionInference/src/include/Helpers.h b/ocelot/AtomicRegionInference/src/include/Helpers.h index bfb6901..d5f553d 100644 --- a/ocelot/AtomicRegionInference/src/include/Helpers.h +++ b/ocelot/AtomicRegionInference/src/include/Helpers.h @@ -14,5 +14,6 @@ std::string getSimpleNodeLabel(const Value* Node); bool isAnnot(const StringRef annotName); void printInstInsts(const inst_insts_map& iim, bool onlyCalls = false); void printInsts(const inst_vec& iv); +void printIntInsts(const std::map& iim); #endif \ No newline at end of file From 7c01f9ab37804681ff6332d11cca33c556efad5d Mon Sep 17 00:00:00 2001 From: Robert Zhang Date: Sat, 3 Feb 2024 21:38:54 -0500 Subject: [PATCH 07/18] [InferAtomsPass] Minimize consistent atomic regions ...by moving non-IO instructions out of regions. 
--- benchmarks/ctests/example04.ll | 76 +++++++++++++++++++ .../src/InferFreshCons.cpp | 25 +++--- 2 files changed, 92 insertions(+), 9 deletions(-) create mode 100644 benchmarks/ctests/example04.ll diff --git a/benchmarks/ctests/example04.ll b/benchmarks/ctests/example04.ll new file mode 100644 index 0000000..d05e743 --- /dev/null +++ b/benchmarks/ctests/example04.ll @@ -0,0 +1,76 @@ +; ModuleID = '../../benchmarks/ctests/example04.c' +source_filename = "../../benchmarks/ctests/example04.c" +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-macosx12.0.0" + +@IO_NAME = global ptr @input, align 8 +@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @atomic_start() #0 { +entry: + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @atomic_end() #0 { +entry: + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @input() #0 { +entry: + ret i32 0 +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @log(i32 noundef %x) #0 { +entry: + %x.addr = alloca i32, align 4 + store i32 %x, ptr %x.addr, align 4 + %0 = load i32, ptr %x.addr, align 4 + %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %0) + ret void +} + +declare i32 @printf(ptr noundef, ...) 
#1 + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @app() #0 { +entry: + %x = alloca i32, align 4 + %y = alloca i32, align 4 + call void @atomic_start() + %call = call i32 @input() + %call1 = call i32 @input() + call void @atomic_end() + store i32 %call, ptr %x, align 4 + store i32 %call1, ptr %y, align 4 + %0 = load i32, ptr %y, align 4 + call void @log(i32 noundef %0) + %1 = load i32, ptr %x, align 4 + call void undef(i32 noundef %1, i32 noundef 1) + %2 = load i32, ptr %y, align 4 + call void undef(i32 noundef %2, i32 noundef 1) + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @main() #0 { +entry: + call void @app() + ret i32 0 +} + +attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } +attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } + +!llvm.module.flags = !{!0, !1, !2, !3} +!llvm.ident = !{!4} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 8, !"PIC Level", i32 2} +!2 = !{i32 7, !"uwtable", i32 1} +!3 = !{i32 7, !"frame-pointer", i32 1} +!4 = !{!"Homebrew clang version 17.0.2"} diff --git a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp index 8dc4375..b6d100a 100644 --- a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp +++ b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp @@ -196,9 +196,10 @@ void InferFreshCons::addRegion(inst_vec targetInsts, RegionKind regionKind) { #if DEBUG errs() << I << "\n"; #endif - if 
(!isa(I) && find(targetInsts.begin(), targetInsts.end(), &I) == targetInsts.end()) { + if (!isa(I)) { + auto shouldDelay = find(targetInsts.begin(), targetInsts.end(), &I) == targetInsts.end(); #if DEBUG - errs() << "Should be delayed\n"; + errs() << " Should" << (shouldDelay ? " " : " NOT ") << "be delayed\n"; #endif Instruction* clone; @@ -219,9 +220,13 @@ void InferFreshCons::addRegion(inst_vec targetInsts, RegionKind regionKind) { } } } else if (isa(&I)) { - clone = I.clone(); + // In case I is an IO function call, we don't clone it + // and instead map it to itself for referencing later - if (auto* op = dyn_cast(I.getOperand(0))) { + clone = shouldDelay ? I.clone() : &I; + + if (shouldDelay && I.getNumOperands() > 1) { + auto* op = dyn_cast(I.getOperand(0)); inst_inst_map::iterator it = clonedInsts.find(op); assert(it != clonedInsts.end()); clone->setOperand(0, it->second); @@ -234,15 +239,17 @@ void InferFreshCons::addRegion(inst_vec targetInsts, RegionKind regionKind) { assert(it != clonedInsts.end()); clone->setOperand(0, it->second); } - } - // e.g., LoadInst - else { + } else { + // E.g., LoadInst clone = I.clone(); } clonedInsts.emplace(&I, clone); - toDelete.emplace(&I); - toDelay.push_back(clone); + + if (shouldDelay) { + toDelete.emplace(&I); + toDelay.push_back(clone); + } } } From e7b9bfa8fd712fb9c079a7d34a91867110b651a9 Mon Sep 17 00:00:00 2001 From: Robert Zhang Date: Sun, 4 Feb 2024 20:01:03 -0500 Subject: [PATCH 08/18] [WIP][InferAtomsPass] Optimize Consistent and FreshConsistent atomic regions Mostly working, except optimizations done on a FreshConsistent region need to converge back into a single (nested) region. 
--- benchmarks/ctests/example03.ll | 16 ++-- benchmarks/ctests/example04.ll | 12 ++- .../AtomicRegionInference/src/InferAtoms.cpp | 53 ++++++------ .../src/InferFreshCons.cpp | 81 +++++++++++++++---- .../src/include/InferAtoms.h | 2 +- .../src/include/InferFreshCons.h | 6 +- 6 files changed, 111 insertions(+), 59 deletions(-) diff --git a/benchmarks/ctests/example03.ll b/benchmarks/ctests/example03.ll index df86c33..f642b6b 100644 --- a/benchmarks/ctests/example03.ll +++ b/benchmarks/ctests/example03.ll @@ -45,15 +45,15 @@ entry: call void @atomic_start() %call = call i32 @input() store i32 %call, ptr %x, align 4 - store i32 1, ptr %y, align 4 - %0 = load i32, ptr %y, align 4 - %add = add nsw i32 %0, 1 - store i32 %add, ptr %z, align 4 - %1 = load i32, ptr %z, align 4 - call void @log(i32 noundef %1) - %2 = load i32, ptr %x, align 4 - call void @log(i32 noundef %2) + %0 = load i32, ptr %x, align 4 + call void @log(i32 noundef %0) call void @atomic_end() + store i32 1, ptr %y, align 4 + %1 = load i32, ptr %y, align 4 + %2 = add nsw i32 %1, 1 + store i32 %2, ptr %z, align 4 + %3 = load i32, ptr %z, align 4 + call void @log(i32 noundef %3) ret void } diff --git a/benchmarks/ctests/example04.ll b/benchmarks/ctests/example04.ll index d05e743..1040290 100644 --- a/benchmarks/ctests/example04.ll +++ b/benchmarks/ctests/example04.ll @@ -42,17 +42,15 @@ entry: %x = alloca i32, align 4 %y = alloca i32, align 4 call void @atomic_start() - %call = call i32 @input() %call1 = call i32 @input() - call void @atomic_end() - store i32 %call, ptr %x, align 4 store i32 %call1, ptr %y, align 4 %0 = load i32, ptr %y, align 4 call void @log(i32 noundef %0) - %1 = load i32, ptr %x, align 4 - call void undef(i32 noundef %1, i32 noundef 1) - %2 = load i32, ptr %y, align 4 - call void undef(i32 noundef %2, i32 noundef 1) + call void @atomic_end() + call void @atomic_start() + %1 = call i32 @input() + call void @atomic_end() + store i32 %1, ptr %x, align 4 ret void } diff --git 
a/ocelot/AtomicRegionInference/src/InferAtoms.cpp b/ocelot/AtomicRegionInference/src/InferAtoms.cpp index be7f108..c085c32 100644 --- a/ocelot/AtomicRegionInference/src/InferAtoms.cpp +++ b/ocelot/AtomicRegionInference/src/InferAtoms.cpp @@ -32,21 +32,21 @@ PreservedAnalyses InferAtomsPass::run(Module& M, ModuleAnalysisManager& AM) { inst_insts_map inputMap = buildInputs(this->M); errs() << "inputMap:\n"; printInstInsts(inputMap); - inst_vec toDelete; - getAnnotations(&consVars, &freshVars, inputMap, &toDelete); - // TODO: need to add unique point of call chain prefix to cons set + inst_vec toDeleteAnnots; + getAnnotations(&consVars, &freshVars, inputMap, &toDeleteAnnots); + // TODO: Need to add unique point of call chain prefix to cons set #if DEBUG - errs() << "Initial Fresh:\n"; - for (auto& insts : freshVars) + errs() << "Initial Consistent:\n"; + for (auto& [_, insts] : consVars) { for (auto* inst : insts) errs() << *inst << "\n"; + } #endif #if DEBUG - errs() << "Initial Consistent:\n"; - for (auto& [_, insts] : consVars) { + errs() << "Initial Fresh:\n"; + for (auto& insts : freshVars) for (auto* inst : insts) errs() << *inst << "\n"; - } #endif #if DEBUG @@ -58,13 +58,13 @@ PreservedAnalyses InferAtomsPass::run(Module& M, ModuleAnalysisManager& AM) { auto allFresh = collectFresh(freshVars, inputMap); #if DEBUG - errs() << "Fresh after collect: \n"; - for (auto& varSet : allFresh) - for (auto* var : varSet) errs() << *var << "\n"; + errs() << "Fresh sets after collect: \n"; + for (auto& freshSet : allFresh) + for (auto* inst : freshSet) errs() << *inst << "\n"; #endif #if DEBUG - errs() << "Consistent after collect: \n"; + errs() << "Cons. 
sets after collect: \n"; for (auto& [_, insts] : allConsSets) for (auto* inst : insts) errs() << *inst << "\n"; #endif @@ -72,11 +72,11 @@ PreservedAnalyses InferAtomsPass::run(Module& M, ModuleAnalysisManager& AM) { // Consistent first InferFreshCons* ci = new InferFreshCons(&FAM, &M, atomStart, atomEnd); - ci->inferConsistent(allConsSets); - ci->inferFresh(allFresh); + ci->inferCons(allConsSets, &allFresh, &toDeleteAnnots); + ci->inferFresh(allFresh, &toDeleteAnnots); // Delete annotations - removeAnnotations(&toDelete); + removeAnnotations(toDeleteAnnots); return PreservedAnalyses::none(); } @@ -97,9 +97,10 @@ void InferAtomsPass::getAnnotations(std::map* consVars, inst_vec_ auto* fun = ci->getCalledFunction(); // Various empty or null checks if (fun == NULL || fun->empty() || !fun->hasName()) continue; - // Consistent and FreshConsistent - // TODO: Fix FreshConsistent - if (isAnnot(fun->getName()) && !fun->getName().equals("Fresh")) { + auto funName = fun->getName(); + // Consistent & FreshConsistent + if (isAnnot(funName) && !funName.equals("Fresh")) { + errs() << "getAnnot: " << ci << "\n"; toDelete->push_back(ci); int setID; // Bit cast use of x, then value operand of store @@ -245,12 +246,18 @@ void InferAtomsPass::getAnnotations(std::map* consVars, inst_vec_ printIntInsts(*consVars); #endif } - } else if (fun->getName().equals("Fresh")) { + } + + // Fresh & FreshConsistent + if (isAnnot(funName) && !funName.equals("Consistent")) { #if DEBUG errs() << "[Loop Inst] Calls Fresh\n"; #endif std::set v; - toDelete->push_back(ci); + if (find(toDelete->begin(), toDelete->end(), ci) == toDelete->end()) { + errs() << "getAnnot: " << ci << "\n"; + toDelete->push_back(ci); + } #if DEBUG errs() << "[Loop Inst] Print inputMap entries:\n"; @@ -340,7 +347,7 @@ void InferAtomsPass::getAnnotations(std::map* consVars, inst_vec_ #endif } -void InferAtomsPass::removeAnnotations(inst_vec* toDelete) { +void InferAtomsPass::removeAnnotations(inst_vec& toDelete) { std::vector 
toDeleteF; // Delete all annotation function calls @@ -353,7 +360,7 @@ void InferAtomsPass::removeAnnotations(inst_vec* toDelete) { for (; I != B.end(); I++) { if (auto* ci = dyn_cast(I)) { // TODO: no need to confirm in toDelete? - if (std::find(toDelete->begin(), toDelete->end(), &*I) != toDelete->end()) { + if (find(toDelete.begin(), toDelete.end(), &*I) != toDelete.end()) { #if DEBUG errs() << "Remove call: " << *I << "\n"; #endif @@ -377,7 +384,7 @@ void InferAtomsPass::removeAnnotations(inst_vec* toDelete) { } // Delete all annotation function defs - for (auto F : toDeleteF) { + for (auto* F : toDeleteF) { #if DEBUG errs() << "Remove function " << F->getName() << "\n"; #endif diff --git a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp index b6d100a..9aff828 100644 --- a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp +++ b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp @@ -59,27 +59,36 @@ BasicBlock* InferFreshCons::getLoopEnd(BasicBlock* bb) { } // Top level region inference function -- could flatten later -void InferFreshCons::inferConsistent(std::map consSets) { +void InferFreshCons::inferCons(std::map consSets, inst_vec_vec* freshSets, inst_vec* toDeleteAnnots) { +#if DEBUG + errs() << "=== inferConsistent ===\n"; +#endif for (auto& [id, set] : consSets) { #if DEBUG errs() << "[InferConsistent] Adding region for set " << id << "\n"; #endif - addRegion(set, Consistent); + addRegion(set, freshSets, toDeleteAnnots); } +#if DEBUG + errs() << "*** inferConsistent ***\n"; +#endif } // The only difference is outer map vs outer vec -void InferFreshCons::inferFresh(inst_vec_vec freshSets) { +void InferFreshCons::inferFresh(inst_vec_vec freshSets, inst_vec* toDeleteAnnots) { #if DEBUG errs() << "=== inferFresh ===\n"; #endif - for (auto freshSet : freshSets) addRegion(freshSet, Fresh); + + for (auto freshSet : freshSets) { + addRegion(freshSet, nullptr, toDeleteAnnots); + } #if DEBUG errs() << "*** 
inferFresh ***\n"; #endif } -void InferFreshCons::addRegion(inst_vec targetInsts, RegionKind regionKind) { +void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_vec* toDeleteAnnots) { #if DEBUG errs() << "=== addRegion ===\n"; #endif @@ -129,11 +138,8 @@ void InferFreshCons::addRegion(inst_vec targetInsts, RegionKind regionKind) { // so only explore a caller if it's in conSet bool first = true; for (auto* use : curFun->users()) { - // if (regionKind == 1) { if (!(find(targetInsts.begin(), targetInsts.end(), use) != targetInsts.end())) continue; - // errs() << "Use: "<< *use << " is in call chain\n"; - //} auto* inst = dyn_cast(use); #if DEBUGINFER errs() << "DEBUGINFER: examining use: " << *inst << "\n"; @@ -190,7 +196,7 @@ void InferFreshCons::addRegion(inst_vec targetInsts, RegionKind regionKind) { // Instructions to be delayed till the end of the block inst_vec toDelay; // (The original) instructions to be deleted - inst_set toDelete; + inst_vec toDelete; for (auto& I : *B) { #if DEBUG @@ -247,7 +253,7 @@ void InferFreshCons::addRegion(inst_vec targetInsts, RegionKind regionKind) { clonedInsts.emplace(&I, clone); if (shouldDelay) { - toDelete.emplace(&I); + toDelete.push_back(&I); toDelay.push_back(clone); } } @@ -258,13 +264,16 @@ void InferFreshCons::addRegion(inst_vec targetInsts, RegionKind regionKind) { // in the original order for (auto* I : toDelay) builder.Insert(I); +#if DEBUG + errs() << "Delete originals:\n"; +#endif auto I = B->begin(); // Delete the originals for (; I != B->end();) { #if DEBUG errs() << *I << "\n"; #endif - if (toDelete.find(&*I) != toDelete.end()) { + if (find(toDelete.begin(), toDelete.end(), &*I) != toDelete.end()) { #if DEBUG errs() << "Deleted\n"; #endif @@ -273,6 +282,33 @@ void InferFreshCons::addRegion(inst_vec targetInsts, RegionKind regionKind) { I++; } + // Sync freshSets + if (other != nullptr) { + for (auto& set : *other) { + for (size_t i = 0; i < set.size(); i++) { + auto it = 
find(toDelete.begin(), toDelete.end(), set[i]); + if (it != toDelete.end()) { + auto idx = std::distance(toDelete.begin(), it); + auto* newInst = toDelay[idx]; + set[i] = newInst; + } + } + } + } + + // Sync toDelete + if (toDeleteAnnots != nullptr) { + for (size_t i = 0; i < toDeleteAnnots->size(); i++) { + auto* annot = toDeleteAnnots->at(i); + auto it = find(toDelete.begin(), toDelete.end(), annot); + if (it != toDelete.end()) { + auto idx = std::distance(toDelete.begin(), it); + auto* newAnnot = toDelay[idx]; + toDeleteAnnots->at(i) = newAnnot; + } + } + } + #if DEBUG errs() << "After: " << *B << "\n"; #endif @@ -382,10 +418,10 @@ Instruction* InferFreshCons::truncate(BasicBlock* B, bool forwards, inst_vec set errs() << "=== truncate ===\n"; #endif -#if DEBUG - errs() << "Set:\n"; - printInsts(set); -#endif + // #if DEBUG + // errs() << "Set:\n"; + // printInsts(set); + // #endif // Truncate the front if (forwards) { @@ -660,7 +696,18 @@ int InferFreshCons::getSubLength(BasicBlock* B, Instruction* end, std::vector blockMap) { auto* BComp = blockMap.begin()->second->getParent(); - for (auto& [_, B] : blockMap) - if (B->getParent() != BComp) return false; + + for (auto& [I, B] : blockMap) { + if (B->getParent() != BComp) { +#if DEBUG + errs() << "Blocks are NOT in same fun\n"; +#endif + return false; + } + } + +#if DEBUG + errs() << "Blocks are in same fun\n"; +#endif return true; } diff --git a/ocelot/AtomicRegionInference/src/include/InferAtoms.h b/ocelot/AtomicRegionInference/src/include/InferAtoms.h index 217f92b..1da8c5a 100644 --- a/ocelot/AtomicRegionInference/src/include/InferAtoms.h +++ b/ocelot/AtomicRegionInference/src/include/InferAtoms.h @@ -29,7 +29,7 @@ struct InferAtomsPass : public PassInfoMixin { void getAnnotations(std::map* consVars, inst_vec_vec* freshVars, inst_insts_map inputMap, inst_vec* toDelete); inst_vec_vec collectFresh(inst_vec_vec startingPoints, inst_insts_map info); std::map collectCons(std::map startingPointa, inst_insts_map 
inputMap); - void removeAnnotations(inst_vec* toDelete); + void removeAnnotations(inst_vec& toDelete); void setModule(Module* _M) { M = _M; } private: diff --git a/ocelot/AtomicRegionInference/src/include/InferFreshCons.h b/ocelot/AtomicRegionInference/src/include/InferFreshCons.h index b3fcd10..e76469c 100644 --- a/ocelot/AtomicRegionInference/src/include/InferFreshCons.h +++ b/ocelot/AtomicRegionInference/src/include/InferFreshCons.h @@ -20,9 +20,9 @@ struct InferFreshCons { enum InsertKind { Start, End }; - void inferConsistent(std::map allSets); - void inferFresh(inst_vec_vec allSets); - void addRegion(inst_vec conSet, RegionKind regionKind); + void inferCons(std::map consSets, inst_vec_vec* freshSets, inst_vec* toDeleteAnnots); + void inferFresh(inst_vec_vec freshSets, inst_vec* toDeleteAnnots); + void addRegion(inst_vec conSet, inst_vec_vec* other, inst_vec* toDeleteAnnots); Function* findCandidate(std::map blocks, Function* root); Instruction* insertRegionInst(InsertKind insertKind, Instruction* insertBefore); bool sameFunction(std::map blockMap); From ebc7cc76052a6d0e3ea28b736698a5e5e22182b9 Mon Sep 17 00:00:00 2001 From: Robert Zhang Date: Mon, 5 Feb 2024 00:16:36 -0500 Subject: [PATCH 09/18] [InferAtomsPass] Demo Consistent region optimization --- benchmarks/ctests/example04.c | 3 ++- benchmarks/ctests/example04.ll | 8 +++----- benchmarks/ctests/example04.orig.ll | 2 +- ocelot/AtomicRegionInference/src/InferAtoms.cpp | 2 +- 4 files changed, 7 insertions(+), 8 deletions(-) diff --git a/benchmarks/ctests/example04.c b/benchmarks/ctests/example04.c index 5cbb707..5593ac0 100644 --- a/benchmarks/ctests/example04.c +++ b/benchmarks/ctests/example04.c @@ -19,7 +19,8 @@ void app() { int y = input(); log(y); Consistent(x, 1); - FreshConsistent(y, 1); + Consistent(y, 1); + // FreshConsistent(y, 1); } int main() { diff --git a/benchmarks/ctests/example04.ll b/benchmarks/ctests/example04.ll index 1040290..6e43a07 100644 --- a/benchmarks/ctests/example04.ll +++ 
b/benchmarks/ctests/example04.ll @@ -42,15 +42,13 @@ entry: %x = alloca i32, align 4 %y = alloca i32, align 4 call void @atomic_start() + %call = call i32 @input() %call1 = call i32 @input() + call void @atomic_end() + store i32 %call, ptr %x, align 4 store i32 %call1, ptr %y, align 4 %0 = load i32, ptr %y, align 4 call void @log(i32 noundef %0) - call void @atomic_end() - call void @atomic_start() - %1 = call i32 @input() - call void @atomic_end() - store i32 %1, ptr %x, align 4 ret void } diff --git a/benchmarks/ctests/example04.orig.ll b/benchmarks/ctests/example04.orig.ll index 8491e4d..f245c19 100644 --- a/benchmarks/ctests/example04.orig.ll +++ b/benchmarks/ctests/example04.orig.ll @@ -78,7 +78,7 @@ entry: %1 = load i32, ptr %x, align 4 call void @Consistent(i32 noundef %1, i32 noundef 1) %2 = load i32, ptr %y, align 4 - call void @FreshConsistent(i32 noundef %2, i32 noundef 1) + call void @Consistent(i32 noundef %2, i32 noundef 1) ret void } diff --git a/ocelot/AtomicRegionInference/src/InferAtoms.cpp b/ocelot/AtomicRegionInference/src/InferAtoms.cpp index c085c32..5480d1f 100644 --- a/ocelot/AtomicRegionInference/src/InferAtoms.cpp +++ b/ocelot/AtomicRegionInference/src/InferAtoms.cpp @@ -359,7 +359,7 @@ void InferAtomsPass::removeAnnotations(inst_vec& toDelete) { auto I = B.begin(); for (; I != B.end(); I++) { if (auto* ci = dyn_cast(I)) { - // TODO: no need to confirm in toDelete? + // TODO: No need to confirm in toDelete? 
if (find(toDelete.begin(), toDelete.end(), &*I) != toDelete.end()) { #if DEBUG errs() << "Remove call: " << *I << "\n"; From 3ab09aaf7845aad60edf42999b2a4704730bf19e Mon Sep 17 00:00:00 2001 From: Robert Zhang Date: Mon, 5 Feb 2024 23:25:55 -0500 Subject: [PATCH 10/18] [InferAtomsPass] Fully sound FreshConsistent region inference optimization When a variable has both freshness and consistency constraints, the overlap between the optimized inferred atomic regions is now properly handled by nesting them such that only the outermost bounds count. See benchmarks/ctests/example04.ll for an example. Before: ```llvm define void @app() #0 { entry: %x = alloca i32, align 4 %y = alloca i32, align 4 call void @atomic_start() ; <-- OUTER START %call = call i32 @input() store i32 %call, ptr %x, align 4 call void @atomic_start() ; <-- INNER START %call1 = call i32 @input() call void @atomic_end() ; <-- INNER END store i32 %call1, ptr %y, align 4 %0 = load i32, ptr %x, align 4 call void @log(i32 noundef %0) %1 = load i32, ptr %y, align 4 call void @log(i32 noundef %1) call void @atomic_end() ; <-- OUTER END ret void } ``` After: ```llvm define void @app() #0 { entry: %x = alloca i32, align 4 %y = alloca i32, align 4 call void @atomic_start() ; <-- OUTER START %call = call i32 @input() call void @atomic_start() ; <-- INNER START %call1 = call i32 @input() call void @atomic_end() ; <-- INNER END store i32 %call1, ptr %y, align 4 %0 = load i32, ptr %y, align 4 call void @log(i32 noundef %0) call void @atomic_end() ; <-- OUTER END store i32 %call, ptr %x, align 4 %1 = load i32, ptr %x, align 4 call void @log(i32 noundef %1) ret void } ``` --- benchmarks/ctests/example04.c | 4 +- benchmarks/ctests/example04.ll | 6 +- benchmarks/ctests/example04.orig.ll | 10 +- benchmarks/ctests/example05.c | 25 +++++ benchmarks/ctests/example05.ll | 90 +++++++++++++++++ benchmarks/ctests/example05.orig.ll | 98 +++++++++++++++++++ ocelot/AtomicRegionInference/Makefile | 5 +
.../AtomicRegionInference/src/InferAtoms.cpp | 2 +- .../src/InferFreshCons.cpp | 32 ++++-- .../src/include/InferFreshCons.h | 2 +- 10 files changed, 258 insertions(+), 16 deletions(-) create mode 100644 benchmarks/ctests/example05.c create mode 100644 benchmarks/ctests/example05.ll create mode 100644 benchmarks/ctests/example05.orig.ll diff --git a/benchmarks/ctests/example04.c b/benchmarks/ctests/example04.c index 5593ac0..a4463c3 100644 --- a/benchmarks/ctests/example04.c +++ b/benchmarks/ctests/example04.c @@ -17,10 +17,10 @@ void log(int x) { void app() { int x = input(); int y = input(); + log(x); log(y); Consistent(x, 1); - Consistent(y, 1); - // FreshConsistent(y, 1); + FreshConsistent(y, 1); } int main() { diff --git a/benchmarks/ctests/example04.ll b/benchmarks/ctests/example04.ll index 6e43a07..a3a1d72 100644 --- a/benchmarks/ctests/example04.ll +++ b/benchmarks/ctests/example04.ll @@ -43,12 +43,16 @@ entry: %y = alloca i32, align 4 call void @atomic_start() %call = call i32 @input() + call void @atomic_start() %call1 = call i32 @input() call void @atomic_end() - store i32 %call, ptr %x, align 4 store i32 %call1, ptr %y, align 4 %0 = load i32, ptr %y, align 4 call void @log(i32 noundef %0) + call void @atomic_end() + store i32 %call, ptr %x, align 4 + %1 = load i32, ptr %x, align 4 + call void @log(i32 noundef %1) ret void } diff --git a/benchmarks/ctests/example04.orig.ll b/benchmarks/ctests/example04.orig.ll index f245c19..32405f4 100644 --- a/benchmarks/ctests/example04.orig.ll +++ b/benchmarks/ctests/example04.orig.ll @@ -73,12 +73,14 @@ entry: store i32 %call, ptr %x, align 4 %call1 = call i32 @input() store i32 %call1, ptr %y, align 4 - %0 = load i32, ptr %y, align 4 + %0 = load i32, ptr %x, align 4 call void @log(i32 noundef %0) - %1 = load i32, ptr %x, align 4 - call void @Consistent(i32 noundef %1, i32 noundef 1) - %2 = load i32, ptr %y, align 4 + %1 = load i32, ptr %y, align 4 + call void @log(i32 noundef %1) + %2 = load i32, ptr %x, align 4 
call void @Consistent(i32 noundef %2, i32 noundef 1) + %3 = load i32, ptr %y, align 4 + call void @FreshConsistent(i32 noundef %3, i32 noundef 1) ret void } diff --git a/benchmarks/ctests/example05.c b/benchmarks/ctests/example05.c new file mode 100644 index 0000000..49fe304 --- /dev/null +++ b/benchmarks/ctests/example05.c @@ -0,0 +1,25 @@ +#include + +void Fresh(int x) {} + +void atomic_start() {} +void atomic_end() {} + +int input() { return 0; } +int (*IO_NAME)() = input; + +void log(int x) { + printf("%d\n", x); +} + +void app() { + int x = input(); + for (int i = 0; i < 10; i++) { + log(x); + } + Fresh(x); +} + +int main() { + app(); +} \ No newline at end of file diff --git a/benchmarks/ctests/example05.ll b/benchmarks/ctests/example05.ll new file mode 100644 index 0000000..7330ad1 --- /dev/null +++ b/benchmarks/ctests/example05.ll @@ -0,0 +1,90 @@ +; ModuleID = '../../benchmarks/ctests/example05.c' +source_filename = "../../benchmarks/ctests/example05.c" +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-macosx12.0.0" + +@IO_NAME = global ptr @input, align 8 +@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @atomic_start() #0 { +entry: + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @atomic_end() #0 { +entry: + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @input() #0 { +entry: + ret i32 0 +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @log(i32 noundef %x) #0 { +entry: + %x.addr = alloca i32, align 4 + store i32 %x, ptr %x.addr, align 4 + %0 = load i32, ptr %x.addr, align 4 + %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %0) + ret void +} + +declare i32 @printf(ptr noundef, ...) 
#1 + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @app() #0 { +entry: + %x = alloca i32, align 4 + %i = alloca i32, align 4 + call void @atomic_start() + %call = call i32 @input() + store i32 %call, ptr %x, align 4 + store i32 0, ptr %i, align 4 + br label %for.cond + +for.cond: ; preds = %entry, %for.inc + %0 = load i32, ptr %i, align 4 + %cmp = icmp slt i32 %0, 10 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %1 = load i32, ptr %x, align 4 + call void @log(i32 noundef %1) + br label %for.inc + +for.inc: ; preds = %for.body + %2 = load i32, ptr %i, align 4 + %inc = add nsw i32 %2, 1 + store i32 %inc, ptr %i, align 4 + br label %for.cond, !llvm.loop !5 + +for.end: ; preds = %for.cond + call void @atomic_end() + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @main() #0 { +entry: + call void @app() + ret i32 0 +} + +attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } +attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } + +!llvm.module.flags = !{!0, !1, !2, !3} +!llvm.ident = !{!4} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 8, !"PIC Level", i32 2} +!2 = !{i32 7, !"uwtable", i32 1} +!3 = !{i32 7, !"frame-pointer", i32 1} +!4 = !{!"Homebrew clang version 17.0.2"} +!5 = distinct !{!5, !6} +!6 = !{!"llvm.loop.mustprogress"} diff --git a/benchmarks/ctests/example05.orig.ll b/benchmarks/ctests/example05.orig.ll new file mode 100644 index 0000000..dc149e7 
--- /dev/null +++ b/benchmarks/ctests/example05.orig.ll @@ -0,0 +1,98 @@ +; ModuleID = '../../benchmarks/ctests/example05.c' +source_filename = "../../benchmarks/ctests/example05.c" +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-macosx12.0.0" + +@IO_NAME = global ptr @input, align 8 +@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @Fresh(i32 noundef %x) #0 { +entry: + %x.addr = alloca i32, align 4 + store i32 %x, ptr %x.addr, align 4 + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @atomic_start() #0 { +entry: + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @atomic_end() #0 { +entry: + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @input() #0 { +entry: + ret i32 0 +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @log(i32 noundef %x) #0 { +entry: + %x.addr = alloca i32, align 4 + store i32 %x, ptr %x.addr, align 4 + %0 = load i32, ptr %x.addr, align 4 + %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %0) + ret void +} + +declare i32 @printf(ptr noundef, ...) 
#1 + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @app() #0 { +entry: + %x = alloca i32, align 4 + %i = alloca i32, align 4 + %call = call i32 @input() + store i32 %call, ptr %x, align 4 + store i32 0, ptr %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %0 = load i32, ptr %i, align 4 + %cmp = icmp slt i32 %0, 10 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %1 = load i32, ptr %x, align 4 + call void @log(i32 noundef %1) + br label %for.inc + +for.inc: ; preds = %for.body + %2 = load i32, ptr %i, align 4 + %inc = add nsw i32 %2, 1 + store i32 %inc, ptr %i, align 4 + br label %for.cond, !llvm.loop !5 + +for.end: ; preds = %for.cond + %3 = load i32, ptr %x, align 4 + call void @Fresh(i32 noundef %3) + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @main() #0 { +entry: + call void @app() + ret i32 0 +} + +attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } +attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } + +!llvm.module.flags = !{!0, !1, !2, !3} +!llvm.ident = !{!4} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 8, !"PIC Level", i32 2} +!2 = !{i32 7, !"uwtable", i32 1} +!3 = !{i32 7, !"frame-pointer", i32 1} +!4 = !{!"Homebrew clang version 17.0.2"} +!5 = distinct !{!5, !6} +!6 = !{!"llvm.loop.mustprogress"} diff --git a/ocelot/AtomicRegionInference/Makefile b/ocelot/AtomicRegionInference/Makefile index 360ec68..e4d21fa 
100644 --- a/ocelot/AtomicRegionInference/Makefile +++ b/ocelot/AtomicRegionInference/Makefile @@ -5,6 +5,7 @@ all: make eg2 make eg3 make eg4 + make eg5 eg1: TEST=example01 make test @@ -14,6 +15,8 @@ eg3: TEST=example03 make test eg4: TEST=example04 make test +eg5: + TEST=example05 make test run_eg1: TEST=example01 make run @@ -23,6 +26,8 @@ run_eg3: TEST=example03 make run run_eg4: TEST=example04 make run +run_eg5: + TEST=example05 make run test: $(MAKE) -C build diff --git a/ocelot/AtomicRegionInference/src/InferAtoms.cpp b/ocelot/AtomicRegionInference/src/InferAtoms.cpp index 5480d1f..2317a2b 100644 --- a/ocelot/AtomicRegionInference/src/InferAtoms.cpp +++ b/ocelot/AtomicRegionInference/src/InferAtoms.cpp @@ -73,7 +73,7 @@ PreservedAnalyses InferAtomsPass::run(Module& M, ModuleAnalysisManager& AM) { InferFreshCons* ci = new InferFreshCons(&FAM, &M, atomStart, atomEnd); ci->inferCons(allConsSets, &allFresh, &toDeleteAnnots); - ci->inferFresh(allFresh, &toDeleteAnnots); + ci->inferFresh(allFresh, &allConsSets, &toDeleteAnnots); // Delete annotations removeAnnotations(toDeleteAnnots); diff --git a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp index 9aff828..cc2eb52 100644 --- a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp +++ b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp @@ -61,27 +61,30 @@ BasicBlock* InferFreshCons::getLoopEnd(BasicBlock* bb) { // Top level region inference function -- could flatten later void InferFreshCons::inferCons(std::map consSets, inst_vec_vec* freshSets, inst_vec* toDeleteAnnots) { #if DEBUG - errs() << "=== inferConsistent ===\n"; + errs() << "=== inferCons ===\n"; #endif for (auto& [id, set] : consSets) { #if DEBUG - errs() << "[InferConsistent] Adding region for set " << id << "\n"; + errs() << "[inferCons] Adding region for set " << id << "\n"; #endif addRegion(set, freshSets, toDeleteAnnots); } #if DEBUG - errs() << "*** inferConsistent ***\n"; + errs() << 
"*** inferCons ***\n"; #endif } // The only difference is outer map vs outer vec -void InferFreshCons::inferFresh(inst_vec_vec freshSets, inst_vec* toDeleteAnnots) { +void InferFreshCons::inferFresh(inst_vec_vec freshSets, std::map* consSets, inst_vec* toDeleteAnnots) { #if DEBUG errs() << "=== inferFresh ===\n"; #endif + std::vector consVec; + for (auto& [_, consSet] : *consSets) consVec.push_back(consSet); + for (auto freshSet : freshSets) { - addRegion(freshSet, nullptr, toDeleteAnnots); + addRegion(freshSet, &consVec, toDeleteAnnots); } #if DEBUG errs() << "*** inferFresh ***\n"; @@ -202,8 +205,23 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v #if DEBUG errs() << I << "\n"; #endif - if (!isa(I)) { - auto shouldDelay = find(targetInsts.begin(), targetInsts.end(), &I) == targetInsts.end(); + bool isRegionBoundary = false; + if (auto* ci = dyn_cast(&I)) { + auto funName = ci->getCalledFunction()->getName(); + isRegionBoundary = + funName.equals("atomic_start") || funName.equals("atomic_end"); + } + + // Only attempt to schedule instruction if it's not alloca or a region boundary + if (!isa(I) && !isRegionBoundary) { + bool inExistingSet = false; + for (auto insts : *other) { + if (find(insts.begin(), insts.end(), &I) != insts.end()) { + inExistingSet = true; + } + } + + auto shouldDelay = find(targetInsts.begin(), targetInsts.end(), &I) == targetInsts.end() && !inExistingSet; #if DEBUG errs() << " Should" << (shouldDelay ? 
" " : " NOT ") << "be delayed\n"; #endif diff --git a/ocelot/AtomicRegionInference/src/include/InferFreshCons.h b/ocelot/AtomicRegionInference/src/include/InferFreshCons.h index e76469c..e8dfc8a 100644 --- a/ocelot/AtomicRegionInference/src/include/InferFreshCons.h +++ b/ocelot/AtomicRegionInference/src/include/InferFreshCons.h @@ -21,7 +21,7 @@ struct InferFreshCons { End }; void inferCons(std::map consSets, inst_vec_vec* freshSets, inst_vec* toDeleteAnnots); - void inferFresh(inst_vec_vec freshSets, inst_vec* toDeleteAnnots); + void inferFresh(inst_vec_vec freshSets, std::map* consSets, inst_vec* toDeleteAnnots); void addRegion(inst_vec conSet, inst_vec_vec* other, inst_vec* toDeleteAnnots); Function* findCandidate(std::map blocks, Function* root); Instruction* insertRegionInst(InsertKind insertKind, Instruction* insertBefore); From 6859d351b8a7b69056f6c8f095649360da008b2d Mon Sep 17 00:00:00 2001 From: Robert Zhang Date: Mon, 12 Feb 2024 02:27:12 -0500 Subject: [PATCH 11/18] [InferAtomsPass] Handle cases where IO calls have arguments and explore optimizing loops One objective as of now is to make optimizations even more robust by supporting more corner cases. 
For example, when the IO function takes an argument, as in `input(int i)` (`benchmarks/ctests/example06.c`), the optimizations must not incorrectly delay the instructions that the argument `i` depends on, and should instead produce: ```llvm define void @app() #0 { entry: %i = alloca i32, align 4 %x = alloca i32, align 4 store i32 1, ptr %i, align 4 <-- %0 = load i32, ptr %i, align 4 <-- call void @atomic_start() %call = call i32 @input(i32 noundef %0) <-- DEPENDS ON THE ABOVE store i32 %call, ptr %x, align 4 %1 = load i32, ptr %x, align 4 call void @log(i32 noundef %1) call void @atomic_end() ret void } ``` As for loop optimizations: unlike in WARio (which targets checkpointing runtimes), loop unrolling (i.e., creating multiple smaller copies of the loop) doesn't help in atomic region inference, since these loops must still be in the same region. Thus, the "costliness" of the region won't be lessened. There are still optimizations to be made, though. For instance, loops entirely untainted by inputs under constraint(s) can be delayed and moved out of atomic regions, just as many other instructions can. The difficulty with this part lies in rewiring the complex branching/connections among the basic blocks that form these loops, which makes an optimizing analysis harder to devise. `benchmarks/ctests/example05` illustrates an instance where the optimization above applies. I will be working on this as a next step. 
--- benchmarks/ctests/example05.c | 3 + benchmarks/ctests/example05.ll | 21 +++ benchmarks/ctests/example05.orig.ll | 25 ++- benchmarks/ctests/example06.c | 25 +++ benchmarks/ctests/example06.ll | 75 +++++++++ benchmarks/ctests/example06.orig.ll | 93 +++++++++++ ocelot/AtomicRegionInference/Makefile | 5 + .../AtomicRegionInference/src/InferAtoms.cpp | 56 +++---- .../src/InferFreshCons.cpp | 154 +++++++++++++----- .../src/TaintTracker.cpp | 23 +-- .../src/include/InferFreshCons.h | 6 +- .../src/include/TaintTracker.h | 4 +- 12 files changed, 404 insertions(+), 86 deletions(-) create mode 100644 benchmarks/ctests/example06.c create mode 100644 benchmarks/ctests/example06.ll create mode 100644 benchmarks/ctests/example06.orig.ll diff --git a/benchmarks/ctests/example05.c b/benchmarks/ctests/example05.c index 49fe304..58dddf6 100644 --- a/benchmarks/ctests/example05.c +++ b/benchmarks/ctests/example05.c @@ -17,6 +17,9 @@ void app() { for (int i = 0; i < 10; i++) { log(x); } + for (int i = 0; i < 10; i++) { + log(1); + } Fresh(x); } diff --git a/benchmarks/ctests/example05.ll b/benchmarks/ctests/example05.ll index 7330ad1..4dfc800 100644 --- a/benchmarks/ctests/example05.ll +++ b/benchmarks/ctests/example05.ll @@ -41,6 +41,7 @@ define void @app() #0 { entry: %x = alloca i32, align 4 %i = alloca i32, align 4 + %i1 = alloca i32, align 4 call void @atomic_start() %call = call i32 @input() store i32 %call, ptr %x, align 4 @@ -64,6 +65,25 @@ for.inc: ; preds = %for.body br label %for.cond, !llvm.loop !5 for.end: ; preds = %for.cond + store i32 0, ptr %i1, align 4 + br label %for.cond2 + +for.cond2: ; preds = %for.inc5, %for.end + %3 = load i32, ptr %i1, align 4 + %cmp3 = icmp slt i32 %3, 10 + br i1 %cmp3, label %for.body4, label %for.end7 + +for.body4: ; preds = %for.cond2 + call void @log(i32 noundef 1) + br label %for.inc5 + +for.inc5: ; preds = %for.body4 + %4 = load i32, ptr %i1, align 4 + %inc6 = add nsw i32 %4, 1 + store i32 %inc6, ptr %i1, align 4 + br label 
%for.cond2, !llvm.loop !7 + +for.end7: ; preds = %for.cond2 call void @atomic_end() ret void } @@ -88,3 +108,4 @@ attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-pr !4 = !{!"Homebrew clang version 17.0.2"} !5 = distinct !{!5, !6} !6 = !{!"llvm.loop.mustprogress"} +!7 = distinct !{!7, !6} diff --git a/benchmarks/ctests/example05.orig.ll b/benchmarks/ctests/example05.orig.ll index dc149e7..6dcc44f 100644 --- a/benchmarks/ctests/example05.orig.ll +++ b/benchmarks/ctests/example05.orig.ll @@ -49,6 +49,7 @@ define void @app() #0 { entry: %x = alloca i32, align 4 %i = alloca i32, align 4 + %i1 = alloca i32, align 4 %call = call i32 @input() store i32 %call, ptr %x, align 4 store i32 0, ptr %i, align 4 @@ -71,8 +72,27 @@ for.inc: ; preds = %for.body br label %for.cond, !llvm.loop !5 for.end: ; preds = %for.cond - %3 = load i32, ptr %x, align 4 - call void @Fresh(i32 noundef %3) + store i32 0, ptr %i1, align 4 + br label %for.cond2 + +for.cond2: ; preds = %for.inc5, %for.end + %3 = load i32, ptr %i1, align 4 + %cmp3 = icmp slt i32 %3, 10 + br i1 %cmp3, label %for.body4, label %for.end7 + +for.body4: ; preds = %for.cond2 + call void @log(i32 noundef 1) + br label %for.inc5 + +for.inc5: ; preds = %for.body4 + %4 = load i32, ptr %i1, align 4 + %inc6 = add nsw i32 %4, 1 + store i32 %inc6, ptr %i1, align 4 + br label %for.cond2, !llvm.loop !7 + +for.end7: ; preds = %for.cond2 + %5 = load i32, ptr %x, align 4 + call void @Fresh(i32 noundef %5) ret void } @@ -96,3 +116,4 @@ attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-pr !4 = !{!"Homebrew clang version 17.0.2"} !5 = distinct !{!5, !6} !6 = !{!"llvm.loop.mustprogress"} +!7 = distinct !{!7, !6} diff --git a/benchmarks/ctests/example06.c b/benchmarks/ctests/example06.c new file mode 100644 index 0000000..d192581 --- /dev/null +++ b/benchmarks/ctests/example06.c @@ -0,0 +1,25 @@ +#include + +void Fresh(int x) {} +void Consistent(int x, int id) {} + +void atomic_start() {} 
+void atomic_end() {} + +int input(int i) { return i; } +int (*IO_NAME)() = input; + +void log(int x) { + printf("%d\n", x); +} + +void app() { + int i = 1; + int x = input(i); + Fresh(x); + log(x); +} + +int main() { + app(); +} \ No newline at end of file diff --git a/benchmarks/ctests/example06.ll b/benchmarks/ctests/example06.ll new file mode 100644 index 0000000..603f917 --- /dev/null +++ b/benchmarks/ctests/example06.ll @@ -0,0 +1,75 @@ +; ModuleID = '../../benchmarks/ctests/example06.c' +source_filename = "../../benchmarks/ctests/example06.c" +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-macosx12.0.0" + +@IO_NAME = global ptr @input, align 8 +@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @atomic_start() #0 { +entry: + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @atomic_end() #0 { +entry: + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @input(i32 noundef %i) #0 { +entry: + %i.addr = alloca i32, align 4 + store i32 %i, ptr %i.addr, align 4 + %0 = load i32, ptr %i.addr, align 4 + ret i32 %0 +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @log(i32 noundef %x) #0 { +entry: + %x.addr = alloca i32, align 4 + store i32 %x, ptr %x.addr, align 4 + %0 = load i32, ptr %x.addr, align 4 + %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %0) + ret void +} + +declare i32 @printf(ptr noundef, ...) 
#1 + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @app() #0 { +entry: + %i = alloca i32, align 4 + %x = alloca i32, align 4 + store i32 1, ptr %i, align 4 + %0 = load i32, ptr %i, align 4 + call void @atomic_start() + %call = call i32 @input(i32 noundef %0) + store i32 %call, ptr %x, align 4 + %1 = load i32, ptr %x, align 4 + call void @log(i32 noundef %1) + call void @atomic_end() + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @main() #0 { +entry: + call void @app() + ret i32 0 +} + +attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } +attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } + +!llvm.module.flags = !{!0, !1, !2, !3} +!llvm.ident = !{!4} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 8, !"PIC Level", i32 2} +!2 = !{i32 7, !"uwtable", i32 1} +!3 = !{i32 7, !"frame-pointer", i32 1} +!4 = !{!"Homebrew clang version 17.0.2"} diff --git a/benchmarks/ctests/example06.orig.ll b/benchmarks/ctests/example06.orig.ll new file mode 100644 index 0000000..4aea90e --- /dev/null +++ b/benchmarks/ctests/example06.orig.ll @@ -0,0 +1,93 @@ +; ModuleID = '../../benchmarks/ctests/example06.c' +source_filename = "../../benchmarks/ctests/example06.c" +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-macosx12.0.0" + +@IO_NAME = global ptr @input, align 8 +@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 + +; Function Attrs: noinline 
nounwind optnone ssp uwtable(sync) +define void @Fresh(i32 noundef %x) #0 { +entry: + %x.addr = alloca i32, align 4 + store i32 %x, ptr %x.addr, align 4 + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @Consistent(i32 noundef %x, i32 noundef %id) #0 { +entry: + %x.addr = alloca i32, align 4 + %id.addr = alloca i32, align 4 + store i32 %x, ptr %x.addr, align 4 + store i32 %id, ptr %id.addr, align 4 + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @atomic_start() #0 { +entry: + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @atomic_end() #0 { +entry: + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @input(i32 noundef %i) #0 { +entry: + %i.addr = alloca i32, align 4 + store i32 %i, ptr %i.addr, align 4 + %0 = load i32, ptr %i.addr, align 4 + ret i32 %0 +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @log(i32 noundef %x) #0 { +entry: + %x.addr = alloca i32, align 4 + store i32 %x, ptr %x.addr, align 4 + %0 = load i32, ptr %x.addr, align 4 + %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %0) + ret void +} + +declare i32 @printf(ptr noundef, ...) 
#1 + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @app() #0 { +entry: + %i = alloca i32, align 4 + %x = alloca i32, align 4 + store i32 1, ptr %i, align 4 + %0 = load i32, ptr %i, align 4 + %call = call i32 @input(i32 noundef %0) + store i32 %call, ptr %x, align 4 + %1 = load i32, ptr %x, align 4 + call void @Fresh(i32 noundef %1) + %2 = load i32, ptr %x, align 4 + call void @log(i32 noundef %2) + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @main() #0 { +entry: + call void @app() + ret i32 0 +} + +attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } +attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } + +!llvm.module.flags = !{!0, !1, !2, !3} +!llvm.ident = !{!4} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 8, !"PIC Level", i32 2} +!2 = !{i32 7, !"uwtable", i32 1} +!3 = !{i32 7, !"frame-pointer", i32 1} +!4 = !{!"Homebrew clang version 17.0.2"} diff --git a/ocelot/AtomicRegionInference/Makefile b/ocelot/AtomicRegionInference/Makefile index e4d21fa..d93e037 100644 --- a/ocelot/AtomicRegionInference/Makefile +++ b/ocelot/AtomicRegionInference/Makefile @@ -6,6 +6,7 @@ all: make eg3 make eg4 make eg5 + make eg6 eg1: TEST=example01 make test @@ -17,6 +18,8 @@ eg4: TEST=example04 make test eg5: TEST=example05 make test +eg6: + TEST=example06 make test run_eg1: TEST=example01 make run @@ -28,6 +31,8 @@ run_eg4: TEST=example04 make run run_eg5: TEST=example05 make run +run_eg6: + 
TEST=example06 make run test: $(MAKE) -C build diff --git a/ocelot/AtomicRegionInference/src/InferAtoms.cpp b/ocelot/AtomicRegionInference/src/InferAtoms.cpp index 2317a2b..42c8f3b 100644 --- a/ocelot/AtomicRegionInference/src/InferAtoms.cpp +++ b/ocelot/AtomicRegionInference/src/InferAtoms.cpp @@ -29,7 +29,7 @@ PreservedAnalyses InferAtomsPass::run(Module& M, ModuleAnalysisManager& AM) { // to only go through all the declarations once. std::map consVars; inst_vec_vec freshVars; - inst_insts_map inputMap = buildInputs(this->M); + auto [inputMap, inputInsts] = buildInputs(this->M); errs() << "inputMap:\n"; printInstInsts(inputMap); inst_vec toDeleteAnnots; @@ -49,10 +49,10 @@ PreservedAnalyses InferAtomsPass::run(Module& M, ModuleAnalysisManager& AM) { for (auto* inst : insts) errs() << *inst << "\n"; #endif -#if DEBUG - errs() << "Print inputMap CallInst entries:\n"; - printInstInsts(inputMap, true); -#endif + // #if DEBUG + // errs() << "Print inputMap CallInst entries:\n"; + // printInstInsts(inputMap, true); + // #endif auto allConsSets = collectCons(consVars, inputMap); auto allFresh = collectFresh(freshVars, inputMap); @@ -72,8 +72,8 @@ PreservedAnalyses InferAtomsPass::run(Module& M, ModuleAnalysisManager& AM) { // Consistent first InferFreshCons* ci = new InferFreshCons(&FAM, &M, atomStart, atomEnd); - ci->inferCons(allConsSets, &allFresh, &toDeleteAnnots); - ci->inferFresh(allFresh, &allConsSets, &toDeleteAnnots); + ci->inferCons(allConsSets, &allFresh, &toDeleteAnnots, &inputInsts); + ci->inferFresh(allFresh, &allConsSets, &toDeleteAnnots, &inputInsts); // Delete annotations removeAnnotations(toDeleteAnnots); @@ -81,7 +81,7 @@ PreservedAnalyses InferAtomsPass::run(Module& M, ModuleAnalysisManager& AM) { return PreservedAnalyses::none(); } -// This function finds annotated variables +// Finds *all* variables affected by annotation void InferAtomsPass::getAnnotations(std::map* consVars, inst_vec_vec* freshVars, inst_insts_map inputMap, inst_vec* toDelete) { 
#if DEBUG @@ -92,7 +92,7 @@ void InferAtomsPass::getAnnotations(std::map* consVars, inst_vec_ for (auto& I : B) { if (auto* ci = dyn_cast(&I)) { #if DEBUG - errs() << "[Loop Inst] Found call: " << *ci << "\n"; + errs() << "[Loop I] Found call: " << *ci << "\n"; #endif auto* fun = ci->getCalledFunction(); // Various empty or null checks @@ -100,7 +100,6 @@ void InferAtomsPass::getAnnotations(std::map* consVars, inst_vec_ auto funName = fun->getName(); // Consistent & FreshConsistent if (isAnnot(funName) && !funName.equals("Fresh")) { - errs() << "getAnnot: " << ci << "\n"; toDelete->push_back(ci); int setID; // Bit cast use of x, then value operand of store @@ -251,44 +250,44 @@ void InferAtomsPass::getAnnotations(std::map* consVars, inst_vec_ // Fresh & FreshConsistent if (isAnnot(funName) && !funName.equals("Consistent")) { #if DEBUG - errs() << "[Loop Inst] Calls Fresh\n"; + errs() << "[Loop I] Calls Fresh\n"; #endif std::set v; if (find(toDelete->begin(), toDelete->end(), ci) == toDelete->end()) { - errs() << "getAnnot: " << ci << "\n"; + // errs() << "getAnnot: " << ci << "\n"; toDelete->push_back(ci); } -#if DEBUG - errs() << "[Loop Inst] Print inputMap entries:\n"; - printInstInsts(inputMap); -#endif + // #if DEBUG + // errs() << "[Loop I] Print inputMap entries:\n"; + // printInstInsts(inputMap); + // #endif //* Can't actually remove, otherwise wrong result // #if DEBUG - // errs() << "[Loop Inst] Remove Fresh call from inputMap\n"; + // errs() << "[Loop I] Remove Fresh call from inputMap\n"; // #endif // inputMap.erase(ci); - auto* arg = ci->getOperand(0); + auto* freshArg = ci->getOperand(0); #if DEBUG - errs() << "[Loop Inst] Fresh arg: " << *arg << "\n"; + errs() << "[Loop I] freshArg: " << *freshArg << "\n"; #endif - if (auto* inst = dyn_cast(arg)) { + if (auto* inst = dyn_cast(freshArg)) { #if DEBUG - errs() << "[Loop Inst] arg = Instruction, add to v\n"; + errs() << "[Loop I] Add freshVar to v\n"; #endif v.emplace(inst); //* Actually collect all uses 
(e.g., log(x)) if (auto* li = dyn_cast(inst)) { #if DEBUG - errs() << "[Loop Inst] Further arg = LoadInst\n"; + errs() << "[Loop I] Further arg = LoadInst\n"; #endif auto* ptr = li->getPointerOperand(); #if DEBUG - errs() << "[Loop Inst] Ptr operand: " << *ptr << "\n"; + errs() << "[Loop I] Ptr operand: " << *ptr << "\n"; #endif for (auto* ptrUse : ptr->users()) { #if DEBUG @@ -296,7 +295,7 @@ void InferAtomsPass::getAnnotations(std::map* consVars, inst_vec_ #endif if (ptrUse != inst) { if (auto* liUse = dyn_cast(ptrUse)) { - errs() << "[Loop ptr users] ptrUse diff from Fresh arg, add to v\n"; + errs() << "[Loop ptr users] ptrUse = LoadInst & diff from freshArg, add to v\n"; v.emplace(liUse); } } @@ -306,9 +305,9 @@ void InferAtomsPass::getAnnotations(std::map* consVars, inst_vec_ // v.push_back(ci); #if DEBUG - errs() << "[Loop Inst] Go over arg users\n"; + errs() << "[Loop I] Go over arg users\n"; #endif - for (auto* use : arg->users()) { + for (auto* use : freshArg->users()) { if (auto* si = dyn_cast(use)) { #if DEBUG errs() << "[Loop Users] use = StoreInst, add to v: " << *si << "\n"; @@ -325,7 +324,7 @@ void InferAtomsPass::getAnnotations(std::map* consVars, inst_vec_ if (!v.empty()) { #if DEBUG - errs() << "[Loop Inst] Add v's insts to a set in freshVars:\n"; + errs() << "[Loop I] Add v's insts to a set in freshVars:\n"; #endif inst_vec tmp; for (auto* inst : v) { @@ -548,13 +547,14 @@ inst_vec_vec InferAtomsPass::collectFresh(inst_vec_vec freshVars, inst_insts_map inst_vec_vec toReturn; #if DEBUG - errs() << "Go over fresh var sets\n"; + errs() << "Go over fresh freshSets\n"; #endif for (auto varSet : freshVars) { #if DEBUG errs() << "[Loop freshVars] Go over varSet:\n"; printInsts(varSet); #endif + inst_set unique, callChain; for (auto* var : varSet) { #if DEBUG diff --git a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp index cc2eb52..c0252ff 100644 --- 
a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp +++ b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp @@ -59,7 +59,7 @@ BasicBlock* InferFreshCons::getLoopEnd(BasicBlock* bb) { } // Top level region inference function -- could flatten later -void InferFreshCons::inferCons(std::map consSets, inst_vec_vec* freshSets, inst_vec* toDeleteAnnots) { +void InferFreshCons::inferCons(std::map consSets, inst_vec_vec* freshSets, inst_vec* toDeleteAnnots, std::set* inputInsts) { #if DEBUG errs() << "=== inferCons ===\n"; #endif @@ -67,7 +67,7 @@ void InferFreshCons::inferCons(std::map consSets, inst_vec_vec* f #if DEBUG errs() << "[inferCons] Adding region for set " << id << "\n"; #endif - addRegion(set, freshSets, toDeleteAnnots); + addRegion(set, freshSets, toDeleteAnnots, nullptr); } #if DEBUG errs() << "*** inferCons ***\n"; @@ -75,7 +75,7 @@ void InferFreshCons::inferCons(std::map consSets, inst_vec_vec* f } // The only difference is outer map vs outer vec -void InferFreshCons::inferFresh(inst_vec_vec freshSets, std::map* consSets, inst_vec* toDeleteAnnots) { +void InferFreshCons::inferFresh(inst_vec_vec freshSets, std::map* consSets, inst_vec* toDeleteAnnots, std::set* inputInsts) { #if DEBUG errs() << "=== inferFresh ===\n"; #endif @@ -84,14 +84,14 @@ void InferFreshCons::inferFresh(inst_vec_vec freshSets, std::map* for (auto& [_, consSet] : *consSets) consVec.push_back(consSet); for (auto freshSet : freshSets) { - addRegion(freshSet, &consVec, toDeleteAnnots); + addRegion(freshSet, &consVec, toDeleteAnnots, inputInsts); } #if DEBUG errs() << "*** inferFresh ***\n"; #endif } -void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_vec* toDeleteAnnots) { +void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_vec* toDeleteAnnots, std::set* inputInsts) { #if DEBUG errs() << "=== addRegion ===\n"; #endif @@ -117,23 +117,24 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v 
inst_inst_vec regionsFound; while (!regionsNeeded.empty()) { // Need to raise all blocks in the map until they are the same - auto blocks = regionsNeeded.front(); + auto taintedBlocks = regionsNeeded.front(); regionsNeeded.pop(); + // Record which functions have been traveled through std::set seenFuns; #if DEBUG errs() << "[Loop regionsNeeded] While blocks are in diff functions\n"; #endif - while (!sameFunction(blocks)) { + while (!sameFunction(taintedBlocks)) { // To think on: does this change? - auto* goal = findCandidate(blocks, root); + auto* goal = findCandidate(taintedBlocks, root); #if DEBUG errs() << "[Loop !sameFunction] Go over each targetInst\n"; #endif for (auto* targetInst : targetInsts) { // not all blocks need to be moved up - auto* curFun = blocks[targetInst]->getParent(); + auto* curFun = taintedBlocks[targetInst]->getParent(); seenFuns.insert(curFun); if (curFun != goal) { // if more than one call: @@ -153,13 +154,13 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v } // update the original map if (first) { - blocks[targetInst] = inst->getParent(); + taintedBlocks[targetInst] = inst->getParent(); first = false; } else { // copy the blockmap, update, add to queue auto* inst = dyn_cast(use); std::map copy; - for (auto map : blocks) copy[map.first] = map.second; + for (auto map : taintedBlocks) copy[map.first] = map.second; copy[targetInst] = inst->getParent(); regionsNeeded.push(copy); } @@ -174,25 +175,71 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v errs() << "[Loop regionsNeeded] Start dom tree analysis\n"; #endif - auto* homeFun = blocks.begin()->second->getParent(); + auto* homeFun = taintedBlocks.begin()->second->getParent(); if (homeFun == nullptr) { #if DEBUG - errs() << "[regionsNeeded] No function found\n"; + errs() << "[Loop regionsNeeded] No function found\n"; #endif continue; } #if DEBUG - errs() << "[regionsNeeded] Found home fun: " << homeFun->getName() << "\n"; + 
errs() << "[Loop regionsNeeded] Found home fun: " << homeFun->getName() << "\n"; #endif + // Tainted blocks right before untained blocks + std::vector lastTainted; + BasicBlock* prevTainted; + + for (auto& B : *homeFun) { + bool isTainted = false; + + for (auto& [_, taintedBlock] : taintedBlocks) { + if (&B == taintedBlock) { + isTainted = true; + break; + } + } + + if (!isTainted) { + errs() << "Not tainted: " << B << "\n"; + if (prevTainted != nullptr && find(lastTainted.begin(), lastTainted.end(), prevTainted) == lastTainted.end()) + lastTainted.push_back(prevTainted); + } else { + prevTainted = &B; + } + } + + for (auto* B : lastTainted) { + errs() << "lastTainted: " << *B << "\n"; + } + + // lastTainted[1]->setNext(); + #if OPT + std::set seenBlocks; + bool hasRewired = false; + #if DEBUG - errs() << "[regionsNeeded] Go over all block insts\n"; + errs() << "[Loop regionsNeeded] Go over all blocks\n"; #endif - std::set seenBlocks; - for (auto& [_, B] : blocks) { - if (seenBlocks.find(B) == seenBlocks.end()) { - seenBlocks.emplace(B); + for (auto& B : *homeFun) { + bool isTainted = false; + for (auto& [_, tB] : taintedBlocks) { + if (&B == tB) { + isTainted = true; + break; + } + } + + if (!isTainted && seenBlocks.find(&B) == seenBlocks.end()) { + seenBlocks.emplace(&B); + + errs() << "Terminator: " << *B.getTerminator() << "\n"; + } else if (isTainted && seenBlocks.find(&B) == seenBlocks.end()) { +#if DEBUG + errs() << "[Loop B] New tainted block\n"; +#endif + seenBlocks.emplace(&B); // A mapping from original instructions to their clones inst_inst_map clonedInsts; @@ -201,7 +248,7 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v // (The original) instructions to be deleted inst_vec toDelete; - for (auto& I : *B) { + for (auto& I : B) { #if DEBUG errs() << I << "\n"; #endif @@ -250,12 +297,28 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v clone = shouldDelay ? 
I.clone() : &I; if (shouldDelay && I.getNumOperands() > 1) { - auto* op = dyn_cast(I.getOperand(0)); - inst_inst_map::iterator it = clonedInsts.find(op); - assert(it != clonedInsts.end()); - clone->setOperand(0, it->second); + if (auto* op = dyn_cast(I.getOperand(0))) { + inst_inst_map::iterator it = clonedInsts.find(op); + assert(it != clonedInsts.end()); + clone->setOperand(0, it->second); + } } } else if (isa(&I)) { + // Check whether any IO function calls coming after depend on this store + // If so, do NOT delay + auto* storePtr = I.getOperand(1); + for (auto* user : storePtr->users()) { + if (auto* li = dyn_cast(user)) { + for (auto* liUser : li->users()) { + if (auto* ci = dyn_cast(liUser)) { + if (inputInsts->find(ci) != inputInsts->end()) { + shouldDelay = false; + } + } + } + } + } + clone = I.clone(); if (auto* op = dyn_cast(I.getOperand(0))) { @@ -263,8 +326,19 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v assert(it != clonedInsts.end()); clone->setOperand(0, it->second); } + } else if (isa(&I)) { + // Check whether any IO function calls coming after depend on this load + // If so, do NOT delay + for (auto* user : I.users()) { + if (auto* ci = dyn_cast(user)) { + if (inputInsts->find(ci) != inputInsts->end()) { + shouldDelay = false; + } + } + } + + clone = I.clone(); } else { - // E.g., LoadInst clone = I.clone(); } @@ -277,7 +351,7 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v } } - IRBuilder builder(B); + IRBuilder builder(&B); // Append each delayed instruction to the end of the block, // in the original order for (auto* I : toDelay) builder.Insert(I); @@ -285,9 +359,9 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v #if DEBUG errs() << "Delete originals:\n"; #endif - auto I = B->begin(); + auto I = B.begin(); // Delete the originals - for (; I != B->end();) { + for (; I != B.end();) { #if DEBUG errs() << *I << "\n"; #endif @@ -328,7 +402,7 
@@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v } #if DEBUG - errs() << "After: " << *B << "\n"; + errs() << "After: " << B << "\n"; #endif } } @@ -336,8 +410,8 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v auto& domTree = FAM->getResult(*homeFun); // Find the closest point that dominates - auto* startDom = blocks.begin()->second; - for (auto& [_, B] : blocks) + auto* startDom = taintedBlocks.begin()->second; + for (auto& [_, B] : taintedBlocks) startDom = domTree.findNearestCommonDominator(B, startDom); #if DEBUG errs() << "[Loop regionsNeeded] startDom: " << *startDom << "\n"; @@ -352,8 +426,8 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v // Flip directions for the region end auto& postDomTree = FAM->getResult(*homeFun); // Find the closest point that dominates - auto* endDom = blocks.begin()->second; - for (auto& [_, block] : blocks) { + auto* endDom = taintedBlocks.begin()->second; + for (auto& [_, taintedBlock] : taintedBlocks) { #if DEBUGINFER if (endDom != nullptr) { errs() << "Finding post dom of: " << getSimpleNodeLabel(map.second) << " and " << getSimpleNodeLabel(endDom) << "\n"; @@ -361,7 +435,7 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v errs() << "endDom is null\n"; } #endif - endDom = postDomTree.findNearestCommonDominator(block, endDom); + endDom = postDomTree.findNearestCommonDominator(taintedBlock, endDom); } #if DEBUG @@ -399,7 +473,7 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v } // TODO: fallback if endDom is null? Need hyper-blocks, I think - // possibly can do a truncation check, to lessen the size a little, but could that interfere with compiler optimizations? + // pOssibly can do a truncation check, to lessen the size a little, but could that interfere with compiler optimizations? 
auto* regionStart = truncate(startDom, true, targetInsts, seenFuns); auto* regionEnd = truncate(endDom, false, targetInsts, seenFuns); if (regionStart == nullptr) { @@ -668,14 +742,14 @@ int InferFreshCons::getSubLength(BasicBlock* B, Instruction* end, std::vectorgetCalledFunction(); if (!cf->empty() && cf != NULL) { #if DEBUG - errs() << "[Loop I] Cur inst = CallInst, calling: " << cf->getName() << "\n"; + errs() << "[Loop I] I = CallInst, calling: " << cf->getName() << "\n"; #endif count += cf->getInstructionCount(); } @@ -692,11 +766,11 @@ int InferFreshCons::getSubLength(BasicBlock* B, Instruction* end, std::vector max_ret) { diff --git a/ocelot/AtomicRegionInference/src/TaintTracker.cpp b/ocelot/AtomicRegionInference/src/TaintTracker.cpp index 1c400d9..45f30db 100644 --- a/ocelot/AtomicRegionInference/src/TaintTracker.cpp +++ b/ocelot/AtomicRegionInference/src/TaintTracker.cpp @@ -1,16 +1,16 @@ #include "include/TaintTracker.h" // Main dataflow function to construct map of store (TODO: not just stores) insts to vars (inputs?) 
they depend on -inst_insts_map buildInputs(Module* M) { +std::pair> buildInputs(Module* M) { #if DEBUG errs() << "=== buildInputs ===\n"; #endif - inst_vec inputInsts = findInputInsts(M); + std::set inputInsts = findInputInsts(M); inst_insts_map taintedInsts; inst_vec promotedInputs; - for (auto inputInst : inputInsts) { + for (auto* inputInst : inputInsts) { #if DEBUG errs() << "[Loop inputInst] inputInst: " << *inputInst << "\n"; #endif @@ -274,7 +274,7 @@ inst_insts_map buildInputs(Module* M) { #if DEBUG errs() << "*** buildInputs ***\n"; #endif - return taintedInsts; + return make_pair(taintedInsts, inputInsts); } val_vec traverseLocal(Value* tainted, Instruction* srcInput, inst_insts_map* taintedInsts, Instruction* caller) { @@ -610,29 +610,29 @@ val_vec traverseLocal(Value* tainted, Instruction* srcInput, inst_insts_map* tai return interProcFlows; } -inst_vec findInputInsts(Module* M) { +std::set findInputInsts(Module* M) { #if DEBUG - errs() << "findInputInsts\n"; + errs() << "=== findInputInsts ===\n"; #endif - inst_vec inputInsts; + std::set inputInsts; // Find IO_NAME annotations for (auto& gv : M->globals()) { if (gv.getName().starts_with("IO_NAME")) { - if (auto* fp = dyn_cast(gv.getInitializer())) { + if (auto* ioFun = dyn_cast(gv.getInitializer())) { #if DEBUG - errs() << "Found IO fun: " << fp->getName() << "\n"; + errs() << "Found IO fun: " << ioFun->getName() << "\n"; #endif // Now, search for calls to those functions for (auto& F : *M) { for (auto& B : F) { for (auto& I : B) { if (auto* ci = dyn_cast(&I)) { - if (fp == ci->getCalledFunction()) { + if (ioFun == ci->getCalledFunction()) { #if DEBUG errs() << "Found IO call: " << I << "\n"; #endif - inputInsts.push_back(&I); + inputInsts.insert(ci); } } } @@ -645,6 +645,7 @@ inst_vec findInputInsts(Module* M) { } } + errs() << "*** findInputInsts ***\n"; return inputInsts; } diff --git a/ocelot/AtomicRegionInference/src/include/InferFreshCons.h 
b/ocelot/AtomicRegionInference/src/include/InferFreshCons.h index e8dfc8a..f26adbf 100644 --- a/ocelot/AtomicRegionInference/src/include/InferFreshCons.h +++ b/ocelot/AtomicRegionInference/src/include/InferFreshCons.h @@ -20,9 +20,9 @@ struct InferFreshCons { enum InsertKind { Start, End }; - void inferCons(std::map consSets, inst_vec_vec* freshSets, inst_vec* toDeleteAnnots); - void inferFresh(inst_vec_vec freshSets, std::map* consSets, inst_vec* toDeleteAnnots); - void addRegion(inst_vec conSet, inst_vec_vec* other, inst_vec* toDeleteAnnots); + void inferCons(std::map consSets, inst_vec_vec* freshSets, inst_vec* toDeleteAnnots, std::set* inputInsts); + void inferFresh(inst_vec_vec freshSets, std::map* consSets, inst_vec* toDeleteAnnots, std::set* inputInsts); + void addRegion(inst_vec conSet, inst_vec_vec* other, inst_vec* toDeleteAnnots, std::set* inputInsts); Function* findCandidate(std::map blocks, Function* root); Instruction* insertRegionInst(InsertKind insertKind, Instruction* insertBefore); bool sameFunction(std::map blockMap); diff --git a/ocelot/AtomicRegionInference/src/include/TaintTracker.h b/ocelot/AtomicRegionInference/src/include/TaintTracker.h index ea3ce03..1b06e48 100644 --- a/ocelot/AtomicRegionInference/src/include/TaintTracker.h +++ b/ocelot/AtomicRegionInference/src/include/TaintTracker.h @@ -5,9 +5,9 @@ using namespace llvm; -inst_insts_map buildInputs(Module* m); +std::pair> buildInputs(Module* m); val_vec traverseLocal(Value* tainted, Instruction* srcOp, inst_insts_map* buildMap, Instruction* caller); -inst_vec findInputInsts(Module* M); +std::set findInputInsts(Module* M); Instruction* ptrAfterCall(Value* ptr, CallInst* ci); bool storePrecedesUse(Instruction* use, StoreInst* toMatch); inst_vec couldMatchGEPI(GetElementPtrInst* tGEPI); From b8b00374d00ef96eb473a7696c0f23657c56abd5 Mon Sep 17 00:00:00 2001 From: Robert Zhang Date: Sat, 9 Mar 2024 22:39:57 -0500 Subject: [PATCH 12/18] [InferAtomsPass] Loop optimization Extract untainted 
instructions into their own loop that doesn't go into the atomic region. Test plan: `make eg5` and observe the difference between `benchmarks/ctests/example05.ll` (optimized) and `benchmarks/ctests/example05.orig.ll` (original), or `make eg7`. --- benchmarks/ctests/example05.c | 7 +- benchmarks/ctests/example05.ll | 45 ++-- benchmarks/ctests/example05.orig.ll | 26 +- benchmarks/ctests/example07.c | 27 ++ benchmarks/ctests/example07.ll | 114 ++++++++ .../{example04.ll => example07.orig.ll} | 47 +++- ocelot/AtomicRegionInference/Makefile | 7 +- .../src/InferFreshCons.cpp | 253 +++++++++++++++++- 8 files changed, 462 insertions(+), 64 deletions(-) create mode 100644 benchmarks/ctests/example07.c create mode 100644 benchmarks/ctests/example07.ll rename benchmarks/ctests/{example04.ll => example07.orig.ll} (64%) diff --git a/benchmarks/ctests/example05.c b/benchmarks/ctests/example05.c index 58dddf6..e46b4fb 100644 --- a/benchmarks/ctests/example05.c +++ b/benchmarks/ctests/example05.c @@ -14,12 +14,13 @@ void log(int x) { void app() { int x = input(); - for (int i = 0; i < 10; i++) { - log(x); - } for (int i = 0; i < 10; i++) { log(1); + log(x); } + // for (int i = 0; i < 10; i++) { + // log(1); + // } Fresh(x); } diff --git a/benchmarks/ctests/example05.ll b/benchmarks/ctests/example05.ll index 4dfc800..f137154 100644 --- a/benchmarks/ctests/example05.ll +++ b/benchmarks/ctests/example05.ll @@ -40,8 +40,8 @@ declare i32 @printf(ptr noundef, ...) 
#1 define void @app() #0 { entry: %x = alloca i32, align 4 + %0 = alloca i32, align 4 %i = alloca i32, align 4 - %i1 = alloca i32, align 4 call void @atomic_start() %call = call i32 @input() store i32 %call, ptr %x, align 4 @@ -49,42 +49,42 @@ entry: br label %for.cond for.cond: ; preds = %entry, %for.inc - %0 = load i32, ptr %i, align 4 - %cmp = icmp slt i32 %0, 10 + %1 = load i32, ptr %i, align 4 + %cmp = icmp slt i32 %1, 10 br i1 %cmp, label %for.body, label %for.end for.body: ; preds = %for.cond - %1 = load i32, ptr %x, align 4 - call void @log(i32 noundef %1) + %2 = load i32, ptr %x, align 4 + call void @log(i32 noundef %2) br label %for.inc for.inc: ; preds = %for.body - %2 = load i32, ptr %i, align 4 - %inc = add nsw i32 %2, 1 + %3 = load i32, ptr %i, align 4 + %inc = add nsw i32 %3, 1 store i32 %inc, ptr %i, align 4 br label %for.cond, !llvm.loop !5 for.end: ; preds = %for.cond - store i32 0, ptr %i1, align 4 - br label %for.cond2 + call void @atomic_end() + store i32 0, ptr %0, align 4 + br label %for.cond1 -for.cond2: ; preds = %for.inc5, %for.end - %3 = load i32, ptr %i1, align 4 - %cmp3 = icmp slt i32 %3, 10 - br i1 %cmp3, label %for.body4, label %for.end7 +for.cond1: ; preds = %for.inc3, %for.end + %4 = load i32, ptr %0, align 4 + %5 = icmp slt i32 %4, 10 + br i1 %5, label %for.body2, label %for.end4 -for.body4: ; preds = %for.cond2 +for.body2: ; preds = %for.cond1 call void @log(i32 noundef 1) - br label %for.inc5 + br label %for.inc3 -for.inc5: ; preds = %for.body4 - %4 = load i32, ptr %i1, align 4 - %inc6 = add nsw i32 %4, 1 - store i32 %inc6, ptr %i1, align 4 - br label %for.cond2, !llvm.loop !7 +for.inc3: ; preds = %for.body2 + %6 = load i32, ptr %0, align 4 + %7 = add nsw i32 %6, 1 + store i32 %7, ptr %0, align 4 + br label %for.cond1, !llvm.loop !5 -for.end7: ; preds = %for.cond2 - call void @atomic_end() +for.end4: ; preds = %for.cond1 ret void } @@ -108,4 +108,3 @@ attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" 
"stack-pr !4 = !{!"Homebrew clang version 17.0.2"} !5 = distinct !{!5, !6} !6 = !{!"llvm.loop.mustprogress"} -!7 = distinct !{!7, !6} diff --git a/benchmarks/ctests/example05.orig.ll b/benchmarks/ctests/example05.orig.ll index 6dcc44f..c9e181a 100644 --- a/benchmarks/ctests/example05.orig.ll +++ b/benchmarks/ctests/example05.orig.ll @@ -49,7 +49,6 @@ define void @app() #0 { entry: %x = alloca i32, align 4 %i = alloca i32, align 4 - %i1 = alloca i32, align 4 %call = call i32 @input() store i32 %call, ptr %x, align 4 store i32 0, ptr %i, align 4 @@ -61,6 +60,7 @@ for.cond: ; preds = %for.inc, %entry br i1 %cmp, label %for.body, label %for.end for.body: ; preds = %for.cond + call void @log(i32 noundef 1) %1 = load i32, ptr %x, align 4 call void @log(i32 noundef %1) br label %for.inc @@ -72,27 +72,8 @@ for.inc: ; preds = %for.body br label %for.cond, !llvm.loop !5 for.end: ; preds = %for.cond - store i32 0, ptr %i1, align 4 - br label %for.cond2 - -for.cond2: ; preds = %for.inc5, %for.end - %3 = load i32, ptr %i1, align 4 - %cmp3 = icmp slt i32 %3, 10 - br i1 %cmp3, label %for.body4, label %for.end7 - -for.body4: ; preds = %for.cond2 - call void @log(i32 noundef 1) - br label %for.inc5 - -for.inc5: ; preds = %for.body4 - %4 = load i32, ptr %i1, align 4 - %inc6 = add nsw i32 %4, 1 - store i32 %inc6, ptr %i1, align 4 - br label %for.cond2, !llvm.loop !7 - -for.end7: ; preds = %for.cond2 - %5 = load i32, ptr %x, align 4 - call void @Fresh(i32 noundef %5) + %3 = load i32, ptr %x, align 4 + call void @Fresh(i32 noundef %3) ret void } @@ -116,4 +97,3 @@ attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-pr !4 = !{!"Homebrew clang version 17.0.2"} !5 = distinct !{!5, !6} !6 = !{!"llvm.loop.mustprogress"} -!7 = distinct !{!7, !6} diff --git a/benchmarks/ctests/example07.c b/benchmarks/ctests/example07.c new file mode 100644 index 0000000..19fe98d --- /dev/null +++ b/benchmarks/ctests/example07.c @@ -0,0 +1,27 @@ +#include + +void Fresh(int x) {} + 
+void atomic_start() {} +void atomic_end() {} + +int input() { return 0; } +int (*IO_NAME)() = input; + +void log(int x) { + printf("%d\n", x); +} + +void app() { + int x = input(); + for (int i = 0; i < 10; i++) { + int y = 1; + log(x); + log(y + 2); + } + Fresh(x); +} + +int main() { + app(); +} \ No newline at end of file diff --git a/benchmarks/ctests/example07.ll b/benchmarks/ctests/example07.ll new file mode 100644 index 0000000..41881ab --- /dev/null +++ b/benchmarks/ctests/example07.ll @@ -0,0 +1,114 @@ +; ModuleID = '../../benchmarks/ctests/example07.c' +source_filename = "../../benchmarks/ctests/example07.c" +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-macosx12.0.0" + +@IO_NAME = global ptr @input, align 8 +@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @atomic_start() #0 { +entry: + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @atomic_end() #0 { +entry: + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @input() #0 { +entry: + ret i32 0 +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @log(i32 noundef %x) #0 { +entry: + %x.addr = alloca i32, align 4 + store i32 %x, ptr %x.addr, align 4 + %0 = load i32, ptr %x.addr, align 4 + %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %0) + ret void +} + +declare i32 @printf(ptr noundef, ...) 
#1 + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @app() #0 { +entry: + %x = alloca i32, align 4 + %0 = alloca i32, align 4 + %i = alloca i32, align 4 + %y = alloca i32, align 4 + call void @atomic_start() + %call = call i32 @input() + store i32 %call, ptr %x, align 4 + store i32 0, ptr %i, align 4 + br label %for.cond + +for.cond: ; preds = %entry, %for.inc + %1 = load i32, ptr %i, align 4 + %cmp = icmp slt i32 %1, 10 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %2 = load i32, ptr %x, align 4 + call void @log(i32 noundef %2) + br label %for.inc + +for.inc: ; preds = %for.body + %3 = load i32, ptr %i, align 4 + %inc = add nsw i32 %3, 1 + store i32 %inc, ptr %i, align 4 + br label %for.cond, !llvm.loop !5 + +for.end: ; preds = %for.cond + call void @atomic_end() + store i32 0, ptr %0, align 4 + br label %for.cond1 + +for.cond1: ; preds = %for.inc3, %for.end + %4 = load i32, ptr %0, align 4 + %5 = icmp slt i32 %4, 10 + br i1 %5, label %for.body2, label %for.end4 + +for.body2: ; preds = %for.cond1 + store i32 1, ptr %y, align 4 + %6 = load i32, ptr %y, align 4 + %7 = add nsw i32 %6, 2 + call void @log(i32 noundef %7) + br label %for.inc3 + +for.inc3: ; preds = %for.body2 + %8 = load i32, ptr %0, align 4 + %9 = add nsw i32 %8, 1 + store i32 %9, ptr %0, align 4 + br label %for.cond1, !llvm.loop !5 + +for.end4: ; preds = %for.cond1 + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @main() #0 { +entry: + call void @app() + ret i32 0 +} + +attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } +attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" 
"stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } + +!llvm.module.flags = !{!0, !1, !2, !3} +!llvm.ident = !{!4} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 8, !"PIC Level", i32 2} +!2 = !{i32 7, !"uwtable", i32 1} +!3 = !{i32 7, !"frame-pointer", i32 1} +!4 = !{!"Homebrew clang version 17.0.2"} +!5 = distinct !{!5, !6} +!6 = !{!"llvm.loop.mustprogress"} diff --git a/benchmarks/ctests/example04.ll b/benchmarks/ctests/example07.orig.ll similarity index 64% rename from benchmarks/ctests/example04.ll rename to benchmarks/ctests/example07.orig.ll index a3a1d72..299b165 100644 --- a/benchmarks/ctests/example04.ll +++ b/benchmarks/ctests/example07.orig.ll @@ -1,11 +1,19 @@ -; ModuleID = '../../benchmarks/ctests/example04.c' -source_filename = "../../benchmarks/ctests/example04.c" +; ModuleID = '../../benchmarks/ctests/example07.c' +source_filename = "../../benchmarks/ctests/example07.c" target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-macosx12.0.0" @IO_NAME = global ptr @input, align 8 @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @Fresh(i32 noundef %x) #0 { +entry: + %x.addr = alloca i32, align 4 + store i32 %x, ptr %x.addr, align 4 + ret void +} + ; Function Attrs: noinline nounwind optnone ssp uwtable(sync) define void @atomic_start() #0 { entry: @@ -40,19 +48,36 @@ declare i32 @printf(ptr noundef, ...) 
#1 define void @app() #0 { entry: %x = alloca i32, align 4 + %i = alloca i32, align 4 %y = alloca i32, align 4 - call void @atomic_start() %call = call i32 @input() - call void @atomic_start() - %call1 = call i32 @input() - call void @atomic_end() - store i32 %call1, ptr %y, align 4 - %0 = load i32, ptr %y, align 4 - call void @log(i32 noundef %0) - call void @atomic_end() store i32 %call, ptr %x, align 4 + store i32 0, ptr %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %0 = load i32, ptr %i, align 4 + %cmp = icmp slt i32 %0, 10 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + store i32 1, ptr %y, align 4 %1 = load i32, ptr %x, align 4 call void @log(i32 noundef %1) + %2 = load i32, ptr %y, align 4 + %add = add nsw i32 %2, 2 + call void @log(i32 noundef %add) + br label %for.inc + +for.inc: ; preds = %for.body + %3 = load i32, ptr %i, align 4 + %inc = add nsw i32 %3, 1 + store i32 %inc, ptr %i, align 4 + br label %for.cond, !llvm.loop !5 + +for.end: ; preds = %for.cond + %4 = load i32, ptr %x, align 4 + call void @Fresh(i32 noundef %4) ret void } @@ -74,3 +99,5 @@ attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-pr !2 = !{i32 7, !"uwtable", i32 1} !3 = !{i32 7, !"frame-pointer", i32 1} !4 = !{!"Homebrew clang version 17.0.2"} +!5 = distinct !{!5, !6} +!6 = !{!"llvm.loop.mustprogress"} diff --git a/ocelot/AtomicRegionInference/Makefile b/ocelot/AtomicRegionInference/Makefile index d93e037..451976c 100644 --- a/ocelot/AtomicRegionInference/Makefile +++ b/ocelot/AtomicRegionInference/Makefile @@ -1,4 +1,4 @@ -.PHONY: clean_tests clean eg1 eg2 +.PHONY: clean_tests clean eg1 eg2 eg3 eg4 eg5 eg6 eg7 run_eg1 run_eg2 run_eg3 run_eg4 run_eg5 run_eg6 run_eg7 all: make eg1 @@ -7,6 +7,7 @@ all: make eg4 make eg5 make eg6 + make eg7 eg1: TEST=example01 make test @@ -20,6 +21,8 @@ eg5: TEST=example05 make test eg6: TEST=example06 make test +eg7: + TEST=example07 make test run_eg1: 
TEST=example01 make run @@ -33,6 +36,8 @@ run_eg5: TEST=example05 make run run_eg6: TEST=example06 make run +run_eg7: + TEST=example07 make run test: $(MAKE) -C build diff --git a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp index c0252ff..65e2b0c 100644 --- a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp +++ b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp @@ -1,5 +1,6 @@ #include "include/InferFreshCons.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/PostDominators.h" Instruction* InferFreshCons::insertRegionInst(InsertKind insertKind, Instruction* insertBefore) { @@ -219,6 +220,16 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v std::set seenBlocks; bool hasRewired = false; + /* + - Check if stmt is in a loop + - Remove stmt from loop + - Clone that loop + - Remove tainted insts in cloned loop + - Connect the two loops + */ + LoopInfo& LI = FAM->getResult(*homeFun); + std::map> untaintedClones; + #if DEBUG errs() << "[Loop regionsNeeded] Go over all blocks\n"; #endif @@ -232,12 +243,14 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v } if (!isTainted && seenBlocks.find(&B) == seenBlocks.end()) { +#if DEBUG + errs() << "[Loop B] Untainted block " << B.getName() << "\n"; +#endif seenBlocks.emplace(&B); - errs() << "Terminator: " << *B.getTerminator() << "\n"; } else if (isTainted && seenBlocks.find(&B) == seenBlocks.end()) { #if DEBUG - errs() << "[Loop B] New tainted block\n"; + errs() << "[Loop B] Tainted block " << B.getName() << "\n"; #endif seenBlocks.emplace(&B); @@ -270,7 +283,7 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v auto shouldDelay = find(targetInsts.begin(), targetInsts.end(), &I) == targetInsts.end() && !inExistingSet; #if DEBUG - errs() << " Should" << (shouldDelay ? " " : " NOT ") << "be delayed\n"; + errs() << "__Should" << (shouldDelay ? 
" " : " NOT ") << "be delayed__\n"; #endif Instruction* clone; @@ -290,6 +303,20 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v clone->setOperand(i, it->second); } } + + if (shouldDelay) { + auto* loop = LI.getLoopFor(&B); + if (loop != nullptr) { +#if DEBUG + errs() << "In loop, keep track of it\n"; +#endif + + if (untaintedClones.count(loop) == 0) + untaintedClones[loop] = {clone}; + else + untaintedClones[loop].push_back(clone); + } + } } else if (isa(&I)) { // In case I is an IO function call, we don't clone it // and instead map it to itself for referencing later @@ -303,6 +330,20 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v clone->setOperand(0, it->second); } } + + if (shouldDelay) { + auto* loop = LI.getLoopFor(&B); + if (loop != nullptr) { +#if DEBUG + errs() << "In loop, keep track of it\n"; +#endif + + if (untaintedClones.count(loop) == 0) + untaintedClones[loop] = {clone}; + else + untaintedClones[loop].push_back(clone); + } + } } else if (isa(&I)) { // Check whether any IO function calls coming after depend on this store // If so, do NOT delay @@ -326,6 +367,20 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v assert(it != clonedInsts.end()); clone->setOperand(0, it->second); } + + if (shouldDelay) { + auto* loop = LI.getLoopFor(&B); + if (loop != nullptr) { +#if DEBUG + errs() << "In loop, keep track of it\n"; +#endif + + if (untaintedClones.count(loop) == 0) + untaintedClones[loop] = {clone}; + else + untaintedClones[loop].push_back(clone); + } + } } else if (isa(&I)) { // Check whether any IO function calls coming after depend on this load // If so, do NOT delay @@ -338,6 +393,20 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v } clone = I.clone(); + + if (shouldDelay) { + auto* loop = LI.getLoopFor(&B); + if (loop != nullptr) { +#if DEBUG + errs() << "In loop, keep track of it\n"; +#endif + + if 
(untaintedClones.count(loop) == 0) + untaintedClones[loop] = {clone}; + else + untaintedClones[loop].push_back(clone); + } + } } else { clone = I.clone(); } @@ -352,6 +421,9 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v } IRBuilder builder(&B); +#if DEBUG + errs() << "Add delayed instructions to end of block\n"; +#endif // Append each delayed instruction to the end of the block, // in the original order for (auto* I : toDelay) builder.Insert(I); @@ -366,10 +438,10 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v errs() << *I << "\n"; #endif if (find(toDelete.begin(), toDelete.end(), &*I) != toDelete.end()) { + I = I->eraseFromParent(); #if DEBUG errs() << "Deleted\n"; #endif - I = I->eraseFromParent(); } else I++; } @@ -406,6 +478,179 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v #endif } } + + for (auto& [taintedLoop, untaintedClones] : untaintedClones) { +#if DEBUG + errs() << "Clone taintedLoop\n"; +#endif + std::vector clonedLoop; + BasicBlock* forEnd; + Instruction* clonedAlloca; + Value* initVal; + inst_inst_map clones; + + auto loopBlocks = taintedLoop->getBlocks(); + assert(loopBlocks.size() == 3); + for (int i = 0; i < loopBlocks.size(); i++) { + auto* block = loopBlocks[i]; + auto* clonedBlock = BasicBlock::Create(block->getContext(), block->getName(), homeFun); + IRBuilder builder(clonedBlock); + + Instruction* prev; + for (auto& I : *block) { + auto* clonedI = I.clone(); + + // Only extract if untainted + // Covers the cond and inc blocks; they are processed on the fly due to + // their special role in keeping the loop going + if (find(targetInsts.begin(), targetInsts.end(), &I) == targetInsts.end()) { + // for.cond + if (i == 0) { + if (auto* li = dyn_cast(clonedI)) { + auto* ptr = li->getPointerOperand(); + + if (auto* ai = dyn_cast(*ptr->uses().begin())) { + IRBuilder builder(ai); + clonedAlloca = 
builder.CreateAlloca(ai->getAllocatedType()); + } + + for (auto* ptrUser : ptr->users()) { + if (auto* si = dyn_cast(ptrUser)) { + if (!isa(si->getOperand(0))) { + initVal = si->getOperand(0); + } + } + } + + li->setOperand(0, clonedAlloca); + prev = li; + } else if (auto* ci = dyn_cast(clonedI)) { + // TODO: Check if operand originates from the current loop + if (isa(ci->getOperand(0))) { + ci->setOperand(0, prev); + } + prev = ci; + } else if (auto* bi = dyn_cast(clonedI)) { + assert(bi->isConditional()); + bi->setCondition(prev); + + if (auto* B = dyn_cast(bi->getOperand(1))) { + forEnd = B; + } + } + } + + // for.inc + else if (i == 2) { + if (auto* li = dyn_cast(clonedI)) { + li->setOperand(0, clonedAlloca); + prev = li; + } else if (auto* bi = dyn_cast(clonedI)) { + auto* lhs = bi->getOperand(0); + if (isa(lhs)) bi->setOperand(0, prev); + auto* rhs = bi->getOperand(1); + if (isa(rhs)) bi->setOperand(1, prev); + prev = bi; + } else if (auto* si = dyn_cast(clonedI)) { + si->setOperand(0, prev); + si->setOperand(1, clonedAlloca); + } + } + + clones.emplace(&I, clonedI); + builder.Insert(clonedI); + } + } + + // for.body + // Performs a standard sound cloning procedure (on each operand); + // the instructions in the body are unrelated to the loop except the final + // branch instruction + for (auto& I : *clonedBlock) { + if (i == 1) { + if (auto* si = dyn_cast(&I)) { + for (int i = 0; i < si->getNumOperands(); i++) { + auto* I = dyn_cast(si->getOperand(i)); + if (I != nullptr) { + inst_inst_map::iterator it = clones.find(I); + if (it != clones.end()) si->setOperand(i, it->second); + } + } + } else if (auto* li = dyn_cast(&I)) { + auto* ptr = dyn_cast(li->getPointerOperand()); + inst_inst_map::iterator it = clones.find(ptr); + if (it != clones.end()) li->setOperand(0, it->second); + } else if (auto* bi = dyn_cast(&I)) { + auto* lhs = dyn_cast(bi->getOperand(0)); + inst_inst_map::iterator lhsIt = clones.find(lhs); + if (lhsIt != clones.end()) bi->setOperand(0, 
lhsIt->second); + + auto* rhs = dyn_cast(bi->getOperand(1)); + inst_inst_map::iterator rhsIt = clones.find(rhs); + if (rhsIt != clones.end()) bi->setOperand(0, rhsIt->second); + } else if (auto* ci = dyn_cast(&I)) { + for (int i = 0; i < ci->getNumOperands() - 1; i++) { + auto* arg = dyn_cast(ci->getOperand(i)); + inst_inst_map::iterator argIt = clones.find(arg); + if (argIt != clones.end()) ci->setOperand(i, argIt->second); + } + } + } + } + + clonedLoop.push_back(clonedBlock); + } + + BasicBlock* forEndClone = BasicBlock::Create(forEnd->getContext(), forEnd->getName(), homeFun); + IRBuilder builder(forEndClone); + for (auto& I : *forEnd) { + if (!isa(I) && !isa(I)) { + auto* clone = I.clone(); + builder.Insert(clone); + } + + if (isa(I)) { + IRBuilder builder(&I); + builder.CreateBr(clonedLoop[0]); + I.removeFromParent(); + break; + } + } + + for (auto& I : *forEnd) { + if (auto* bi = dyn_cast(&I)) { + IRBuilder builder(bi); + builder.CreateStore(initVal, clonedAlloca); + } + } + + // Connect the blocks of the new loop + for (int i = 0; i < clonedLoop.size(); i++) { + auto* block = clonedLoop[i]; + for (auto& I : *block) { + if (auto* bi = dyn_cast(&I)) { + // for.cond + if (i == 0) { + bi->setSuccessor(0, clonedLoop[1]); + bi->setSuccessor(1, forEndClone); + } + // for.body + else if (i == 1) { + bi->setSuccessor(0, clonedLoop[2]); + } + // for.inc + else if (i == 2) { + bi->setSuccessor(0, clonedLoop[0]); + } + } + } + errs() << *block << "\n"; + } + + for (auto* untaintedClone : untaintedClones) { + untaintedClone->removeFromParent(); + } + } #endif auto& domTree = FAM->getResult(*homeFun); From db36f5da8b7cb2687b39c63c5a22cd2f8bbbe15c Mon Sep 17 00:00:00 2001 From: Robert Zhang Date: Mon, 11 Mar 2024 18:37:00 -0400 Subject: [PATCH 13/18] [InferAtomsPass] Optimization improvement for parameterized IO calls and refactoring for concision Now, the instructions "tainted" by an IO call will be included in the fresh set as well, making it so that they remain 
preceding the IO call, within their atomic region. This is a more fundamental solution than before, where exceptions were only made to these instructions during optimization. The optimization now has a more modular structure where common instruction patching logic is extracted into a reusable procedure to be run more than once (`Helpers::patchClonedBlock`). It comes into play after cloning a basic block, to rewire its instructions to properly reference each other. Test plan: `make` --- benchmarks/ctests/example04.ll | 76 ++++ benchmarks/ctests/example05.ll | 4 +- benchmarks/ctests/example06.ll | 2 +- benchmarks/ctests/example07.ll | 4 +- ocelot/AtomicRegionInference/Makefile | 7 +- ocelot/AtomicRegionInference/src/Helpers.cpp | 42 ++- .../AtomicRegionInference/src/InferAtoms.cpp | 38 +- .../src/InferFreshCons.cpp | 329 ++++-------------- .../src/TaintTracker.cpp | 1 - .../src/include/Helpers.h | 1 + 10 files changed, 222 insertions(+), 282 deletions(-) create mode 100644 benchmarks/ctests/example04.ll diff --git a/benchmarks/ctests/example04.ll b/benchmarks/ctests/example04.ll new file mode 100644 index 0000000..a3a1d72 --- /dev/null +++ b/benchmarks/ctests/example04.ll @@ -0,0 +1,76 @@ +; ModuleID = '../../benchmarks/ctests/example04.c' +source_filename = "../../benchmarks/ctests/example04.c" +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-macosx12.0.0" + +@IO_NAME = global ptr @input, align 8 +@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @atomic_start() #0 { +entry: + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @atomic_end() #0 { +entry: + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @input() #0 { +entry: + ret i32 0 +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @log(i32 noundef %x) #0 { +entry: +
%x.addr = alloca i32, align 4 + store i32 %x, ptr %x.addr, align 4 + %0 = load i32, ptr %x.addr, align 4 + %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %0) + ret void +} + +declare i32 @printf(ptr noundef, ...) #1 + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @app() #0 { +entry: + %x = alloca i32, align 4 + %y = alloca i32, align 4 + call void @atomic_start() + %call = call i32 @input() + call void @atomic_start() + %call1 = call i32 @input() + call void @atomic_end() + store i32 %call1, ptr %y, align 4 + %0 = load i32, ptr %y, align 4 + call void @log(i32 noundef %0) + call void @atomic_end() + store i32 %call, ptr %x, align 4 + %1 = load i32, ptr %x, align 4 + call void @log(i32 noundef %1) + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @main() #0 { +entry: + call void @app() + ret i32 0 +} + +attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } +attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } + +!llvm.module.flags = !{!0, !1, !2, !3} +!llvm.ident = !{!4} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 8, !"PIC Level", i32 2} +!2 = !{i32 7, !"uwtable", i32 1} +!3 = !{i32 7, !"frame-pointer", i32 1} +!4 = !{!"Homebrew clang version 17.0.2"} diff --git a/benchmarks/ctests/example05.ll b/benchmarks/ctests/example05.ll index f137154..aee5708 100644 --- a/benchmarks/ctests/example05.ll +++ b/benchmarks/ctests/example05.ll @@ -48,7 +48,7 @@ entry: store i32 0, 
ptr %i, align 4 br label %for.cond -for.cond: ; preds = %entry, %for.inc +for.cond: ; preds = %entry, %for.inc, %1 = load i32, ptr %i, align 4 %cmp = icmp slt i32 %1, 10 br i1 %cmp, label %for.body, label %for.end @@ -58,7 +58,7 @@ for.body: ; preds = %for.cond call void @log(i32 noundef %2) br label %for.inc -for.inc: ; preds = %for.body +for.inc: ; preds = %for.body, %3 = load i32, ptr %i, align 4 %inc = add nsw i32 %3, 1 store i32 %inc, ptr %i, align 4 diff --git a/benchmarks/ctests/example06.ll b/benchmarks/ctests/example06.ll index 603f917..fad0c8b 100644 --- a/benchmarks/ctests/example06.ll +++ b/benchmarks/ctests/example06.ll @@ -44,9 +44,9 @@ define void @app() #0 { entry: %i = alloca i32, align 4 %x = alloca i32, align 4 + call void @atomic_start() store i32 1, ptr %i, align 4 %0 = load i32, ptr %i, align 4 - call void @atomic_start() %call = call i32 @input(i32 noundef %0) store i32 %call, ptr %x, align 4 %1 = load i32, ptr %x, align 4 diff --git a/benchmarks/ctests/example07.ll b/benchmarks/ctests/example07.ll index 41881ab..e12917a 100644 --- a/benchmarks/ctests/example07.ll +++ b/benchmarks/ctests/example07.ll @@ -49,7 +49,7 @@ entry: store i32 0, ptr %i, align 4 br label %for.cond -for.cond: ; preds = %entry, %for.inc +for.cond: ; preds = %entry, %for.inc, %1 = load i32, ptr %i, align 4 %cmp = icmp slt i32 %1, 10 br i1 %cmp, label %for.body, label %for.end @@ -59,7 +59,7 @@ for.body: ; preds = %for.cond call void @log(i32 noundef %2) br label %for.inc -for.inc: ; preds = %for.body +for.inc: ; preds = %for.body, %3 = load i32, ptr %i, align 4 %inc = add nsw i32 %3, 1 store i32 %inc, ptr %i, align 4 diff --git a/ocelot/AtomicRegionInference/Makefile b/ocelot/AtomicRegionInference/Makefile index 451976c..b92b0ff 100644 --- a/ocelot/AtomicRegionInference/Makefile +++ b/ocelot/AtomicRegionInference/Makefile @@ -1,4 +1,4 @@ -.PHONY: clean_tests clean eg1 eg2 eg3 eg4 eg5 eg6 eg7 run_eg1 run_eg2 run_eg3 run_eg4 run_eg5 run_eg6 run_eg7 +.PHONY: clean_tests 
clean eg1 eg2 eg3 eg4 eg5 eg6 eg7 eg8 run_eg1 run_eg2 run_eg3 run_eg4 run_eg5 run_eg6 run_eg7 run_eg8 all: make eg1 @@ -8,6 +8,7 @@ all: make eg5 make eg6 make eg7 + make eg8 eg1: TEST=example01 make test @@ -23,6 +24,8 @@ eg6: TEST=example06 make test eg7: TEST=example07 make test +eg8: + TEST=example08 make test run_eg1: TEST=example01 make run @@ -38,6 +41,8 @@ run_eg6: TEST=example06 make run run_eg7: TEST=example07 make run +run_eg8: + TEST=example08 make run test: $(MAKE) -C build diff --git a/ocelot/AtomicRegionInference/src/Helpers.cpp b/ocelot/AtomicRegionInference/src/Helpers.cpp index 5ca398e..896250d 100644 --- a/ocelot/AtomicRegionInference/src/Helpers.cpp +++ b/ocelot/AtomicRegionInference/src/Helpers.cpp @@ -2,9 +2,6 @@ std::string getSimpleNodeLabel(const Value* node) { if (node->hasName()) { - // #if DEBUG - // errs() << "Node has name\n"; - // #endif return node->getName().str(); } @@ -42,3 +39,42 @@ void printIntInsts(const std::map& iim) { errs() << "\n"; } } + +/** + * Given a freshly cloned basic block, repair references among its + * instructions based on a mapping from the original instructions + * to their clones. 
+ * + * @param block The cloned basic block + * @param clonedInsts The mapping from original to cloned instructions + */ +void patchClonedBlock(BasicBlock* block, inst_inst_map clonedInsts) { + for (auto& I : *block) { + if (auto* si = dyn_cast(&I)) { + for (int i = 0; i < si->getNumOperands(); i++) { + auto* operand = dyn_cast(si->getOperand(i)); + if (operand != nullptr) { + inst_inst_map::iterator it = clonedInsts.find(operand); + if (it != clonedInsts.end()) si->setOperand(i, it->second); + } + } + } else if (auto* li = dyn_cast(&I)) { + auto* ptr = dyn_cast(li->getPointerOperand()); + inst_inst_map::iterator it = clonedInsts.find(ptr); + if (it != clonedInsts.end()) li->setOperand(0, it->second); + } else if (auto* bi = dyn_cast(&I)) { + for (unsigned i = 0; i < bi->getNumOperands(); i++) { + auto* operand = dyn_cast(bi->getOperand(i)); + inst_inst_map::iterator it = clonedInsts.find(operand); + if (it != clonedInsts.end()) bi->setOperand(i, it->second); + } + } else if (auto* ci = dyn_cast(&I)) { + // The last operand is the called function + for (unsigned i = 0; i < ci->getNumOperands() - 1; i++) { + auto* arg = dyn_cast(ci->getOperand(i)); + inst_inst_map::iterator argIt = clonedInsts.find(arg); + if (argIt != clonedInsts.end()) ci->setOperand(i, argIt->second); + } + } + } +} diff --git a/ocelot/AtomicRegionInference/src/InferAtoms.cpp b/ocelot/AtomicRegionInference/src/InferAtoms.cpp index 42c8f3b..428adab 100644 --- a/ocelot/AtomicRegionInference/src/InferAtoms.cpp +++ b/ocelot/AtomicRegionInference/src/InferAtoms.cpp @@ -547,7 +547,7 @@ inst_vec_vec InferAtomsPass::collectFresh(inst_vec_vec freshVars, inst_insts_map inst_vec_vec toReturn; #if DEBUG - errs() << "Go over fresh freshSets\n"; + errs() << "Go over freshSets\n"; #endif for (auto varSet : freshVars) { #if DEBUG @@ -572,11 +572,41 @@ inst_vec_vec InferAtomsPass::collectFresh(inst_vec_vec freshVars, inst_insts_map #endif unique.insert(use); - for (auto* input : inputMap[use]) { #if DEBUG - 
errs() << "[Loop inputMap[use]] Add src input of use to unique: " << *input << "\n"; + errs() << "[Loop uses] Go over each src input of use\n"; #endif - unique.insert(input); + for (auto* input : inputMap[use]) { +#if DEBUG + errs() << "Src input: " << *input << "\n"; + errs() << "Add insts tainted by it to unique\n"; +#endif + + if (unique.count(input) == 0) { + unique.insert(input); + + auto* ci = dyn_cast(input); + std::queue toExplore; + toExplore.push(ci); + + while (!toExplore.empty()) { + auto* I = toExplore.front(); + toExplore.pop(); + + // TODO: If there's no tainted inst in the chain, + // then don't need to include in unique + errs() << "[Loop inputInst] Found inst tainted by src input: " << *I << "\n"; + if (isa(I) || isa(I) || isa(I)) { + unique.insert(I); + for (auto& operand : I->operands()) + if (auto* operandI = dyn_cast(operand)) + toExplore.push(operandI); + } else if (auto* ai = dyn_cast(I)) { + for (auto* user : ai->users()) + if (auto* userI = dyn_cast(user)) + if (unique.count(userI) == 0) toExplore.push(userI); + } + } + } } } diff --git a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp index 65e2b0c..81c7364 100644 --- a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp +++ b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp @@ -104,8 +104,10 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v #if DEBUG errs() << "Build map from inst to bb\n"; #endif - for (auto* targetInst : targetInsts) + for (auto* targetInst : targetInsts) { + // errs() << "Check: " << *targetInst << "\n"; targetBlocks[targetInst] = targetInst->getParent(); + } #if DEBUG errs() << "Add map to regionsNeeded\n"; @@ -187,48 +189,11 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v errs() << "[Loop regionsNeeded] Found home fun: " << homeFun->getName() << "\n"; #endif - // Tainted blocks right before untained blocks - std::vector lastTainted; - 
BasicBlock* prevTainted; - - for (auto& B : *homeFun) { - bool isTainted = false; - - for (auto& [_, taintedBlock] : taintedBlocks) { - if (&B == taintedBlock) { - isTainted = true; - break; - } - } - - if (!isTainted) { - errs() << "Not tainted: " << B << "\n"; - if (prevTainted != nullptr && find(lastTainted.begin(), lastTainted.end(), prevTainted) == lastTainted.end()) - lastTainted.push_back(prevTainted); - } else { - prevTainted = &B; - } - } - - for (auto* B : lastTainted) { - errs() << "lastTainted: " << *B << "\n"; - } - - // lastTainted[1]->setNext(); - #if OPT std::set seenBlocks; - bool hasRewired = false; - - /* - - Check if stmt is in a loop - - Remove stmt from loop - - Clone that loop - - Remove tainted insts in cloned loop - - Connect the two loops - */ + LoopInfo& LI = FAM->getResult(*homeFun); - std::map> untaintedClones; + std::map> untaintedLoopClones; #if DEBUG errs() << "[Loop regionsNeeded] Go over all blocks\n"; @@ -242,209 +207,64 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v } } - if (!isTainted && seenBlocks.find(&B) == seenBlocks.end()) { + if (isTainted && seenBlocks.find(&B) == seenBlocks.end()) { #if DEBUG - errs() << "[Loop B] Untainted block " << B.getName() << "\n"; -#endif - seenBlocks.emplace(&B); - errs() << "Terminator: " << *B.getTerminator() << "\n"; - } else if (isTainted && seenBlocks.find(&B) == seenBlocks.end()) { -#if DEBUG - errs() << "[Loop B] Tainted block " << B.getName() << "\n"; + errs() << "Tainted block " << B.getName() << ":\n"; #endif seenBlocks.emplace(&B); - // A mapping from original instructions to their clones - inst_inst_map clonedInsts; - // Instructions to be delayed till the end of the block - inst_vec toDelay; - // (The original) instructions to be deleted - inst_vec toDelete; + inst_vec toDelete, toDelay; + inst_inst_map instClones; for (auto& I : B) { #if DEBUG errs() << I << "\n"; #endif - bool isRegionBoundary = false; - if (auto* ci = dyn_cast(&I)) { - auto 
funName = ci->getCalledFunction()->getName(); - isRegionBoundary = - funName.equals("atomic_start") || funName.equals("atomic_end"); - } - // Only attempt to schedule instruction if it's not alloca or a region boundary - if (!isa(I) && !isRegionBoundary) { - bool inExistingSet = false; - for (auto insts : *other) { - if (find(insts.begin(), insts.end(), &I) != insts.end()) { - inExistingSet = true; - } + bool inExistingSet = false; + for (auto insts : *other) { + if (find(insts.begin(), insts.end(), &I) != insts.end()) { + inExistingSet = true; + break; } + } - auto shouldDelay = find(targetInsts.begin(), targetInsts.end(), &I) == targetInsts.end() && !inExistingSet; -#if DEBUG - errs() << "__Should" << (shouldDelay ? " " : " NOT ") << "be delayed__\n"; -#endif - - Instruction* clone; - - // Clone each untainted instruction to be appended to - // the end of the basic block, in the original order - if (isa(I)) { - clone = I.clone(); - - for (int i = 0; i < 2; i++) { - if (auto* op = dyn_cast(I.getOperand(i))) { - // Since operands don't get cloned along the eway, - // look up the clone of each operand... - inst_inst_map::iterator it = clonedInsts.find(op); - assert(it != clonedInsts.end()); - // ...and overwrite the original operand with it - clone->setOperand(i, it->second); - } - } - - if (shouldDelay) { - auto* loop = LI.getLoopFor(&B); - if (loop != nullptr) { -#if DEBUG - errs() << "In loop, keep track of it\n"; -#endif - - if (untaintedClones.count(loop) == 0) - untaintedClones[loop] = {clone}; - else - untaintedClones[loop].push_back(clone); - } - } - } else if (isa(&I)) { - // In case I is an IO function call, we don't clone it - // and instead map it to itself for referencing later - - clone = shouldDelay ? 
I.clone() : &I; - - if (shouldDelay && I.getNumOperands() > 1) { - if (auto* op = dyn_cast(I.getOperand(0))) { - inst_inst_map::iterator it = clonedInsts.find(op); - assert(it != clonedInsts.end()); - clone->setOperand(0, it->second); - } - } - - if (shouldDelay) { - auto* loop = LI.getLoopFor(&B); - if (loop != nullptr) { -#if DEBUG - errs() << "In loop, keep track of it\n"; -#endif - - if (untaintedClones.count(loop) == 0) - untaintedClones[loop] = {clone}; - else - untaintedClones[loop].push_back(clone); - } - } - } else if (isa(&I)) { - // Check whether any IO function calls coming after depend on this store - // If so, do NOT delay - auto* storePtr = I.getOperand(1); - for (auto* user : storePtr->users()) { - if (auto* li = dyn_cast(user)) { - for (auto* liUser : li->users()) { - if (auto* ci = dyn_cast(liUser)) { - if (inputInsts->find(ci) != inputInsts->end()) { - shouldDelay = false; - } - } - } - } - } - - clone = I.clone(); - - if (auto* op = dyn_cast(I.getOperand(0))) { - inst_inst_map::iterator it = clonedInsts.find(op); - assert(it != clonedInsts.end()); - clone->setOperand(0, it->second); - } + bool isAtomicBoundary = false; + if (auto* ci = dyn_cast(&I)) { + auto* calledFun = ci->getCalledFunction(); + if (calledFun == this->atomStart || calledFun == this->atomEnd) + isAtomicBoundary = true; + } - if (shouldDelay) { - auto* loop = LI.getLoopFor(&B); - if (loop != nullptr) { + // TODO: Exception with the entry block to a loop (prepone untainted insts instead) + if (find(targetInsts.begin(), targetInsts.end(), &I) == targetInsts.end() && !isa(&I) && !inExistingSet && !isAtomicBoundary) { #if DEBUG - errs() << "In loop, keep track of it\n"; + errs() << "__Should be delayed__\n"; #endif + auto* clone = I.clone(); + instClones.emplace(&I, clone); + toDelete.push_back(&I); + toDelay.push_back(clone); - if (untaintedClones.count(loop) == 0) - untaintedClones[loop] = {clone}; - else - untaintedClones[loop].push_back(clone); - } - } - } else if (isa(&I)) { - 
// Check whether any IO function calls coming after depend on this load - // If so, do NOT delay - for (auto* user : I.users()) { - if (auto* ci = dyn_cast(user)) { - if (inputInsts->find(ci) != inputInsts->end()) { - shouldDelay = false; - } - } - } - - clone = I.clone(); - - if (shouldDelay) { - auto* loop = LI.getLoopFor(&B); - if (loop != nullptr) { + auto* loop = LI.getLoopFor(&B); + if (loop != nullptr) { #if DEBUG - errs() << "In loop, keep track of it\n"; + errs() << "__In loop, keep track of it__\n"; #endif - - if (untaintedClones.count(loop) == 0) - untaintedClones[loop] = {clone}; - else - untaintedClones[loop].push_back(clone); - } - } - } else { - clone = I.clone(); - } - - clonedInsts.emplace(&I, clone); - - if (shouldDelay) { - toDelete.push_back(&I); - toDelay.push_back(clone); + if (untaintedLoopClones.count(loop) == 0) + untaintedLoopClones[loop] = {clone}; + else + untaintedLoopClones[loop].push_back(clone); } } } - IRBuilder builder(&B); -#if DEBUG - errs() << "Add delayed instructions to end of block\n"; -#endif - // Append each delayed instruction to the end of the block, - // in the original order - for (auto* I : toDelay) builder.Insert(I); + for (auto* I : toDelete) I->removeFromParent(); -#if DEBUG - errs() << "Delete originals:\n"; -#endif - auto I = B.begin(); - // Delete the originals - for (; I != B.end();) { -#if DEBUG - errs() << *I << "\n"; -#endif - if (find(toDelete.begin(), toDelete.end(), &*I) != toDelete.end()) { - I = I->eraseFromParent(); -#if DEBUG - errs() << "Deleted\n"; -#endif - } else - I++; - } + IRBuilder BBuilder(&B); + for (auto* I : toDelay) BBuilder.Insert(I); + + patchClonedBlock(&B, instClones); // Sync freshSets if (other != nullptr) { @@ -479,18 +299,20 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v } } - for (auto& [taintedLoop, untaintedClones] : untaintedClones) { + for (auto& [taintedLoop, untaintedClones] : untaintedLoopClones) { #if DEBUG errs() << "Clone 
taintedLoop\n"; #endif + errs() << "ayo\n"; std::vector clonedLoop; BasicBlock* forEnd; Instruction* clonedAlloca; Value* initVal; - inst_inst_map clones; + inst_inst_map instClones; auto loopBlocks = taintedLoop->getBlocks(); assert(loopBlocks.size() == 3); + for (int i = 0; i < loopBlocks.size(); i++) { auto* block = loopBlocks[i]; auto* clonedBlock = BasicBlock::Create(block->getContext(), block->getName(), homeFun); @@ -557,7 +379,7 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v } } - clones.emplace(&I, clonedI); + instClones.emplace(&I, clonedI); builder.Insert(clonedI); } } @@ -566,37 +388,7 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v // Performs a standard sound cloning procedure (on each operand); // the instructions in the body are unrelated to the loop except the final // branch instruction - for (auto& I : *clonedBlock) { - if (i == 1) { - if (auto* si = dyn_cast(&I)) { - for (int i = 0; i < si->getNumOperands(); i++) { - auto* I = dyn_cast(si->getOperand(i)); - if (I != nullptr) { - inst_inst_map::iterator it = clones.find(I); - if (it != clones.end()) si->setOperand(i, it->second); - } - } - } else if (auto* li = dyn_cast(&I)) { - auto* ptr = dyn_cast(li->getPointerOperand()); - inst_inst_map::iterator it = clones.find(ptr); - if (it != clones.end()) li->setOperand(0, it->second); - } else if (auto* bi = dyn_cast(&I)) { - auto* lhs = dyn_cast(bi->getOperand(0)); - inst_inst_map::iterator lhsIt = clones.find(lhs); - if (lhsIt != clones.end()) bi->setOperand(0, lhsIt->second); - - auto* rhs = dyn_cast(bi->getOperand(1)); - inst_inst_map::iterator rhsIt = clones.find(rhs); - if (rhsIt != clones.end()) bi->setOperand(0, rhsIt->second); - } else if (auto* ci = dyn_cast(&I)) { - for (int i = 0; i < ci->getNumOperands() - 1; i++) { - auto* arg = dyn_cast(ci->getOperand(i)); - inst_inst_map::iterator argIt = clones.find(arg); - if (argIt != clones.end()) ci->setOperand(i, 
argIt->second); - } - } - } - } + if (i == 1) patchClonedBlock(clonedBlock, instClones); clonedLoop.push_back(clonedBlock); } @@ -644,11 +436,10 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v } } } - errs() << *block << "\n"; } for (auto* untaintedClone : untaintedClones) { - untaintedClone->removeFromParent(); + if (!isa(untaintedClone)) untaintedClone->removeFromParent(); } } #endif @@ -658,11 +449,11 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v auto* startDom = taintedBlocks.begin()->second; for (auto& [_, B] : taintedBlocks) startDom = domTree.findNearestCommonDominator(B, startDom); -#if DEBUG - errs() << "[Loop regionsNeeded] startDom: " << *startDom << "\n"; -#endif + // #if DEBUG + // errs() << "[Loop regionsNeeded] startDom: " << *startDom << "\n"; + // #endif - // TODO: if an inst in the set is in the bb, we can truncate? + // TODO: if an inst in the set is in the bb, we can truncate? #if DEBUG errs() << "Start post dom tree analysis\n"; @@ -683,9 +474,9 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v endDom = postDomTree.findNearestCommonDominator(taintedBlock, endDom); } -#if DEBUG - errs() << "[Loop regionsNeeded] endDom: " << *endDom << "\n"; -#endif + // #if DEBUG + // errs() << "[Loop regionsNeeded] endDom: " << *endDom << "\n"; + // #endif if (startDom == nullptr) { errs() << "[Error] Null startDom\n"; @@ -697,11 +488,11 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v startDom = domTree.findNearestCommonDominator(startDom, endDom); endDom = postDomTree.findNearestCommonDominator(startDom, endDom); -#if DEBUG - errs() << "[Loop regionsNeeded] After matching scope\n"; - errs() << "[Loop regionsNeeded] startDom: " << *startDom << "\n"; - errs() << "[Loop regionsNeeded] endDom: " << *endDom << "\n"; -#endif + // #if DEBUG + // errs() << "[Loop regionsNeeded] After matching scope\n"; + // errs() << "[Loop 
regionsNeeded] startDom: " << *startDom << "\n"; + // errs() << "[Loop regionsNeeded] endDom: " << *endDom << "\n"; + // #endif // Extra check to disallow loop conditional block as the end if (loopCheck(endDom)) { @@ -745,6 +536,8 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v //}//end while regions needed #if DEBUG + errs() << "Final:\n" + << *root << "\n"; errs() << "*** addRegion ***\n"; #endif } diff --git a/ocelot/AtomicRegionInference/src/TaintTracker.cpp b/ocelot/AtomicRegionInference/src/TaintTracker.cpp index 45f30db..3da9778 100644 --- a/ocelot/AtomicRegionInference/src/TaintTracker.cpp +++ b/ocelot/AtomicRegionInference/src/TaintTracker.cpp @@ -639,7 +639,6 @@ std::set findInputInsts(Module* M) { } } } else { - // TODO: Say something else errs() << "[ERROR] Could not unwrap function pointer from annotation\n"; } } diff --git a/ocelot/AtomicRegionInference/src/include/Helpers.h b/ocelot/AtomicRegionInference/src/include/Helpers.h index d5f553d..bde9ca9 100644 --- a/ocelot/AtomicRegionInference/src/include/Helpers.h +++ b/ocelot/AtomicRegionInference/src/include/Helpers.h @@ -15,5 +15,6 @@ bool isAnnot(const StringRef annotName); void printInstInsts(const inst_insts_map& iim, bool onlyCalls = false); void printInsts(const inst_vec& iv); void printIntInsts(const std::map& iim); +void patchClonedBlock(BasicBlock* block, inst_inst_map clonedInsts); #endif \ No newline at end of file From 978fe072ee1b814258941d04c24b12323a844549 Mon Sep 17 00:00:00 2001 From: Robert Zhang Date: Tue, 12 Mar 2024 13:12:40 -0400 Subject: [PATCH 14/18] [InferAtomsPass] Don't optimize loops with tainted loop conditions In the case of loop conditions that depend on fresh/consistent input values, no instruction in the loop body can be extracted out from the atomic region, as shown in the example below: ```rust fn app() -> () { let x = input(); for _ in 0..10 { let y = 1; log(y + 2); log(x); } Fresh(x); } ``` Test plan: `make eg8` --- 
benchmarks/ctests/example08.c | 27 ++ benchmarks/ctests/example08.ll | 96 +++++++ benchmarks/ctests/example08.orig.ll | 104 +++++++ ocelot/AtomicRegionInference/Makefile | 16 +- ocelot/AtomicRegionInference/src/Helpers.cpp | 17 +- .../src/InferFreshCons.cpp | 268 +++++++++--------- .../src/TaintTracker.cpp | 2 +- 7 files changed, 387 insertions(+), 143 deletions(-) create mode 100644 benchmarks/ctests/example08.c create mode 100644 benchmarks/ctests/example08.ll create mode 100644 benchmarks/ctests/example08.orig.ll diff --git a/benchmarks/ctests/example08.c b/benchmarks/ctests/example08.c new file mode 100644 index 0000000..77a3580 --- /dev/null +++ b/benchmarks/ctests/example08.c @@ -0,0 +1,27 @@ +#include + +void Fresh(int x) {} + +void atomic_start() {} +void atomic_end() {} + +int input() { return 0; } +int (*IO_NAME)() = input; + +void log(int x) { + printf("%d\n", x); +} + +void app() { + int x = input(); + for (int i = x; i < 10; i++) { + int y = 1; + log(y + 2); + log(x); + } + Fresh(x); +} + +int main() { + app(); +} \ No newline at end of file diff --git a/benchmarks/ctests/example08.ll b/benchmarks/ctests/example08.ll new file mode 100644 index 0000000..142b165 --- /dev/null +++ b/benchmarks/ctests/example08.ll @@ -0,0 +1,96 @@ +; ModuleID = '../../benchmarks/ctests/example08.c' +source_filename = "../../benchmarks/ctests/example08.c" +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-macosx12.0.0" + +@IO_NAME = global ptr @input, align 8 +@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @atomic_start() #0 { +entry: + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @atomic_end() #0 { +entry: + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @input() #0 { +entry: + ret i32 0 +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) 
+define void @log(i32 noundef %x) #0 { +entry: + %x.addr = alloca i32, align 4 + store i32 %x, ptr %x.addr, align 4 + %0 = load i32, ptr %x.addr, align 4 + %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %0) + ret void +} + +declare i32 @printf(ptr noundef, ...) #1 + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @app() #0 { +entry: + %x = alloca i32, align 4 + %i = alloca i32, align 4 + %y = alloca i32, align 4 + call void @atomic_start() + %call = call i32 @input() + store i32 %call, ptr %x, align 4 + %0 = load i32, ptr %x, align 4 + store i32 %0, ptr %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry, , + %1 = load i32, ptr %i, align 4 + %2 = icmp slt i32 %1, 10 + br i1 %2, label %for.body, label %for.end + +for.body: ; preds = %for.cond, + %3 = load i32, ptr %x, align 4 + call void @log(i32 noundef %3) + store i32 1, ptr %y, align 4 + %4 = load i32, ptr %y, align 4 + %5 = add nsw i32 %4, 2 + call void @log(i32 noundef %5) + br label %for.inc + +for.inc: ; preds = %for.body, + %6 = load i32, ptr %i, align 4 + %7 = add nsw i32 %6, 1 + store i32 %7, ptr %i, align 4 + br label %for.cond, !llvm.loop !5 + +for.end: ; preds = %for.cond, + call void @atomic_end() + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @main() #0 { +entry: + call void @app() + ret i32 0 +} + +attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } +attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" 
"target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } + +!llvm.module.flags = !{!0, !1, !2, !3} +!llvm.ident = !{!4} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 8, !"PIC Level", i32 2} +!2 = !{i32 7, !"uwtable", i32 1} +!3 = !{i32 7, !"frame-pointer", i32 1} +!4 = !{!"Homebrew clang version 17.0.2"} +!5 = distinct !{!5, !6} +!6 = !{!"llvm.loop.mustprogress"} diff --git a/benchmarks/ctests/example08.orig.ll b/benchmarks/ctests/example08.orig.ll new file mode 100644 index 0000000..f0dbf25 --- /dev/null +++ b/benchmarks/ctests/example08.orig.ll @@ -0,0 +1,104 @@ +; ModuleID = '../../benchmarks/ctests/example08.c' +source_filename = "../../benchmarks/ctests/example08.c" +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-macosx12.0.0" + +@IO_NAME = global ptr @input, align 8 +@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @Fresh(i32 noundef %x) #0 { +entry: + %x.addr = alloca i32, align 4 + store i32 %x, ptr %x.addr, align 4 + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @atomic_start() #0 { +entry: + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @atomic_end() #0 { +entry: + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @input() #0 { +entry: + ret i32 0 +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @log(i32 noundef %x) #0 { +entry: + %x.addr = alloca i32, align 4 + store i32 %x, ptr %x.addr, align 4 + %0 = load i32, ptr %x.addr, align 4 + %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %0) + ret void +} + +declare i32 @printf(ptr noundef, ...) 
#1 + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @app() #0 { +entry: + %x = alloca i32, align 4 + %i = alloca i32, align 4 + %y = alloca i32, align 4 + %call = call i32 @input() + store i32 %call, ptr %x, align 4 + %0 = load i32, ptr %x, align 4 + store i32 %0, ptr %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %1 = load i32, ptr %i, align 4 + %cmp = icmp slt i32 %1, 10 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + store i32 1, ptr %y, align 4 + %2 = load i32, ptr %y, align 4 + %add = add nsw i32 %2, 2 + call void @log(i32 noundef %add) + %3 = load i32, ptr %x, align 4 + call void @log(i32 noundef %3) + br label %for.inc + +for.inc: ; preds = %for.body + %4 = load i32, ptr %i, align 4 + %inc = add nsw i32 %4, 1 + store i32 %inc, ptr %i, align 4 + br label %for.cond, !llvm.loop !5 + +for.end: ; preds = %for.cond + %5 = load i32, ptr %x, align 4 + call void @Fresh(i32 noundef %5) + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @main() #0 { +entry: + call void @app() + ret i32 0 +} + +attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } +attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } + +!llvm.module.flags = !{!0, !1, !2, !3} +!llvm.ident = !{!4} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 8, !"PIC Level", i32 2} +!2 = !{i32 7, !"uwtable", i32 1} +!3 = !{i32 7, !"frame-pointer", i32 1} +!4 = !{!"Homebrew clang 
version 17.0.2"} +!5 = distinct !{!5, !6} +!6 = !{!"llvm.loop.mustprogress"} diff --git a/ocelot/AtomicRegionInference/Makefile b/ocelot/AtomicRegionInference/Makefile index b92b0ff..53d3c55 100644 --- a/ocelot/AtomicRegionInference/Makefile +++ b/ocelot/AtomicRegionInference/Makefile @@ -28,21 +28,21 @@ eg8: TEST=example08 make test run_eg1: - TEST=example01 make run + TEST=example01 make run && ../../benchmarks/ctests/example01.out run_eg2: - TEST=example02 make run + TEST=example02 make run && ../../benchmarks/ctests/example02.out run_eg3: - TEST=example03 make run + TEST=example03 make run && ../../benchmarks/ctests/example03.out run_eg4: - TEST=example04 make run + TEST=example04 make run && ../../benchmarks/ctests/example04.out run_eg5: - TEST=example05 make run + TEST=example05 make run && ../../benchmarks/ctests/example05.out run_eg6: - TEST=example06 make run + TEST=example06 make run && ../../benchmarks/ctests/example06.out run_eg7: - TEST=example07 make run + TEST=example07 make run && ../../benchmarks/ctests/example07.out run_eg8: - TEST=example08 make run + TEST=example08 make run && ../../benchmarks/ctests/example08.out test: $(MAKE) -C build diff --git a/ocelot/AtomicRegionInference/src/Helpers.cpp b/ocelot/AtomicRegionInference/src/Helpers.cpp index 896250d..e446001 100644 --- a/ocelot/AtomicRegionInference/src/Helpers.cpp +++ b/ocelot/AtomicRegionInference/src/Helpers.cpp @@ -50,12 +50,13 @@ void printIntInsts(const std::map& iim) { */ void patchClonedBlock(BasicBlock* block, inst_inst_map clonedInsts) { for (auto& I : *block) { - if (auto* si = dyn_cast(&I)) { - for (int i = 0; i < si->getNumOperands(); i++) { - auto* operand = dyn_cast(si->getOperand(i)); + if (isa(I) || isa(I)) { + auto* inst = dyn_cast(&I); + for (int i = 0; i < inst->getNumOperands(); i++) { + auto* operand = dyn_cast(inst->getOperand(i)); if (operand != nullptr) { inst_inst_map::iterator it = clonedInsts.find(operand); - if (it != clonedInsts.end()) si->setOperand(i, 
it->second); + if (it != clonedInsts.end()) inst->setOperand(i, it->second); } } } else if (auto* li = dyn_cast(&I)) { @@ -72,9 +73,13 @@ void patchClonedBlock(BasicBlock* block, inst_inst_map clonedInsts) { // The last operand is the called function for (unsigned i = 0; i < ci->getNumOperands() - 1; i++) { auto* arg = dyn_cast(ci->getOperand(i)); - inst_inst_map::iterator argIt = clonedInsts.find(arg); - if (argIt != clonedInsts.end()) ci->setOperand(i, argIt->second); + inst_inst_map::iterator it = clonedInsts.find(arg); + if (it != clonedInsts.end()) ci->setOperand(i, it->second); } + } else if (auto* ci = dyn_cast(&I)) { + auto* cond = dyn_cast(ci->getOperand(0)); + inst_inst_map::iterator it = clonedInsts.find(cond); + if (it != clonedInsts.end()) ci->setOperand(0, it->second); } } } diff --git a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp index 81c7364..b70b112 100644 --- a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp +++ b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp @@ -50,13 +50,17 @@ bool InferFreshCons::loopCheck(BasicBlock* B) { // Find the first block after a for loop BasicBlock* InferFreshCons::getLoopEnd(BasicBlock* bb) { auto* ti = bb->getTerminator(); - auto* end = ti->getSuccessor(0); - ti = end->getTerminator(); - // errs() << "end is " << end->getName() << "\n"; - // for switch inst, succ 0 is the fall through - end = ti->getSuccessor(1); - // errs() << "end is " << end->getName() << "\n"; - return end; + if (ti->getNumSuccessors() == 0) { + return bb; + } else { + auto* end = ti->getSuccessor(0); + ti = end->getTerminator(); + // errs() << "end is " << end->getName() << "\n"; + // for switch inst, succ 0 is the fall through + end = ti->getSuccessor(1); + // errs() << "end is " << end->getName() << "\n"; + return end; + } } // Top level region inference function -- could flatten later @@ -194,6 +198,7 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* 
other, inst_v LoopInfo& LI = FAM->getResult(*homeFun); std::map> untaintedLoopClones; + bool loopCondTainted = false; #if DEBUG errs() << "[Loop regionsNeeded] Go over all blocks\n"; @@ -248,9 +253,8 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v auto* loop = LI.getLoopFor(&B); if (loop != nullptr) { -#if DEBUG - errs() << "__In loop, keep track of it__\n"; -#endif + if (&B != loop->getBlocks()[1]) loopCondTainted = true; + if (untaintedLoopClones.count(loop) == 0) untaintedLoopClones[loop] = {clone}; else @@ -299,147 +303,155 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v } } - for (auto& [taintedLoop, untaintedClones] : untaintedLoopClones) { -#if DEBUG - errs() << "Clone taintedLoop\n"; -#endif - errs() << "ayo\n"; - std::vector clonedLoop; - BasicBlock* forEnd; - Instruction* clonedAlloca; - Value* initVal; - inst_inst_map instClones; - - auto loopBlocks = taintedLoop->getBlocks(); - assert(loopBlocks.size() == 3); - - for (int i = 0; i < loopBlocks.size(); i++) { - auto* block = loopBlocks[i]; - auto* clonedBlock = BasicBlock::Create(block->getContext(), block->getName(), homeFun); - IRBuilder builder(clonedBlock); - - Instruction* prev; - for (auto& I : *block) { - auto* clonedI = I.clone(); - - // Only extract if untainted - // Covers the cond and inc blocks; they are processed on the fly due to - // their special role in keeping the loop going - if (find(targetInsts.begin(), targetInsts.end(), &I) == targetInsts.end()) { - // for.cond - if (i == 0) { - if (auto* li = dyn_cast(clonedI)) { - auto* ptr = li->getPointerOperand(); - - if (auto* ai = dyn_cast(*ptr->uses().begin())) { - IRBuilder builder(ai); - clonedAlloca = builder.CreateAlloca(ai->getAllocatedType()); - } + if (!loopCondTainted) { + for (auto& [taintedLoop, untaintedClones] : untaintedLoopClones) { +#if DEBUG + errs() << "Clone taintedLoop\n"; +#endif + std::vector clonedLoop; + BasicBlock* forEnd; + Instruction* 
clonedAlloca; + Value* initVal; + inst_inst_map instClones; + + auto loopBlocks = taintedLoop->getBlocks(); + assert(loopBlocks.size() == 3); + + for (int i = 0; i < loopBlocks.size(); i++) { + auto* block = loopBlocks[i]; + auto* clonedBlock = BasicBlock::Create(block->getContext(), block->getName(), homeFun); + IRBuilder builder(clonedBlock); + +#if DEBUG + errs() << "Clone block " << block->getName() << "\n"; +#endif + + Instruction* prev; + for (auto& I : *block) { +#if DEBUG + errs() << "Clone inst: " << I << "\n"; +#endif + auto* clonedI = I.clone(); + + // Only extract if untainted + // Covers the cond and inc blocks; they are processed on the fly due to + // their special role in keeping the loop going + if (find(targetInsts.begin(), targetInsts.end(), &I) == targetInsts.end()) { + // for.cond + if (i == 0) { + if (auto* li = dyn_cast(clonedI)) { + auto* ptr = li->getPointerOperand(); - for (auto* ptrUser : ptr->users()) { - if (auto* si = dyn_cast(ptrUser)) { - if (!isa(si->getOperand(0))) { - initVal = si->getOperand(0); + if (auto* ai = dyn_cast(*ptr->uses().begin())) { + IRBuilder builder(ai); + clonedAlloca = builder.CreateAlloca(ai->getAllocatedType()); + } + + for (auto* ptrUser : ptr->users()) { + if (auto* si = dyn_cast(ptrUser)) { + if (!isa(si->getOperand(0))) { + initVal = si->getOperand(0); + } } } - } - li->setOperand(0, clonedAlloca); - prev = li; - } else if (auto* ci = dyn_cast(clonedI)) { - // TODO: Check if operand originates from the current loop - if (isa(ci->getOperand(0))) { - ci->setOperand(0, prev); - } - prev = ci; - } else if (auto* bi = dyn_cast(clonedI)) { - assert(bi->isConditional()); - bi->setCondition(prev); + li->setOperand(0, clonedAlloca); + prev = li; + } else if (auto* ci = dyn_cast(clonedI)) { + // TODO: Check if operand originates from the current loop + if (isa(ci->getOperand(0))) { + ci->setOperand(0, prev); + } + prev = ci; + } else if (auto* bi = dyn_cast(clonedI)) { + assert(bi->isConditional()); + 
bi->setCondition(prev); - if (auto* B = dyn_cast(bi->getOperand(1))) { - forEnd = B; + if (auto* B = dyn_cast(bi->getOperand(1))) { + forEnd = B; + } } } - } - // for.inc - else if (i == 2) { - if (auto* li = dyn_cast(clonedI)) { - li->setOperand(0, clonedAlloca); - prev = li; - } else if (auto* bi = dyn_cast(clonedI)) { - auto* lhs = bi->getOperand(0); - if (isa(lhs)) bi->setOperand(0, prev); - auto* rhs = bi->getOperand(1); - if (isa(rhs)) bi->setOperand(1, prev); - prev = bi; - } else if (auto* si = dyn_cast(clonedI)) { - si->setOperand(0, prev); - si->setOperand(1, clonedAlloca); + // for.inc + else if (i == 2) { + if (auto* li = dyn_cast(clonedI)) { + li->setOperand(0, clonedAlloca); + prev = li; + } else if (auto* bi = dyn_cast(clonedI)) { + auto* lhs = bi->getOperand(0); + if (isa(lhs)) bi->setOperand(0, prev); + auto* rhs = bi->getOperand(1); + if (isa(rhs)) bi->setOperand(1, prev); + prev = bi; + } else if (auto* si = dyn_cast(clonedI)) { + si->setOperand(0, prev); + si->setOperand(1, clonedAlloca); + } } - } - instClones.emplace(&I, clonedI); - builder.Insert(clonedI); + instClones.emplace(&I, clonedI); + builder.Insert(clonedI); + } } - } - - // for.body - // Performs a standard sound cloning procedure (on each operand); - // the instructions in the body are unrelated to the loop except the final - // branch instruction - if (i == 1) patchClonedBlock(clonedBlock, instClones); - clonedLoop.push_back(clonedBlock); - } + // for.body + // Performs a standard sound cloning procedure (on each operand); + // the instructions in the body are unrelated to the loop except the final + // branch instruction + if (i == 1) patchClonedBlock(clonedBlock, instClones); - BasicBlock* forEndClone = BasicBlock::Create(forEnd->getContext(), forEnd->getName(), homeFun); - IRBuilder builder(forEndClone); - for (auto& I : *forEnd) { - if (!isa(I) && !isa(I)) { - auto* clone = I.clone(); - builder.Insert(clone); + clonedLoop.push_back(clonedBlock); } - if (isa(I)) { - IRBuilder 
builder(&I); - builder.CreateBr(clonedLoop[0]); - I.removeFromParent(); - break; - } - } + BasicBlock* forEndClone = BasicBlock::Create(forEnd->getContext(), forEnd->getName(), homeFun); + IRBuilder builder(forEndClone); + for (auto& I : *forEnd) { + if (!isa(I) && !isa(I)) { + auto* clone = I.clone(); + builder.Insert(clone); + } - for (auto& I : *forEnd) { - if (auto* bi = dyn_cast(&I)) { - IRBuilder builder(bi); - builder.CreateStore(initVal, clonedAlloca); + if (isa(I)) { + IRBuilder builder(&I); + builder.CreateBr(clonedLoop[0]); + I.removeFromParent(); + break; + } } - } - // Connect the blocks of the new loop - for (int i = 0; i < clonedLoop.size(); i++) { - auto* block = clonedLoop[i]; - for (auto& I : *block) { + for (auto& I : *forEnd) { if (auto* bi = dyn_cast(&I)) { - // for.cond - if (i == 0) { - bi->setSuccessor(0, clonedLoop[1]); - bi->setSuccessor(1, forEndClone); - } - // for.body - else if (i == 1) { - bi->setSuccessor(0, clonedLoop[2]); - } - // for.inc - else if (i == 2) { - bi->setSuccessor(0, clonedLoop[0]); + IRBuilder builder(bi); + builder.CreateStore(initVal, clonedAlloca); + } + } + + // Connect the blocks of the new loop + for (int i = 0; i < clonedLoop.size(); i++) { + auto* block = clonedLoop[i]; + for (auto& I : *block) { + if (auto* bi = dyn_cast(&I)) { + // for.cond + if (i == 0) { + bi->setSuccessor(0, clonedLoop[1]); + bi->setSuccessor(1, forEndClone); + } + // for.body + else if (i == 1) { + bi->setSuccessor(0, clonedLoop[2]); + } + // for.inc + else if (i == 2) { + bi->setSuccessor(0, clonedLoop[0]); + } } } } - } - for (auto* untaintedClone : untaintedClones) { - if (!isa(untaintedClone)) untaintedClone->removeFromParent(); + for (auto* untaintedClone : untaintedClones) { + if (!isa(untaintedClone)) untaintedClone->removeFromParent(); + } } } #endif diff --git a/ocelot/AtomicRegionInference/src/TaintTracker.cpp b/ocelot/AtomicRegionInference/src/TaintTracker.cpp index 3da9778..bea58e7 100644 --- 
a/ocelot/AtomicRegionInference/src/TaintTracker.cpp +++ b/ocelot/AtomicRegionInference/src/TaintTracker.cpp @@ -1023,7 +1023,7 @@ inst_vec traverseUses(Instruction* root) { } #if DEBUG - errs() << "=== traverseUses ===\n"; + errs() << "*** traverseUses ***\n"; #endif inst_vec uses_vec(uses.begin(), uses.end()); return uses_vec; From c02ae2c83634ff58269889ae15aeca74b66344a6 Mon Sep 17 00:00:00 2001 From: Robert Zhang Date: Wed, 13 Mar 2024 12:46:50 -0500 Subject: [PATCH 15/18] [InferAtomsPass] More tests and working impl. for some Rust programs Fix an issue with extracting IO functions from source code. Add several tests, including a few in Rust. --- benchmarks/ctests/example.bc | Bin 5984 -> 6304 bytes benchmarks/ctests/example.ll | 178 +++++++++++ benchmarks/ctests/example.orig.ll | 183 ++++++++++++ benchmarks/ctests/example.rs | 11 +- benchmarks/ctests/example09.c | 26 ++ benchmarks/ctests/example09.ll | 111 +++++++ benchmarks/ctests/example09.orig.ll | 100 +++++++ benchmarks/ctests/example10.c | 25 ++ benchmarks/ctests/example10.ll | 92 ++++++ benchmarks/ctests/example10.orig.ll | 100 +++++++ benchmarks/ctests/example11.bc | Bin 0 -> 6352 bytes benchmarks/ctests/example11.ll | 179 +++++++++++ benchmarks/ctests/example11.orig.ll | 184 ++++++++++++ benchmarks/ctests/example11.rs | 26 ++ benchmarks/ctests/example12.bc | Bin 0 -> 8160 bytes benchmarks/ctests/example12.ll | 274 +++++++++++++++++ benchmarks/ctests/example12.orig.ll | 279 ++++++++++++++++++ benchmarks/ctests/example12.rs | 26 ++ benchmarks/intermittent.rs | 65 ++-- ocelot/AtomicRegionInference/Makefile | 35 ++- .../AtomicRegionInference/src/InferAtoms.cpp | 9 +- .../src/TaintTracker.cpp | 12 +- 22 files changed, 1869 insertions(+), 46 deletions(-) create mode 100644 benchmarks/ctests/example.ll create mode 100644 benchmarks/ctests/example.orig.ll create mode 100644 benchmarks/ctests/example09.c create mode 100644 benchmarks/ctests/example09.ll create mode 100644 benchmarks/ctests/example09.orig.ll 
create mode 100644 benchmarks/ctests/example10.c create mode 100644 benchmarks/ctests/example10.ll create mode 100644 benchmarks/ctests/example10.orig.ll create mode 100644 benchmarks/ctests/example11.bc create mode 100644 benchmarks/ctests/example11.ll create mode 100644 benchmarks/ctests/example11.orig.ll create mode 100644 benchmarks/ctests/example11.rs create mode 100644 benchmarks/ctests/example12.bc create mode 100644 benchmarks/ctests/example12.ll create mode 100644 benchmarks/ctests/example12.orig.ll create mode 100644 benchmarks/ctests/example12.rs diff --git a/benchmarks/ctests/example.bc b/benchmarks/ctests/example.bc index 4163fd996b61aa6750aed7c63c25855384623a88..6513a4190f5bf1e1cc2d48f15cd4b9bc4661b52b 100644 GIT binary patch delta 2298 zcmZWqeQ;FO6+dtH?%TJU-OWpOlPrxSHy?Z?W-<#2DQX@e!6X{+t@uc|bynh}3@cLO_m zXU@KRe&^itJLlYw{kT4{$n#;cB#$tBkVF2Xo;#GYM?3FR{6~LZ9;`oJWfWFkwrC{W z$s3eLONd!N)r=E9jq9Ik$%N*N^SDuz8Y>*x%t_bIh1x6VLc-_Q0-``1x6t@J<%$w} zYabCucuglhp*)#?B;Qp;qT=0Yr=461kpUYqvbbKn6m%ELsU{?(5%aIR$ZG2-;DQQI z(n$-9{>|NaH+6BS76^#D+PW7!(oabPDuWorlSiCCzS5^YQwK9y1sHiHToYQr*jemVh(X0kU9xXQ?n z1cZdr*&p37MX@A@dM&TopO4tnktLep+x`io4Y3)tpUTvYGJZm;P=5n_D zeZ7vTO1076uKwu6kfI3gZytQYs(_HD&4(Qg>qgs&8VbZ>bu{1|T)4$>POAI*_2f`+ zCwz@~m%1Y8D%D#Is^({dOLdaRaYg0KTl;lm>-zaS+lD^l`H=;|?s8`3hljOTA2+{o zEU8LO*{W0q-sF$wj9un8Xe!pJl{0Vb*9%|q&99vK)gE)Bb8$_EK5kJ@7*?Li^yQwE zI%UA!1}UzbdF`;athd~J#n}+M?j6{SW2C8nOr$+!Kc(j$Ew+dDH!2r14W#F_>Lhy^Hf9*F>SJ6o@M~`|K@)mE-R7 zE%b*b))Kf^XJ4qTMRd}k1dem|w&&2_(V~<=Z+qUcy3@09ma3PTVQ=Y&0T%47p4jp9 zPyh3mjpm8GR%P1!ByXAd|2$n8H65-Sq3?H9lwKMN%Kcf|5!f7(wRwu7=(kLH+Dy5) zz4;h6j>yzq)zRtTOp!jV8Ib_E4Kw zUjvntMVYI)Nln}?bvYtDbHu$Yl|3Z~X32c!jH}4Q=PWWvXOI8m%--O6!z_2tQ5wvf z+%2&s{Z8u`qg1d!Tu_)G-eEfx!f|#jjHhbQbEfTv#}Ff<;u(wWc_soKA;&4X8S#l4 zncl19LN9vIcjYx&ParHV4H7-6puvQIO!cWb5NLgA~40kNK%q zx7k!jF=@6ux?dUz*T$n)?z3KIZK1Wo_LMD}yhqq4Z9_(}{zv~hd^K_D-Cf0xH$3ppEtYOP zcPHUfXpM85e23%~m7Ci z1_g-z3H(Or+aa}CT*2#Ch{mmg9*4ALarnn*6AYUGd;q!Ux`AE;ehc&_wEE^8{Ie|H 
z{~f#zckdziod*73j{hj|yP#K~$c;Jp*T5P0=|LEfbfyAidNpkzdL#!Iz^!->z)gE{ zZ~|@zJ`cF;QuS;cbfONpd_qQnBjmLXEe7a@{vvP}_^%N!gF(JkufpKW!P|j@hkgyb zA&YAO9s}-!{yA`O4t^|4U_({%e@h+VCC0HOnId zJ9@Wm32*K0ABdXV7aIRacUjbd-u|8)_nJ-K1*I!?cJGR2y><;mb`E6c-F;ilPVb81 zG#1W)z_PRPlin$d9bv*U&c))|vUHrRHJrOBELR~^gIQlv6#SkgOV=CO*0(x5n|wH% M{cOB+`ss4%TQ7_t7XSbN delta 1991 zcmZ`&eN0nV6hF6pZC^`UA5cR1($^Lh7_bNBQ|Ekz;gA6qha2;Y7K_3JERIaJ#q_lW zh7%LmV;M0zbwix)LkFAD#VpVnr;A37)0izYbH=zomZ(v;n2>De7Ldloo1Ay=`JLbW zopbKJZ-34o*~S~=@lk-ogjN|pV5}zF2i9#S@dM}PEy>(%6JxB`6)Z@GaaN4h4cVZ3 zWwKYqIcQCF2Ao|VR`gdmKWK*{-ef!20coWbE`A)AL9=cwojfGI(JlZUrCAR65YZUC z<&RDU2$Ukkp#hPgG^5_BWN--zo(vj7r>SYXU6 z5AT?327w9@G!9r{*c!jY2`(O81u}hUz`4qA+y8aJxPTa@K}CMN|42`Zn@E0iQ4keH z$#QkkM??uhbZ09YN=yuIk`SFrdEpnc00Q0gB<1KA6C_;c6RB0}2mE2F=2@*HH+Ba# ztb|qZR1Ej`)CvU-F5m%tUoWS;qcm+LYQId4c{)8>`J9+rXv~vPbUIrtn?EzjB!X zHnj=@E>~Tsmk^0J7x{aX*f_w5d`@d$IamQ;j~4$N2vwdQ=o}VWL{>xk%&8hSFk#cW zU8d8=h=pOqkR_>0iFMQ0^N(fnR68enYOl4I_ul8{xUG)HL`KQ)^0U6SB)M9bZ}T5e z+0+c?Jyw}8e4S5bu_rignLN?X#e8GBW|{o9Ssu~lrcb4Rb+lVykowFfU<3ZGs;Nw% z!YyYklgHm-m$oL#B+Bl4!k+0Rh`k_--ZN!3v%c-nokuQH0Ee3g$ig|6c`1|LGHrXX z!o^5`HQ|ajUOP$~A8a`w#FEscbbS&x;ead2ENcz!?;z1q$0C!a#9r|+S|aS`nZ3roTXd5MdyN;AhVW{PnaI8=f)*1L3XG$5UTDjj-g{Tn@}sCy zG9nl2m&%z2PCw$9jN?*7OO=>MWUt;#2IY17BpO1}cX~_G#a;)!Frh|ha01V&&&M_u zap4|D>+wiwV`x+f(N}iJY6wMDnZAop9(3{ zkfD##>2G^=__`gXWx#L{_)#MJ6*Zz$auLaS44|F&O}}6xXYEp)W{S9Ki3zIeq&Fn> zQB^0bmDszgA*o){1=T0Ung({PWIU=i4{XMKJ9UNKS0A$P)m_~-@$k;Puapn6 zY91$_A8osp3ype!Zsf!$2h(Vw^j;U@ht^q%M&Cr8Ts0@k7lSJn%cP?j8D^aEUxm3i zPyKVVamt~5I`$#QiSrz^6=?tA00H1G;?JSK9xY=UCwTygSjKYnd(q0Lape180|wS0 za0zYoYyf^hd@=eNSnHw){P8sIc?!?Nzo8R(pCG<1g8wDrucIG_O}-X^EAfY0fcarF z2IfZu(lLNJpdWE;W~wvnAPezS^ku|tGdOl!GtEE#6n_=sRQDv}bl=Pgy^A=?!S9IM zUziTCh#GN7e!4o6Qwj9>+l(TkLQ8U z;A9H=Z+EyGS{k#3UHw`|#jEPKJm!Uj)6@y0@r$DP;EP$n-oEtGefmW|yp-BeUX oY*4=YaI7XSbN diff --git a/benchmarks/ctests/example.ll b/benchmarks/ctests/example.ll new file mode 100644 index 0000000..1fce17b --- /dev/null +++ 
b/benchmarks/ctests/example.ll @@ -0,0 +1,178 @@ +; ModuleID = '../../benchmarks/ctests/example.bc' +source_filename = "example.a08634fc28d17a86-cgu.0" +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-macosx12.0.0" + +@vtable.0 = private unnamed_addr constant <{ ptr, [16 x i8], ptr, ptr, ptr }> <{ ptr @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17hd44a932dcf55a427E", [16 x i8] c"\08\00\00\00\00\00\00\00\08\00\00\00\00\00\00\00", ptr @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h06fb5f22a45b1729E", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17he4a3bb6af4f8bfafE", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17he4a3bb6af4f8bfafE" }>, align 8 +@IO_NAME = constant <{ ptr }> <{ ptr @tmp }>, align 8 +@atomic_depth = external global i16 + +; Function Attrs: noinline uwtable +define internal void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17h11952cf61e1518ebE(ptr %f) unnamed_addr #0 { +start: + call void @_ZN4core3ops8function6FnOnce9call_once17h2e8e8fa7347da120E(ptr %f) + call void asm sideeffect "", "~{memory}"(), !srcloc !3 + ret void +} + +; Function Attrs: uwtable +define hidden i64 @_ZN3std2rt10lang_start17ha3b54fab1f2518b9E(ptr %main, i64 %argc, ptr %argv, i8 %sigpipe) unnamed_addr #1 { +start: + %_8 = alloca ptr, align 8 + %_5 = alloca i64, align 8 + store ptr %main, ptr %_8, align 8 + %0 = call i64 @_ZN3std2rt19lang_start_internal17hadaf077a6dd0140bE(ptr align 1 %_8, ptr align 8 @vtable.0, i64 %argc, ptr %argv, i8 %sigpipe) + store i64 %0, ptr %_5, align 8 + %v = load i64, ptr %_5, align 8, !noundef !4 + ret i64 %v +} + +; Function Attrs: inlinehint uwtable +define internal i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17he4a3bb6af4f8bfafE"(ptr align 8 %_1) unnamed_addr #2 { +start: + %_4 = load ptr, ptr %_1, align 8, !nonnull !4, !noundef !4 + call void 
@_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17h11952cf61e1518ebE(ptr %_4) + %self = call i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17h026d1b3fd579707fE"() + %_0 = zext i8 %self to i32 + ret i32 %_0 +} + +; Function Attrs: inlinehint uwtable +define internal i32 @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h06fb5f22a45b1729E"(ptr %_1) unnamed_addr #2 { +start: + %_2 = alloca {}, align 1 + %0 = load ptr, ptr %_1, align 8, !nonnull !4, !noundef !4 + %_0 = call i32 @_ZN4core3ops8function6FnOnce9call_once17h631194d6dbd64289E(ptr %0) + ret i32 %_0 +} + +; Function Attrs: inlinehint uwtable +define internal void @_ZN4core3ops8function6FnOnce9call_once17h2e8e8fa7347da120E(ptr %_1) unnamed_addr #2 { +start: + %_2 = alloca {}, align 1 + call void %_1() + ret void +} + +; Function Attrs: inlinehint uwtable +define internal i32 @_ZN4core3ops8function6FnOnce9call_once17h631194d6dbd64289E(ptr %0) unnamed_addr #2 personality ptr @rust_eh_personality { +start: + %1 = alloca { ptr, i32 }, align 8 + %_2 = alloca {}, align 1 + %_1 = alloca ptr, align 8 + store ptr %0, ptr %_1, align 8 + %_0 = invoke i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17he4a3bb6af4f8bfafE"(ptr align 8 %_1) + to label %bb1 unwind label %cleanup + +bb3: ; preds = %cleanup + %2 = load ptr, ptr %1, align 8, !noundef !4 + %3 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 1 + %4 = load i32, ptr %3, align 8, !noundef !4 + %5 = insertvalue { ptr, i32 } poison, ptr %2, 0 + %6 = insertvalue { ptr, i32 } %5, i32 %4, 1 + resume { ptr, i32 } %6 + +cleanup: ; preds = %start + %7 = landingpad { ptr, i32 } + cleanup + %8 = extractvalue { ptr, i32 } %7, 0 + %9 = extractvalue { ptr, i32 } %7, 1 + %10 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 0 + store ptr %8, ptr %10, align 8 + %11 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 1 + store i32 %9, ptr %11, align 8 + br label %bb3 + +bb1: ; preds = %start 
+ ret i32 %_0 +} + +; Function Attrs: inlinehint uwtable +define internal void @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17hd44a932dcf55a427E"(ptr align 8 %_1) unnamed_addr #2 { +start: + ret void +} + +; Function Attrs: inlinehint uwtable +define internal i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17h026d1b3fd579707fE"() unnamed_addr #2 { +start: + ret i8 0 +} + +; Function Attrs: uwtable +define dso_local i32 @tmp() unnamed_addr #1 { +start: + ret i32 0 +} + +; Function Attrs: uwtable +define dso_local void @log(i32 %i) unnamed_addr #1 { +start: + ret void +} + +; Function Attrs: uwtable +define dso_local void @app() unnamed_addr #1 { +start: + call void @atomic_start() + %x = call i32 @tmp() + call void @log(i32 %x) + call void @atomic_end() + ret void +} + +; Function Attrs: uwtable +define internal void @_ZN7example4main17ha3370acdcff48c7aE() unnamed_addr #1 { +start: + call void @app() + ret void +} + +; Function Attrs: uwtable +define dso_local void @atomic_start() unnamed_addr #1 { +start: + %local = load i16, ptr @atomic_depth, align 2, !noundef !4 + call void @start_atomic() + ret void +} + +; Function Attrs: uwtable +define dso_local void @atomic_end() unnamed_addr #1 { +start: + call void @end_atomic() + ret void +} + +; Function Attrs: uwtable +declare i64 @_ZN3std2rt19lang_start_internal17hadaf077a6dd0140bE(ptr align 1, ptr align 8, i64, ptr, i8) unnamed_addr #1 + +; Function Attrs: uwtable +declare i32 @rust_eh_personality(i32, i32, i64, ptr, ptr) unnamed_addr #1 + +; Function Attrs: uwtable +declare void @start_atomic() unnamed_addr #1 + +; Function Attrs: uwtable +declare void @end_atomic() unnamed_addr #1 + +define i32 @main(i32 %0, ptr %1) unnamed_addr #3 { +top: + %2 = sext i32 %0 to i64 + %3 = call i64 @_ZN3std2rt10lang_start17ha3b54fab1f2518b9E(ptr @_ZN7example4main17ha3370acdcff48c7aE, i64 %2, ptr %1, i8 0) + %4 = trunc i64 %3 to i32 + ret i32 %4 +} + 
+attributes #0 = { noinline uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" } +attributes #1 = { uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" } +attributes #2 = { inlinehint uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" } +attributes #3 = { "frame-pointer"="non-leaf" "target-cpu"="apple-m1" } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 8, !"PIC Level", i32 2} +!1 = !{i32 7, !"PIE Level", i32 2} +!2 = !{!"rustc version 1.73.0 (cc66ad468 2023-10-03)"} +!3 = !{i32 1115277} +!4 = !{} diff --git a/benchmarks/ctests/example.orig.ll b/benchmarks/ctests/example.orig.ll new file mode 100644 index 0000000..7cbde04 --- /dev/null +++ b/benchmarks/ctests/example.orig.ll @@ -0,0 +1,183 @@ +; ModuleID = '../../benchmarks/ctests/example.bc' +source_filename = "example.a08634fc28d17a86-cgu.0" +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-macosx12.0.0" + +@vtable.0 = private unnamed_addr constant <{ ptr, [16 x i8], ptr, ptr, ptr }> <{ ptr @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17hd44a932dcf55a427E", [16 x i8] c"\08\00\00\00\00\00\00\00\08\00\00\00\00\00\00\00", ptr @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h06fb5f22a45b1729E", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17he4a3bb6af4f8bfafE", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17he4a3bb6af4f8bfafE" }>, align 8 +@IO_NAME = constant <{ ptr }> <{ ptr @tmp }>, align 8 +@atomic_depth = external global i16 + +; Function Attrs: noinline uwtable +define internal void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17h11952cf61e1518ebE(ptr %f) unnamed_addr #0 { +start: + call void @_ZN4core3ops8function6FnOnce9call_once17h2e8e8fa7347da120E(ptr %f) + call void asm sideeffect "", "~{memory}"(), !srcloc !3 + ret void +} + +; Function Attrs: uwtable +define hidden i64 
@_ZN3std2rt10lang_start17ha3b54fab1f2518b9E(ptr %main, i64 %argc, ptr %argv, i8 %sigpipe) unnamed_addr #1 { +start: + %_8 = alloca ptr, align 8 + %_5 = alloca i64, align 8 + store ptr %main, ptr %_8, align 8 + %0 = call i64 @_ZN3std2rt19lang_start_internal17hadaf077a6dd0140bE(ptr align 1 %_8, ptr align 8 @vtable.0, i64 %argc, ptr %argv, i8 %sigpipe) + store i64 %0, ptr %_5, align 8 + %v = load i64, ptr %_5, align 8, !noundef !4 + ret i64 %v +} + +; Function Attrs: inlinehint uwtable +define internal i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17he4a3bb6af4f8bfafE"(ptr align 8 %_1) unnamed_addr #2 { +start: + %_4 = load ptr, ptr %_1, align 8, !nonnull !4, !noundef !4 + call void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17h11952cf61e1518ebE(ptr %_4) + %self = call i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17h026d1b3fd579707fE"() + %_0 = zext i8 %self to i32 + ret i32 %_0 +} + +; Function Attrs: inlinehint uwtable +define internal i32 @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h06fb5f22a45b1729E"(ptr %_1) unnamed_addr #2 { +start: + %_2 = alloca {}, align 1 + %0 = load ptr, ptr %_1, align 8, !nonnull !4, !noundef !4 + %_0 = call i32 @_ZN4core3ops8function6FnOnce9call_once17h631194d6dbd64289E(ptr %0) + ret i32 %_0 +} + +; Function Attrs: inlinehint uwtable +define internal void @_ZN4core3ops8function6FnOnce9call_once17h2e8e8fa7347da120E(ptr %_1) unnamed_addr #2 { +start: + %_2 = alloca {}, align 1 + call void %_1() + ret void +} + +; Function Attrs: inlinehint uwtable +define internal i32 @_ZN4core3ops8function6FnOnce9call_once17h631194d6dbd64289E(ptr %0) unnamed_addr #2 personality ptr @rust_eh_personality { +start: + %1 = alloca { ptr, i32 }, align 8 + %_2 = alloca {}, align 1 + %_1 = alloca ptr, align 8 + store ptr %0, ptr %_1, align 8 + %_0 = invoke i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17he4a3bb6af4f8bfafE"(ptr align 8 %_1) + to label %bb1 unwind label 
%cleanup + +bb3: ; preds = %cleanup + %2 = load ptr, ptr %1, align 8, !noundef !4 + %3 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 1 + %4 = load i32, ptr %3, align 8, !noundef !4 + %5 = insertvalue { ptr, i32 } poison, ptr %2, 0 + %6 = insertvalue { ptr, i32 } %5, i32 %4, 1 + resume { ptr, i32 } %6 + +cleanup: ; preds = %start + %7 = landingpad { ptr, i32 } + cleanup + %8 = extractvalue { ptr, i32 } %7, 0 + %9 = extractvalue { ptr, i32 } %7, 1 + %10 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 0 + store ptr %8, ptr %10, align 8 + %11 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 1 + store i32 %9, ptr %11, align 8 + br label %bb3 + +bb1: ; preds = %start + ret i32 %_0 +} + +; Function Attrs: inlinehint uwtable +define internal void @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17hd44a932dcf55a427E"(ptr align 8 %_1) unnamed_addr #2 { +start: + ret void +} + +; Function Attrs: inlinehint uwtable +define internal i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17h026d1b3fd579707fE"() unnamed_addr #2 { +start: + ret i8 0 +} + +; Function Attrs: uwtable +define dso_local i32 @tmp() unnamed_addr #1 { +start: + ret i32 0 +} + +; Function Attrs: uwtable +define dso_local void @log(i32 %i) unnamed_addr #1 { +start: + ret void +} + +; Function Attrs: uwtable +define dso_local void @app() unnamed_addr #1 { +start: + %x = call i32 @tmp() + call void @Fresh(i32 %x) + call void @log(i32 %x) + ret void +} + +; Function Attrs: uwtable +define internal void @_ZN7example4main17ha3370acdcff48c7aE() unnamed_addr #1 { +start: + call void @app() + ret void +} + +; Function Attrs: uwtable +define internal void @Fresh(i32 %_var) unnamed_addr #1 { +start: + ret void +} + +; Function Attrs: uwtable +define dso_local void @atomic_start() unnamed_addr #1 { +start: + %local = load i16, ptr @atomic_depth, align 2, !noundef !4 + call void @start_atomic() + ret void +} + +; Function 
Attrs: uwtable +define dso_local void @atomic_end() unnamed_addr #1 { +start: + call void @end_atomic() + ret void +} + +; Function Attrs: uwtable +declare i64 @_ZN3std2rt19lang_start_internal17hadaf077a6dd0140bE(ptr align 1, ptr align 8, i64, ptr, i8) unnamed_addr #1 + +; Function Attrs: uwtable +declare i32 @rust_eh_personality(i32, i32, i64, ptr, ptr) unnamed_addr #1 + +; Function Attrs: uwtable +declare void @start_atomic() unnamed_addr #1 + +; Function Attrs: uwtable +declare void @end_atomic() unnamed_addr #1 + +define i32 @main(i32 %0, ptr %1) unnamed_addr #3 { +top: + %2 = sext i32 %0 to i64 + %3 = call i64 @_ZN3std2rt10lang_start17ha3b54fab1f2518b9E(ptr @_ZN7example4main17ha3370acdcff48c7aE, i64 %2, ptr %1, i8 0) + %4 = trunc i64 %3 to i32 + ret i32 %4 +} + +attributes #0 = { noinline uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" } +attributes #1 = { uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" } +attributes #2 = { inlinehint uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" } +attributes #3 = { "frame-pointer"="non-leaf" "target-cpu"="apple-m1" } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 8, !"PIC Level", i32 2} +!1 = !{i32 7, !"PIE Level", i32 2} +!2 = !{!"rustc version 1.73.0 (cc66ad468 2023-10-03)"} +!3 = !{i32 1115277} +!4 = !{} diff --git a/benchmarks/ctests/example.rs b/benchmarks/ctests/example.rs index 68583bd..bb6f8a1 100644 --- a/benchmarks/ctests/example.rs +++ b/benchmarks/ctests/example.rs @@ -1,15 +1,14 @@ -fn Fresh(_var: T) -> () {} - -fn Consistent(_var: T, _id: u16) -> () {} - -#[no_mangle] -pub static IO_NAME: fn() -> i32 = tmp; +include!("../intermittent.rs"); #[no_mangle] fn tmp() -> i32 { 0 } +#[no_mangle] +pub static IO_NAME: fn() -> i32 = tmp; + +#[no_mangle] fn log(i: i32) -> () {} #[no_mangle] diff --git a/benchmarks/ctests/example09.c b/benchmarks/ctests/example09.c new file mode 100644 index 0000000..ace57fa --- /dev/null +++ b/benchmarks/ctests/example09.c @@ -0,0 
+1,26 @@ +#include + +void Fresh(int x) {} + +void atomic_start() {} +void atomic_end() {} + +int input() { return 0; } +int (*IO_NAME)() = input; + +void log(int x) { + printf("%d\n", x); +} + +void app() { + int x = input(); + for (int i = 0; i < 10; i++) { + log(x); + log(i); + } + Fresh(x); +} + +int main() { + app(); +} \ No newline at end of file diff --git a/benchmarks/ctests/example09.ll b/benchmarks/ctests/example09.ll new file mode 100644 index 0000000..5ff5b64 --- /dev/null +++ b/benchmarks/ctests/example09.ll @@ -0,0 +1,111 @@ +; ModuleID = '../../benchmarks/ctests/example09.c' +source_filename = "../../benchmarks/ctests/example09.c" +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-macosx12.0.0" + +@IO_NAME = global ptr @input, align 8 +@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @atomic_start() #0 { +entry: + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @atomic_end() #0 { +entry: + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @input() #0 { +entry: + ret i32 0 +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @log(i32 noundef %x) #0 { +entry: + %x.addr = alloca i32, align 4 + store i32 %x, ptr %x.addr, align 4 + %0 = load i32, ptr %x.addr, align 4 + %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %0) + ret void +} + +declare i32 @printf(ptr noundef, ...) 
#1 + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @app() #0 { +entry: + %x = alloca i32, align 4 + %0 = alloca i32, align 4 + %i = alloca i32, align 4 + call void @atomic_start() + %call = call i32 @input() + store i32 %call, ptr %x, align 4 + store i32 0, ptr %i, align 4 + br label %for.cond + +for.cond: ; preds = %entry, %for.inc, + %1 = load i32, ptr %i, align 4 + %cmp = icmp slt i32 %1, 10 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %2 = load i32, ptr %x, align 4 + call void @log(i32 noundef %2) + br label %for.inc + +for.inc: ; preds = %for.body, + %3 = load i32, ptr %i, align 4 + %inc = add nsw i32 %3, 1 + store i32 %inc, ptr %i, align 4 + br label %for.cond, !llvm.loop !5 + +for.end: ; preds = %for.cond + call void @atomic_end() + store i32 0, ptr %0, align 4 + br label %for.cond1 + +for.cond1: ; preds = %for.inc3, %for.end + %4 = load i32, ptr %0, align 4 + %5 = icmp slt i32 %4, 10 + br i1 %5, label %for.body2, label %for.end4 + +for.body2: ; preds = %for.cond1 + %6 = load i32, ptr %i, align 4 + call void @log(i32 noundef %6) + br label %for.inc3 + +for.inc3: ; preds = %for.body2 + %7 = load i32, ptr %0, align 4 + %8 = add nsw i32 %7, 1 + store i32 %8, ptr %0, align 4 + br label %for.cond1, !llvm.loop !5 + +for.end4: ; preds = %for.cond1 + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @main() #0 { +entry: + call void @app() + ret i32 0 +} + +attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } +attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" 
"target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } + +!llvm.module.flags = !{!0, !1, !2, !3} +!llvm.ident = !{!4} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 8, !"PIC Level", i32 2} +!2 = !{i32 7, !"uwtable", i32 1} +!3 = !{i32 7, !"frame-pointer", i32 1} +!4 = !{!"Homebrew clang version 17.0.2"} +!5 = distinct !{!5, !6} +!6 = !{!"llvm.loop.mustprogress"} diff --git a/benchmarks/ctests/example09.orig.ll b/benchmarks/ctests/example09.orig.ll new file mode 100644 index 0000000..03d06bb --- /dev/null +++ b/benchmarks/ctests/example09.orig.ll @@ -0,0 +1,100 @@ +; ModuleID = '../../benchmarks/ctests/example09.c' +source_filename = "../../benchmarks/ctests/example09.c" +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-macosx12.0.0" + +@IO_NAME = global ptr @input, align 8 +@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @Fresh(i32 noundef %x) #0 { +entry: + %x.addr = alloca i32, align 4 + store i32 %x, ptr %x.addr, align 4 + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @atomic_start() #0 { +entry: + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @atomic_end() #0 { +entry: + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @input() #0 { +entry: + ret i32 0 +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @log(i32 noundef %x) #0 { +entry: + %x.addr = alloca i32, align 4 + store i32 %x, ptr %x.addr, align 4 + %0 = load i32, ptr %x.addr, align 4 + %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %0) + ret void +} + +declare i32 @printf(ptr noundef, ...) 
#1 + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @app() #0 { +entry: + %x = alloca i32, align 4 + %i = alloca i32, align 4 + %call = call i32 @input() + store i32 %call, ptr %x, align 4 + store i32 0, ptr %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %0 = load i32, ptr %i, align 4 + %cmp = icmp slt i32 %0, 10 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %1 = load i32, ptr %x, align 4 + call void @log(i32 noundef %1) + %2 = load i32, ptr %i, align 4 + call void @log(i32 noundef %2) + br label %for.inc + +for.inc: ; preds = %for.body + %3 = load i32, ptr %i, align 4 + %inc = add nsw i32 %3, 1 + store i32 %inc, ptr %i, align 4 + br label %for.cond, !llvm.loop !5 + +for.end: ; preds = %for.cond + %4 = load i32, ptr %x, align 4 + call void @Fresh(i32 noundef %4) + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @main() #0 { +entry: + call void @app() + ret i32 0 +} + +attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } +attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } + +!llvm.module.flags = !{!0, !1, !2, !3} +!llvm.ident = !{!4} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 8, !"PIC Level", i32 2} +!2 = !{i32 7, !"uwtable", i32 1} +!3 = !{i32 7, !"frame-pointer", i32 1} +!4 = !{!"Homebrew clang version 17.0.2"} +!5 = distinct !{!5, !6} +!6 = !{!"llvm.loop.mustprogress"} diff --git a/benchmarks/ctests/example10.c 
b/benchmarks/ctests/example10.c new file mode 100644 index 0000000..4e57ff7 --- /dev/null +++ b/benchmarks/ctests/example10.c @@ -0,0 +1,25 @@ +#include + +void Fresh(int x) {} + +void atomic_start() {} +void atomic_end() {} + +int input() { return 0; } +int (*IO_NAME)() = input; + +void log(int x) { + printf("%d\n", x); +} + +void app() { + int x = input(); + for (int i = x; i < 10; i++) { + log(i + 2); + } + Fresh(x); +} + +int main() { + app(); +} \ No newline at end of file diff --git a/benchmarks/ctests/example10.ll b/benchmarks/ctests/example10.ll new file mode 100644 index 0000000..a2df8f1 --- /dev/null +++ b/benchmarks/ctests/example10.ll @@ -0,0 +1,92 @@ +; ModuleID = '../../benchmarks/ctests/example10.c' +source_filename = "../../benchmarks/ctests/example10.c" +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-macosx12.0.0" + +@IO_NAME = global ptr @input, align 8 +@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @atomic_start() #0 { +entry: + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @atomic_end() #0 { +entry: + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @input() #0 { +entry: + ret i32 0 +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @log(i32 noundef %x) #0 { +entry: + %x.addr = alloca i32, align 4 + store i32 %x, ptr %x.addr, align 4 + %0 = load i32, ptr %x.addr, align 4 + %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %0) + ret void +} + +declare i32 @printf(ptr noundef, ...) 
#1 + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @app() #0 { +entry: + %x = alloca i32, align 4 + %i = alloca i32, align 4 + call void @atomic_start() + %call = call i32 @input() + store i32 %call, ptr %x, align 4 + %0 = load i32, ptr %x, align 4 + store i32 %0, ptr %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry, , + %1 = load i32, ptr %i, align 4 + %2 = icmp slt i32 %1, 10 + br i1 %2, label %for.body, label %for.end + +for.body: ; preds = %for.cond, + %3 = load i32, ptr %i, align 4 + %4 = add nsw i32 %3, 2 + call void @log(i32 noundef %4) + br label %for.inc + +for.inc: ; preds = %for.body, + %5 = load i32, ptr %i, align 4 + %6 = add nsw i32 %5, 1 + store i32 %6, ptr %i, align 4 + br label %for.cond, !llvm.loop !5 + +for.end: ; preds = %for.cond, + call void @atomic_end() + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @main() #0 { +entry: + call void @app() + ret i32 0 +} + +attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } +attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } + +!llvm.module.flags = !{!0, !1, !2, !3} +!llvm.ident = !{!4} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 8, !"PIC Level", i32 2} +!2 = !{i32 7, !"uwtable", i32 1} +!3 = !{i32 7, !"frame-pointer", i32 1} +!4 = !{!"Homebrew clang version 17.0.2"} +!5 = distinct !{!5, !6} +!6 = !{!"llvm.loop.mustprogress"} diff --git a/benchmarks/ctests/example10.orig.ll 
b/benchmarks/ctests/example10.orig.ll new file mode 100644 index 0000000..bbe99ff --- /dev/null +++ b/benchmarks/ctests/example10.orig.ll @@ -0,0 +1,100 @@ +; ModuleID = '../../benchmarks/ctests/example10.c' +source_filename = "../../benchmarks/ctests/example10.c" +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-macosx12.0.0" + +@IO_NAME = global ptr @input, align 8 +@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @Fresh(i32 noundef %x) #0 { +entry: + %x.addr = alloca i32, align 4 + store i32 %x, ptr %x.addr, align 4 + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @atomic_start() #0 { +entry: + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @atomic_end() #0 { +entry: + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @input() #0 { +entry: + ret i32 0 +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @log(i32 noundef %x) #0 { +entry: + %x.addr = alloca i32, align 4 + store i32 %x, ptr %x.addr, align 4 + %0 = load i32, ptr %x.addr, align 4 + %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %0) + ret void +} + +declare i32 @printf(ptr noundef, ...) 
#1 + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @app() #0 { +entry: + %x = alloca i32, align 4 + %i = alloca i32, align 4 + %call = call i32 @input() + store i32 %call, ptr %x, align 4 + %0 = load i32, ptr %x, align 4 + store i32 %0, ptr %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %1 = load i32, ptr %i, align 4 + %cmp = icmp slt i32 %1, 10 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %2 = load i32, ptr %i, align 4 + %add = add nsw i32 %2, 2 + call void @log(i32 noundef %add) + br label %for.inc + +for.inc: ; preds = %for.body + %3 = load i32, ptr %i, align 4 + %inc = add nsw i32 %3, 1 + store i32 %inc, ptr %i, align 4 + br label %for.cond, !llvm.loop !5 + +for.end: ; preds = %for.cond + %4 = load i32, ptr %x, align 4 + call void @Fresh(i32 noundef %4) + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @main() #0 { +entry: + call void @app() + ret i32 0 +} + +attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } +attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } + +!llvm.module.flags = !{!0, !1, !2, !3} +!llvm.ident = !{!4} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 8, !"PIC Level", i32 2} +!2 = !{i32 7, !"uwtable", i32 1} +!3 = !{i32 7, !"frame-pointer", i32 1} +!4 = !{!"Homebrew clang version 17.0.2"} +!5 = distinct !{!5, !6} +!6 = !{!"llvm.loop.mustprogress"} diff --git a/benchmarks/ctests/example11.bc 
b/benchmarks/ctests/example11.bc new file mode 100644 index 0000000000000000000000000000000000000000..076c5ac57cc33f1dd31c34eaedd8b91b7efcc204 GIT binary patch literal 6352 zcmcgv4R8}zet&DNw6ZO))>s5Pk;KaOafX7s`miivd#VpiY+?@KbM7diBd@*?i9duS z8^eWUZ5c~ROllZs&P}=2PMA4JFRcTFftJizlHq)ulXG&JmT(@%J`5o#yCE!z2Cm~`+vOu|NovYjV#MXa4Fy(&PV9K;Ii3AE|md51Ll%(O6yOv ze$trzCPVF-k_4tC!3oJthx`kRyqTd|0#pdV2?=zkS^Bzzwkg=}!aR)(q0=ISvNJR& z1Nx7^O?4I>u#S^|6}3sHSM$ zl4vhuJ(%GclfI>3?dxOg&j?+8cmAr9RejoLhqoPI&yE4_3oJsQIx6&$%rRS6ZEih0 z4(v=gYO*p{RG$;u>Q_ihUoEmMDZG#&MMBPqk}zaANYPcPWk?}qnXA%~L1|V$+izJK zKeA-N#Ue&}*0Di@&JQZ`rJRr}yoyagn?cAXct3WHy^H-s1iCmz8tRJ%{L!Fa)X@xU}RGc6x`t7Cxk!8Ye8cUk{MO7!_rV)D;nnv>3 z5&Fu6{69+;j-F;+$2L&s?HDPT-4A)jyRVjjt!M`u~MU=haty$%1tS9cHF8?4B=r@ z)2a{Rs%t~a86ptf6^I%eqd_SZLef1_YPTsG9E=+Csg5HQtDGX#;EIcbs$U~i3qGX4 zhk0F%HnxtDxNKZqes^MkQRd+%gZ-R044SnMi`M!h0fFiOUVo%r8r|6u-8~XD#YE)- z{31C#cxr;)GfOx3(d`y$Pa_qckbJ^WL4n$ndNxndA$A@=oH|{WU5Z$$-cLh((MtWJjl}AuA09#y8)6ktr6>UY1KP0I(FAQUTqczlWwtf>JD%5Pmt zBN-4|07WF$>f(#L)27@L_saLre@WaM@4qUNN(C6f;(4HLPs5$EYD>y1L<#}TH)yXC z85ZbaUx5fg^W^@Oat1Ak1B+xwpX=>Ih=b2q90<0!B9`fo2qdfq<8W=#sRtAVxpjTL znJXSYN80Pfo5?GVxq~r+T#sssyHm5~T?lQ9=%vw(E7x&b&|46LX8_Sl1?A|p(x)J* zVs=w^vezb1yE~}Rz)sm!B)`GXw**=*v#Aqt6{N3^6Q_nn9vc@`joGVuMOEV+y^!%g zfOxNo1|anVqV|qxfQ=T6=hD~v=xI*g!pSes(z6a)&y1=rX_OZVzD~j|K;#y9YW~e>O{h zHbvhap|5iy*BE5ska9froaB|~HOf&)WnS{HeRlJh-8_(bo=Tdaio`?}{mDNEp>tgo zABrl5ld_-ghz7PqgMmockCQE&9}N#i3*P&#{4*rK>7Zx&=*tVmUC)-NCgZrCeV|c7 zQJI`l&P=O5CDgwB&8A`d+C!0waYymIA&pU}Hys0;>u&YQu?N&otIzYwYZ}$1xav7V zjTP3YW_d}&zKCf&Sut#{0*A~cDrf8!G3X|H&t`$8(OsaA6qnWDKyD+V~1Pas?R18O|PDCE-PMUh{=J90ZsknJ82{u-a*PPfXr9i8l;J_`> zf?u7ZuQ$qXOi5;0`4tXV**gxpg`qEVG<3h1l6*2n&p7D2T+y$u^U6f0>caH0>l)Qn zjcQb*f)%``S?*(hRB^^$bxLIJkC?}|oPpH`W1;fxslm0d;O8IHzv1N5Q}ibm`6q)Q zB}s@vM<}qxIRK%8Jsh5ZrR?msnx~TBx+++yam(l+R94lLRedS0^cA}~dK#~<97wKk zc1@%$p9m(gw(p;Zv~L3IslkI4!$gSr~FsI&LLf%n%9G9{i$tU1O677DxBc5g7WqqT+AmX_;FIkv>@HX3=4!DwQ2oKC++%dO$`D?v_? 
z^n6*@U}KSVgE46#&&nbig`P~xgkEw*Ns-a`FUB99EI+pPq;8r1wNp2{@Lv1P&!lu&*ADRP`q_o`J+vPEIQ) zS>+6mpVvCo30^rdq#R2J03Rphw^;eKLk=||JC42B_v>95|=@qg>wYF^GGW$XQ{E|xo7?k)y(nB@xDE+ zn$ax(&s4?y9Eu>Wfq7*0ub9=~SBV&PH3mG17(obZer0Emh=8LOx|6G*95s2&Mrae- z^qi-9reyf+g)g2>SLFqnnEdbtsDrtJPI8#b>nw3wvXYYS05cM^27%eq-zZscl|9DT zEU1`0W;HPg9~Rda&YjuESUe3Yf;F+vSvmi0t9LmmV)wryeZGYynSunN_<&rO2SXR> z@!Au(k1w7DTi~5X5@Mk@UxB1u@o909gaoTbwiO(?z$&HKcap-nQ~Q`DZz)z3&YjpR zeSR<6XCU4u?{j7~*8Vfz!{FjsgqFhlVcYsMBtsSvn|9i8ATF70!2-=4?G?K!nerRF;y^?|bXI6 z_V^E>nYwruAvv_Z+BcjFjsQ~0F9xwF*yz)KS>$d&(*8}I+ejR3X*qW^bGH{{FO0#u zE|#NM4{_ctB5?fP3(k4IFONO^HDe!=oy#t!Lp%#|fp@WN9NX!rnrKKb|5QUFX2m&U zdXjTFu-Pvw{1DY?dn8M?J;aF*D0yUjKGP1SZ2u}-9@5V`x0F#WL{-{<_{;(^-&Wn5 z59hb8coIZof8W`>`en5GzUX@~C>eYpXCBn$#XqP^PD)O?uHYQUYhY#zv=+pWk(qz! zvW{6&S&*`Wt9uW}48Dd~h-G-cPX5e6ux}uZtvkkOY@^Z_9<{_|n!B> z=oxlhPb_!IO@S$})o*1AJ>A_OvYu|wBxfrNWQmSImfCWK^8_g%cC+}$4yZh`lK%Kg z@HI)b1+o;eB`C4ZsyUQEg#>bB{unPpe_|!}$J;0i)V62^8+05wgD{y}&A~>u5|3-X3z5)23LH}vE z@&1l+!T`MB%v$L8!(B6vVS61t5q(&jJpnq;LW7?0Nk8MK}Un4*0iW zTo>Rg7I?reBjE5uM;7w~UI`%1i8leqf1noR)&h?Av(tcQEy5cB2QerKIL-$=UJ5`A za0T@50FGS&hsw?e1RY1CjiIxF9#gky`ax&!0|YI;W$X)5FAOs zaUb8f&ZKad_;bLqUAVU2lZy%A`YQlV!@59Bo>D=-J{(nYx54b z)8h?<{m!l5(TBnwEf?Aya=Kfan_FAVF5dl2ILN!bI+N3hPkm08cl(YOXUN|gg!aN$ zZMnak)0gQ@THaUY)*E~tbM-u(E*REwO?=CCXDG}=yWDT)%|>^b%Tva2Wj?*u^#?|R z#NxJcS8?j$ZfXs+1-;PrV8BxhJYH?N(PQLvT7${sahVO6$KY-adiAY=kjdB9;ts>< z)VRK7Q;XYc;M{yuld~0?_gUwi5%TY7et=o)Dsy?u^#-%S=+S#!2)rh z&czwsUZ2ZYX5#+`GhIHeoHywVCXl%t-rAkYK z;nLFkzQu5Houjz89`45*ic3rHoBEIg@c>ZR_EoPBdPDyC=u8F9eB0aNSrl02 z`^JNF!rovD-vn&DhX-et^G1({(;7Hebt-bberEvwv9!XN9pT+6QfHbKlcrl~bxOHX z-5NY+P5Q6R8W;>&!+H#y!R_J=UcK4lD>IjsYdMqAY&MxZUQfB(1@?f&WjY@pY&II! 
zfUQm5HO;)cHPoTi0w;IR>{1hF@|5YloSt`^%Xpt@jeC1rDd(Iw?f)zV7rE+><=|Hj z{a+*@5dEpL@aw4mNE&|gsSoAhy{Z4ti3oE41epkV0;%YPTy)}O1R>ub8=a|ioX?ST yAz$d=+yN52bKp|{O*1mRnLL=14-?NPJ <{ ptr @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17hbf6a0eaa1969aba0E", [16 x i8] c"\08\00\00\00\00\00\00\00\08\00\00\00\00\00\00\00", ptr @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h1b5be734946d3eb7E", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17he176d602148ddb94E", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17he176d602148ddb94E" }>, align 8 +@IO_NAME = constant <{ ptr }> <{ ptr @input }>, align 8 +@atomic_depth = external global i16 + +; Function Attrs: noinline uwtable +define internal void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17h7035381af5c34fd9E(ptr %f) unnamed_addr #0 { +start: + call void @_ZN4core3ops8function6FnOnce9call_once17h3bf07a824888a276E(ptr %f) + call void asm sideeffect "", "~{memory}"(), !srcloc !3 + ret void +} + +; Function Attrs: uwtable +define hidden i64 @_ZN3std2rt10lang_start17h9a96c5bd5005f31bE(ptr %main, i64 %argc, ptr %argv, i8 %sigpipe) unnamed_addr #1 { +start: + %_8 = alloca ptr, align 8 + %_5 = alloca i64, align 8 + store ptr %main, ptr %_8, align 8 + %0 = call i64 @_ZN3std2rt19lang_start_internal17hadaf077a6dd0140bE(ptr align 1 %_8, ptr align 8 @vtable.0, i64 %argc, ptr %argv, i8 %sigpipe) + store i64 %0, ptr %_5, align 8 + %v = load i64, ptr %_5, align 8, !noundef !4 + ret i64 %v +} + +; Function Attrs: inlinehint uwtable +define internal i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17he176d602148ddb94E"(ptr align 8 %_1) unnamed_addr #2 { +start: + %_4 = load ptr, ptr %_1, align 8, !nonnull !4, !noundef !4 + call void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17h7035381af5c34fd9E(ptr %_4) + %self = call i8 
@"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17h22ca6c304b09fb94E"() + %_0 = zext i8 %self to i32 + ret i32 %_0 +} + +; Function Attrs: inlinehint uwtable +define internal i32 @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h1b5be734946d3eb7E"(ptr %_1) unnamed_addr #2 { +start: + %_2 = alloca {}, align 1 + %0 = load ptr, ptr %_1, align 8, !nonnull !4, !noundef !4 + %_0 = call i32 @_ZN4core3ops8function6FnOnce9call_once17h32b22b06cefb658aE(ptr %0) + ret i32 %_0 +} + +; Function Attrs: inlinehint uwtable +define internal i32 @_ZN4core3ops8function6FnOnce9call_once17h32b22b06cefb658aE(ptr %0) unnamed_addr #2 personality ptr @rust_eh_personality { +start: + %1 = alloca { ptr, i32 }, align 8 + %_2 = alloca {}, align 1 + %_1 = alloca ptr, align 8 + store ptr %0, ptr %_1, align 8 + %_0 = invoke i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17he176d602148ddb94E"(ptr align 8 %_1) + to label %bb1 unwind label %cleanup + +bb3: ; preds = %cleanup + %2 = load ptr, ptr %1, align 8, !noundef !4 + %3 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 1 + %4 = load i32, ptr %3, align 8, !noundef !4 + %5 = insertvalue { ptr, i32 } poison, ptr %2, 0 + %6 = insertvalue { ptr, i32 } %5, i32 %4, 1 + resume { ptr, i32 } %6 + +cleanup: ; preds = %start + %7 = landingpad { ptr, i32 } + cleanup + %8 = extractvalue { ptr, i32 } %7, 0 + %9 = extractvalue { ptr, i32 } %7, 1 + %10 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 0 + store ptr %8, ptr %10, align 8 + %11 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 1 + store i32 %9, ptr %11, align 8 + br label %bb3 + +bb1: ; preds = %start + ret i32 %_0 +} + +; Function Attrs: inlinehint uwtable +define internal void @_ZN4core3ops8function6FnOnce9call_once17h3bf07a824888a276E(ptr %_1) unnamed_addr #2 { +start: + %_2 = alloca {}, align 1 + call void %_1() + ret void +} + +; Function Attrs: inlinehint uwtable +define internal void 
@"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17hbf6a0eaa1969aba0E"(ptr align 8 %_1) unnamed_addr #2 { +start: + ret void +} + +; Function Attrs: inlinehint uwtable +define internal i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17h22ca6c304b09fb94E"() unnamed_addr #2 { +start: + ret i8 0 +} + +; Function Attrs: uwtable +define dso_local i32 @input() unnamed_addr #1 { +start: + ret i32 0 +} + +; Function Attrs: uwtable +define dso_local void @log(i32 %i) unnamed_addr #1 { +start: + ret void +} + +; Function Attrs: uwtable +define dso_local void @app() unnamed_addr #1 { +start: + call void @atomic_start() + %x = call i32 @input() + call void @log(i32 %x) + call void @atomic_end() + call void @log(i32 1) + ret void +} + +; Function Attrs: uwtable +define internal void @_ZN9example114main17h0b701389294a589fE() unnamed_addr #1 { +start: + call void @app() + ret void +} + +; Function Attrs: uwtable +define dso_local void @atomic_start() unnamed_addr #1 { +start: + %local = load i16, ptr @atomic_depth, align 2, !noundef !4 + call void @start_atomic() + ret void +} + +; Function Attrs: uwtable +define dso_local void @atomic_end() unnamed_addr #1 { +start: + call void @end_atomic() + ret void +} + +; Function Attrs: uwtable +declare i64 @_ZN3std2rt19lang_start_internal17hadaf077a6dd0140bE(ptr align 1, ptr align 8, i64, ptr, i8) unnamed_addr #1 + +; Function Attrs: uwtable +declare i32 @rust_eh_personality(i32, i32, i64, ptr, ptr) unnamed_addr #1 + +; Function Attrs: uwtable +declare void @start_atomic() unnamed_addr #1 + +; Function Attrs: uwtable +declare void @end_atomic() unnamed_addr #1 + +define i32 @main(i32 %0, ptr %1) unnamed_addr #3 { +top: + %2 = sext i32 %0 to i64 + %3 = call i64 @_ZN3std2rt10lang_start17h9a96c5bd5005f31bE(ptr @_ZN9example114main17h0b701389294a589fE, i64 %2, ptr %1, i8 0) + %4 = trunc i64 %3 to i32 + ret i32 %4 +} + +attributes #0 = { noinline uwtable 
"frame-pointer"="non-leaf" "target-cpu"="apple-m1" } +attributes #1 = { uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" } +attributes #2 = { inlinehint uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" } +attributes #3 = { "frame-pointer"="non-leaf" "target-cpu"="apple-m1" } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 8, !"PIC Level", i32 2} +!1 = !{i32 7, !"PIE Level", i32 2} +!2 = !{!"rustc version 1.73.0 (cc66ad468 2023-10-03)"} +!3 = !{i32 1115326} +!4 = !{} diff --git a/benchmarks/ctests/example11.orig.ll b/benchmarks/ctests/example11.orig.ll new file mode 100644 index 0000000..49a1d31 --- /dev/null +++ b/benchmarks/ctests/example11.orig.ll @@ -0,0 +1,184 @@ +; ModuleID = '../../benchmarks/ctests/example11.bc' +source_filename = "example11.808d53e03ac95af8-cgu.0" +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-macosx12.0.0" + +@vtable.0 = private unnamed_addr constant <{ ptr, [16 x i8], ptr, ptr, ptr }> <{ ptr @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17hbf6a0eaa1969aba0E", [16 x i8] c"\08\00\00\00\00\00\00\00\08\00\00\00\00\00\00\00", ptr @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h1b5be734946d3eb7E", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17he176d602148ddb94E", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17he176d602148ddb94E" }>, align 8 +@IO_NAME = constant <{ ptr }> <{ ptr @input }>, align 8 +@atomic_depth = external global i16 + +; Function Attrs: noinline uwtable +define internal void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17h7035381af5c34fd9E(ptr %f) unnamed_addr #0 { +start: + call void @_ZN4core3ops8function6FnOnce9call_once17h3bf07a824888a276E(ptr %f) + call void asm sideeffect "", "~{memory}"(), !srcloc !3 + ret void +} + +; Function Attrs: uwtable +define hidden i64 @_ZN3std2rt10lang_start17h9a96c5bd5005f31bE(ptr %main, i64 
%argc, ptr %argv, i8 %sigpipe) unnamed_addr #1 { +start: + %_8 = alloca ptr, align 8 + %_5 = alloca i64, align 8 + store ptr %main, ptr %_8, align 8 + %0 = call i64 @_ZN3std2rt19lang_start_internal17hadaf077a6dd0140bE(ptr align 1 %_8, ptr align 8 @vtable.0, i64 %argc, ptr %argv, i8 %sigpipe) + store i64 %0, ptr %_5, align 8 + %v = load i64, ptr %_5, align 8, !noundef !4 + ret i64 %v +} + +; Function Attrs: inlinehint uwtable +define internal i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17he176d602148ddb94E"(ptr align 8 %_1) unnamed_addr #2 { +start: + %_4 = load ptr, ptr %_1, align 8, !nonnull !4, !noundef !4 + call void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17h7035381af5c34fd9E(ptr %_4) + %self = call i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17h22ca6c304b09fb94E"() + %_0 = zext i8 %self to i32 + ret i32 %_0 +} + +; Function Attrs: inlinehint uwtable +define internal i32 @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h1b5be734946d3eb7E"(ptr %_1) unnamed_addr #2 { +start: + %_2 = alloca {}, align 1 + %0 = load ptr, ptr %_1, align 8, !nonnull !4, !noundef !4 + %_0 = call i32 @_ZN4core3ops8function6FnOnce9call_once17h32b22b06cefb658aE(ptr %0) + ret i32 %_0 +} + +; Function Attrs: inlinehint uwtable +define internal i32 @_ZN4core3ops8function6FnOnce9call_once17h32b22b06cefb658aE(ptr %0) unnamed_addr #2 personality ptr @rust_eh_personality { +start: + %1 = alloca { ptr, i32 }, align 8 + %_2 = alloca {}, align 1 + %_1 = alloca ptr, align 8 + store ptr %0, ptr %_1, align 8 + %_0 = invoke i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17he176d602148ddb94E"(ptr align 8 %_1) + to label %bb1 unwind label %cleanup + +bb3: ; preds = %cleanup + %2 = load ptr, ptr %1, align 8, !noundef !4 + %3 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 1 + %4 = load i32, ptr %3, align 8, !noundef !4 + %5 = insertvalue { ptr, i32 } poison, ptr %2, 0 + %6 = insertvalue { ptr, i32 } 
%5, i32 %4, 1 + resume { ptr, i32 } %6 + +cleanup: ; preds = %start + %7 = landingpad { ptr, i32 } + cleanup + %8 = extractvalue { ptr, i32 } %7, 0 + %9 = extractvalue { ptr, i32 } %7, 1 + %10 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 0 + store ptr %8, ptr %10, align 8 + %11 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 1 + store i32 %9, ptr %11, align 8 + br label %bb3 + +bb1: ; preds = %start + ret i32 %_0 +} + +; Function Attrs: inlinehint uwtable +define internal void @_ZN4core3ops8function6FnOnce9call_once17h3bf07a824888a276E(ptr %_1) unnamed_addr #2 { +start: + %_2 = alloca {}, align 1 + call void %_1() + ret void +} + +; Function Attrs: inlinehint uwtable +define internal void @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17hbf6a0eaa1969aba0E"(ptr align 8 %_1) unnamed_addr #2 { +start: + ret void +} + +; Function Attrs: inlinehint uwtable +define internal i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17h22ca6c304b09fb94E"() unnamed_addr #2 { +start: + ret i8 0 +} + +; Function Attrs: uwtable +define dso_local i32 @input() unnamed_addr #1 { +start: + ret i32 0 +} + +; Function Attrs: uwtable +define dso_local void @log(i32 %i) unnamed_addr #1 { +start: + ret void +} + +; Function Attrs: uwtable +define dso_local void @app() unnamed_addr #1 { +start: + %x = call i32 @input() + call void @log(i32 1) + call void @log(i32 %x) + call void @Fresh(i32 %x) + ret void +} + +; Function Attrs: uwtable +define internal void @_ZN9example114main17h0b701389294a589fE() unnamed_addr #1 { +start: + call void @app() + ret void +} + +; Function Attrs: uwtable +define internal void @Fresh(i32 %_var) unnamed_addr #1 { +start: + ret void +} + +; Function Attrs: uwtable +define dso_local void @atomic_start() unnamed_addr #1 { +start: + %local = load i16, ptr @atomic_depth, align 2, !noundef !4 + call void @start_atomic() + ret void +} + +; Function Attrs: uwtable +define 
dso_local void @atomic_end() unnamed_addr #1 { +start: + call void @end_atomic() + ret void +} + +; Function Attrs: uwtable +declare i64 @_ZN3std2rt19lang_start_internal17hadaf077a6dd0140bE(ptr align 1, ptr align 8, i64, ptr, i8) unnamed_addr #1 + +; Function Attrs: uwtable +declare i32 @rust_eh_personality(i32, i32, i64, ptr, ptr) unnamed_addr #1 + +; Function Attrs: uwtable +declare void @start_atomic() unnamed_addr #1 + +; Function Attrs: uwtable +declare void @end_atomic() unnamed_addr #1 + +define i32 @main(i32 %0, ptr %1) unnamed_addr #3 { +top: + %2 = sext i32 %0 to i64 + %3 = call i64 @_ZN3std2rt10lang_start17h9a96c5bd5005f31bE(ptr @_ZN9example114main17h0b701389294a589fE, i64 %2, ptr %1, i8 0) + %4 = trunc i64 %3 to i32 + ret i32 %4 +} + +attributes #0 = { noinline uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" } +attributes #1 = { uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" } +attributes #2 = { inlinehint uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" } +attributes #3 = { "frame-pointer"="non-leaf" "target-cpu"="apple-m1" } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 8, !"PIC Level", i32 2} +!1 = !{i32 7, !"PIE Level", i32 2} +!2 = !{!"rustc version 1.73.0 (cc66ad468 2023-10-03)"} +!3 = !{i32 1115326} +!4 = !{} diff --git a/benchmarks/ctests/example11.rs b/benchmarks/ctests/example11.rs new file mode 100644 index 0000000..8f355c3 --- /dev/null +++ b/benchmarks/ctests/example11.rs @@ -0,0 +1,26 @@ +include!("../intermittent.rs"); + +#[no_mangle] +fn input() -> i32 { + 0 +} + +#[no_mangle] +pub static IO_NAME: fn() -> i32 = input; + +#[no_mangle] +fn log(i: i32) -> () {} + +#[no_mangle] +fn app() -> () { + let x = input(); + let y = 1; + let z = y; + log(z); + log(x); + Fresh(x); +} + +fn main() -> () { + app() +} diff --git a/benchmarks/ctests/example12.bc b/benchmarks/ctests/example12.bc new file mode 100644 index 
0000000000000000000000000000000000000000..61bd73f2e2c1e9323279a9713566f04456214c7f GIT binary patch literal 8160 zcmd5=4Rljgp1)~cUtUX^ywUJ(*N6b{?Uhv=YI1lP0n%k(?#oBki000?aPARJ}zm@!oW5%y& z$>vegH%j_P$ZIzF*Cu%#EvffO0sxMX&>drBS8Xy&I&%-qQ%VuqMj$jJL5UKeKN~Jd zbIxw_5K&IjYU7iluykP}m6=UE=H$~?Bv_uDVcAcs(+yO{Pm|SEZDqbeWlqV=NIPwQ zGQmAW9ZYAerESb_MCz(=#*(}x9jb?WAKcBnIRtWdFbK_q=K?Q5AF{NRr&hsj_r?KR zS#r{%6(bP@3(h1^h)?NLh_nedLVTGr^`uh_eVOX(rjk3&i`~TYJ4Z}O zmdvNC7TQ*@FB30hP>8UoI;$vNotH=^NF7A?3+OVHfGRT(s)RM?U17e(J`?#`SegoS zM10i|f3>)&DH8Naw&qE;pmSqc=Xz4k^e6`U6{sb9z?}VVkK#&C$_Y{SXCg|{#7i3U zBD(5`0qKUC$FVFcO(t-%Yqacho2(AWzwD5GVUk~Bi{&DCO9+Q&-x?uf4=63CNmtqyJEBU{mcD0{Ry>ts*LM@ku=l9)<}4)x@=iGs4*7UVlLH(bYNR3?n<88KB8E1xo`+8;g#)jR$hMBl>N;eNCdpQZBsfBT zN=y8_WUFwm8?!_?6-@1PPTwO-U`f-V#i z2xR&?mUyo_M0_)JvBP?i30C~3IMB0@ksK5Xc(|=KBUYg(= z6NUuGI@Dd_WD1EV&4dZ9YxZSU%%Wk2pbDNVeW_OCSj1o#@^~i9&8XV3zPrLCsDRA` zW!U}B-}1c(8K47`q0uI1$iyv&C@;n8%i&UIcV%RzR&=x{En0bkuvR5j5|?bp4jmNb zAfgnN<%aP*XuNO-6~r?Z7VT`INSJ&Oa{M^p8j4p8jw!t9qT)8IAslVD@RBV}l0er+ z>18DUoR(eZWg4j^d!RoHXu``T3d2J4hQ-B0*5YbuHv+@h$vT>VCLl0)1S1QgZvj(_qFS`TN#|RXvxafGOC@d}<9#3y{ zNPb=|(PPe%=y}WnJ_qoTr19HC5WAU=*rC2MBL1$(magWdxT;5GH~M5(S+R4dM{%)7 zF)ZBAaf*{l#UM}yM}E*@1x7G-3HM`B1F%&aRD1MqV9|+|!qeiy-l+7gnuu?0#P16Q zt8pO*Ga|w6Nanlu$iG1HYc|=%4%vmN4MoG`Wu5ECy!oC|0pxXVRB>@E>oZZdH>1+f zYhAuKR5)zQoixM|0bXnBs;s!)A;%ub-jIEgQ(RGIo$t@uCd$SNE3?Kqa?SRTVK`dY zYb^$cObirVv=+8OH`@MYGFTef4Ej)*zWrUz4_=^USAke4*BP!lKJ*0vM*Wr*r@DNll;?eSS2bF2VRtbE%q)* z73|^Q2!ygZY&MQY!F9zDsbSM#HxPZ+s5$$5zrvgAVr64EzH$(9iM1+2n=&MUvWUGq z@-Po`4|a-*+M`9CQNxIJIi%AtcuZQRoMqv%LDhHrv#OfYy^8g${2D8}Ixf3_wlD}S zg}nD?moe5hX=ajNUB=`zx9{G4s-&CFa~G-9md87k`wVmsDnCRbTe+2~@HBdAZRIOs zFaAw|po?kr_yaW!^^7WCugPbbJeNzS;&P!>8S>QH2mN@ay3#e>@n~%>4V_6rK^Z!$z z#;@2IA*K;;o~5!@@t z^l{^u7)wvBiG}Z91$>yww~+hUwC22OQ*x9H`{=$la|=keJ=IH=nx*q-iwWg2hs*{V z;e+bmFPJ#Khc>y_H2TZhzGCFuQnM$Q5Hq`8q8_bZNYI%BqV)HO64)x@^>_{+NrlES zv9oR(Axh-iGtv>&(yt=>W7ZTsKrYcy61)LmpWM4(;@BQ~=E3yF1rtYhQ;$A^wrfSF 
zh@18$J1UpMj*1({AcXgGu#ZgkQ6@*o$4H>>RhLBExvem7&DkB}@83I9JMAX;Ha2e- zJa6?&R~6Y~5;HM}_wwdm>;itI#+mw&&j7FRasq7>azq8!v^o zcEKlcAp#`kTO5Nl0%vJ#ol?+aYVD~pI9cM?IbAA-Jl~$s;uv)dR9jH{BC5YutVD-X zn(10vM&R5iwUiB3ok{xY{lBft8(jU}Ei$qF@iNm(q2|J35spE825?O5cUJsZy#C>o ze(ZBAjyjHcsT0=w_Q+EikdE7?5)p7bUkA^r7-L_CSMh6-Z1Ym!72>?hN$wjtYs(X7 zZs_Pf;S15-{YyhM5hY8;q!UuIuvBtj%)|r@SNN1J(_ni#|{r-u5L}=`%6IPFN<6Jdpu07kspK=lF#=EFsz^ zw$c}mS|*M?(e>E)?_RJ>y!r&z562(dw~p+A_*)OuW_H~DVU=m{Jxu1%I3`w$X+-~Q z9PHcq@0J+PWa3>c$S46>X~c@@Itaso?I}|w_D>bLk4-z3=V74^gpN~4u2Z^N6E%#k z>!@}WRK^K-limf^zmf9hFKpc>+L|%{nOrO}Sq*Qb%n+&p`>#T%1{POCo4E(Z88(c7 zM_04x`ir|oJI7l2gwXvYN0myG5z%i_hgPnPy{!^&80 zV;#94{JZKv?cycQPw|g^XdDwehJ$Y>+58tmo%|plTP9}FSySv2lZA5ucy)(4HHRc1 zqu7GEGQ2jvN~@MWj+Rbo6~|X(_xHFN|17wv&trd7ADrCrHG}KR$YiQah$cc|=_?Le zZNvE~*uqx4D~9p7^)LwGO>!Uc8=HuE87VE{Hd<*Jq;||TwMmr=O4?|ZrAkIGaD@kt zGbOEUsWY$fbf%^10eXQu96rss!|rperNEagevv?CODca-MDWN_nRvhkJDim5>|c!c zfla<-Fk56%?oCL1j^KTjwxk0rUxrZUz+&u=fA%E&_{v$`4E$nocakO~V#??Y zw1iGflqAnaR3ZdrBYcTDP7P*-#orQTw)^?SR}-s89^@rw6WbU&pVULX$UMZ)c%2+V zb$rTM@=9+#FW)Dj4sPXBk4W-*9_7=*RN1LEe%3Fk7n^q?^jPxg(y!BIPdqbN`6#nR z{pxp))t~HsUi|xiD?d61mO>tI!t3yyDefG_^^N^M4u)fkD&PY95ZEd-{KWs?j3&hT zYM21WFSH7t3_Nj8;5PL=e#hf++W=$0@j`z!Jn>%=2z>b(;CJHh;aW0@i{Sin3Gj00 zcftkHm>!=G(RdL0=ipiwhwlU&_w}Ig19A8zzzdfIkKOhXKbrP3d0^M&Ad08XR|T9R4qWyP)p{+!=>2fDPvn=nnz@a2)ZbA&0RXj4uZq%l{xw zKE_u9j^%$GCm-WgfMflx0uG;PeeMGsK0?EJCg=&t#{r1@_<|6{aF|#FI9?ZX-IQGv z76|QU0Q5i{A?LqY3jM|)=dAVQv*oMpcbo58!37)YYFu`=#}}-&KXA7u5Ok~9z?OjB z)lgU0P;Yc{u7`qt&gD@X>~{RC$L{p3uc@~OsvG?9KJ~3iUu|^h^*X)I#pw)s&Rw8h zF)2>%531N&u715e5ai%pU(IUuTD9Bdbr*OIdY9VxM@GVmxtsLPT;b-bZ3t}gd*I!T z0e3FQ(P)f%73*}n3?2`sGU%}!t=^tnu_m`hqt4x=W^=iK@NhNwJ^A@HL61K_-w&!l z=boU)m%DNe$niG#H*rI6x{T!&Ssr#PNYEEq^&}p>>y;0-Vx}1MfvT<54=)}2= zde&g@78DqbDwo>z!)Lm5oY4)A=2(_<=`^03Gkrn7p}_5L@Yx}zzFLSg_8uew_$ip5 ze=}?t&aJTJ=2pQ4Zp+WVY3ePpiTSl!tB0^SRXQi9@jy=8bfOkVqCg0QidDq|R_Cj| zB~*}jHNikEaFD1FymFX`--MwMxVR}0nCakF;TU6;%~%zDY3n^rIQKLvgU;*qx>Oo9 
ztKl>{?1}=dpu)6yafu^%!sl;rc>*APjmKYCQ_tbhW0^X?#|KpaQe8TiQ(K^RY4vKO zOXV%VQgs?ZYPq1|ZSru+I9Ee$t;Yqz3m%;I#ujkIO#kGnLb*(->SHX925qy1=@c zdf%pCZNqxb=L0p2o+b{ac~oj`9amG2om^0$F{;%CPADah}byC#gnYfysAI zy&D8z(!nfYyqRv0j$n5^R}0cOH|J&bdQRtdvnnm?Tp`eqr`qm=zh4?)Oigf$Ak-d{ ziiO7BV(R3x`RZkO&a&A5G|ONxP$1*hvQU4V)}t}Hy#>Ys;1Yw*Xfzny9=G1*^r%!| zZh_j%`RjDrWngQqXIUNRY6vu`R3MX`HalPKap^T)x61{z;Z_%Dmbun%%4hA9rv0Ds zX`HM69HHKR=>LLILC}vzsaW=p#HoKu^q(QsA3XI|R-K;u7pB#zb^l-SDk$}15G#oN zv6&T?_^VMX5Fc=>9mv&=*%egz0rYAY_;r%>V#lnh4kkko@;{~6*irBM9DA$aNfyU6 XTVbC%SlTB~mQxxCM@!)-I(_>u&eATE literal 0 HcmV?d00001 diff --git a/benchmarks/ctests/example12.ll b/benchmarks/ctests/example12.ll new file mode 100644 index 0000000..7438e4d --- /dev/null +++ b/benchmarks/ctests/example12.ll @@ -0,0 +1,274 @@ +; ModuleID = '../../benchmarks/ctests/example12.bc' +source_filename = "example12.2ec73fdcc3bed253-cgu.0" +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-macosx12.0.0" + +@vtable.0 = private unnamed_addr constant <{ ptr, [16 x i8], ptr, ptr, ptr }> <{ ptr @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17h47ea1b16ba3e87a4E", [16 x i8] c"\08\00\00\00\00\00\00\00\08\00\00\00\00\00\00\00", ptr @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h2a2856448793d4cbE", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h339710bdc8eea187E", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h339710bdc8eea187E" }>, align 8 +@IO_NAME = constant <{ ptr }> <{ ptr @input }>, align 8 +@atomic_depth = external global i16 + +; Function Attrs: noinline uwtable +define internal void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17h9c77676ca687ad52E(ptr %f) unnamed_addr #0 { +start: + call void @_ZN4core3ops8function6FnOnce9call_once17hc6a9dc29a00ac63eE(ptr %f) + call void asm sideeffect "", "~{memory}"(), !srcloc !3 + ret void +} + +; Function Attrs: uwtable +define hidden i64 
@_ZN3std2rt10lang_start17h04742dcfd5f87c29E(ptr %main, i64 %argc, ptr %argv, i8 %sigpipe) unnamed_addr #1 { +start: + %_8 = alloca ptr, align 8 + %_5 = alloca i64, align 8 + store ptr %main, ptr %_8, align 8 + %0 = call i64 @_ZN3std2rt19lang_start_internal17hadaf077a6dd0140bE(ptr align 1 %_8, ptr align 8 @vtable.0, i64 %argc, ptr %argv, i8 %sigpipe) + store i64 %0, ptr %_5, align 8 + %v = load i64, ptr %_5, align 8, !noundef !4 + ret i64 %v +} + +; Function Attrs: inlinehint uwtable +define internal i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h339710bdc8eea187E"(ptr align 8 %_1) unnamed_addr #2 { +start: + %_4 = load ptr, ptr %_1, align 8, !nonnull !4, !noundef !4 + call void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17h9c77676ca687ad52E(ptr %_4) + %self = call i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17hc6cb452c4729c1f5E"() + %_0 = zext i8 %self to i32 + ret i32 %_0 +} + +; Function Attrs: inlinehint uwtable +define internal i32 @"_ZN47_$LT$i32$u20$as$u20$core..iter..range..Step$GT$17forward_unchecked17h3be66287c3fcbba4E"(i32 %start1, i64 %n) unnamed_addr #2 { +start: + %rhs = trunc i64 %n to i32 + %_0 = add nsw i32 %start1, %rhs + ret i32 %_0 +} + +; Function Attrs: inlinehint uwtable +define internal i32 @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h2a2856448793d4cbE"(ptr %_1) unnamed_addr #2 { +start: + %_2 = alloca {}, align 1 + %0 = load ptr, ptr %_1, align 8, !nonnull !4, !noundef !4 + %_0 = call i32 @_ZN4core3ops8function6FnOnce9call_once17had97088f55991c2cE(ptr %0) + ret i32 %_0 +} + +; Function Attrs: inlinehint uwtable +define internal i32 @_ZN4core3ops8function6FnOnce9call_once17had97088f55991c2cE(ptr %0) unnamed_addr #2 personality ptr @rust_eh_personality { +start: + %1 = alloca { ptr, i32 }, align 8 + %_2 = alloca {}, align 1 + %_1 = alloca ptr, align 8 + store ptr %0, ptr %_1, align 8 + %_0 = invoke i32 
@"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h339710bdc8eea187E"(ptr align 8 %_1) + to label %bb1 unwind label %cleanup + +bb3: ; preds = %cleanup + %2 = load ptr, ptr %1, align 8, !noundef !4 + %3 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 1 + %4 = load i32, ptr %3, align 8, !noundef !4 + %5 = insertvalue { ptr, i32 } poison, ptr %2, 0 + %6 = insertvalue { ptr, i32 } %5, i32 %4, 1 + resume { ptr, i32 } %6 + +cleanup: ; preds = %start + %7 = landingpad { ptr, i32 } + cleanup + %8 = extractvalue { ptr, i32 } %7, 0 + %9 = extractvalue { ptr, i32 } %7, 1 + %10 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 0 + store ptr %8, ptr %10, align 8 + %11 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 1 + store i32 %9, ptr %11, align 8 + br label %bb3 + +bb1: ; preds = %start + ret i32 %_0 +} + +; Function Attrs: inlinehint uwtable +define internal void @_ZN4core3ops8function6FnOnce9call_once17hc6a9dc29a00ac63eE(ptr %_1) unnamed_addr #2 { +start: + %_2 = alloca {}, align 1 + call void %_1() + ret void +} + +; Function Attrs: inlinehint uwtable +define internal void @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17h47ea1b16ba3e87a4E"(ptr align 8 %_1) unnamed_addr #2 { +start: + ret void +} + +; Function Attrs: inlinehint uwtable +define internal { i32, i32 } @"_ZN4core4iter5range101_$LT$impl$u20$core..iter..traits..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h3186fffc13203a36E"(ptr align 4 %self) unnamed_addr #2 { +start: + %0 = call { i32, i32 } @"_ZN89_$LT$core..ops..range..Range$LT$T$GT$$u20$as$u20$core..iter..range..RangeIteratorImpl$GT$9spec_next17h1b9638ceb504bcf5E"(ptr align 4 %self) + %_0.0 = extractvalue { i32, i32 } %0, 0 + %_0.1 = extractvalue { i32, i32 } %0, 1 + %1 = insertvalue { i32, i32 } poison, i32 %_0.0, 0 + %2 = insertvalue { i32, i32 } %1, i32 %_0.1, 1 + ret { i32, i32 } %2 +} + +; Function Attrs: inlinehint uwtable +define 
internal i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17hc6cb452c4729c1f5E"() unnamed_addr #2 { +start: + ret i8 0 +} + +; Function Attrs: inlinehint uwtable +define internal { i32, i32 } @"_ZN63_$LT$I$u20$as$u20$core..iter..traits..collect..IntoIterator$GT$9into_iter17h9c831713eeb3e5efE"(i32 %self.0, i32 %self.1) unnamed_addr #2 { +start: + %0 = insertvalue { i32, i32 } poison, i32 %self.0, 0 + %1 = insertvalue { i32, i32 } %0, i32 %self.1, 1 + ret { i32, i32 } %1 +} + +; Function Attrs: inlinehint uwtable +define internal { i32, i32 } @"_ZN89_$LT$core..ops..range..Range$LT$T$GT$$u20$as$u20$core..iter..range..RangeIteratorImpl$GT$9spec_next17h1b9638ceb504bcf5E"(ptr align 4 %self) unnamed_addr #2 { +start: + %_0 = alloca { i32, i32 }, align 4 + %_4 = getelementptr inbounds { i32, i32 }, ptr %self, i32 0, i32 1 + %_3.i = load i32, ptr %self, align 4, !noundef !4 + %_4.i = load i32, ptr %_4, align 4, !noundef !4 + %_0.i = icmp slt i32 %_3.i, %_4.i + br i1 %_0.i, label %bb2, label %bb4 + +bb4: ; preds = %start + store i32 0, ptr %_0, align 4 + br label %bb5 + +bb2: ; preds = %start + %old = load i32, ptr %self, align 4, !noundef !4 + %_6 = call i32 @"_ZN47_$LT$i32$u20$as$u20$core..iter..range..Step$GT$17forward_unchecked17h3be66287c3fcbba4E"(i32 %old, i64 1) + store i32 %_6, ptr %self, align 4 + %0 = getelementptr inbounds { i32, i32 }, ptr %_0, i32 0, i32 1 + store i32 %old, ptr %0, align 4 + store i32 1, ptr %_0, align 4 + br label %bb5 + +bb5: ; preds = %bb2, %bb4 + %1 = getelementptr inbounds { i32, i32 }, ptr %_0, i32 0, i32 0 + %2 = load i32, ptr %1, align 4, !range !5, !noundef !4 + %3 = getelementptr inbounds { i32, i32 }, ptr %_0, i32 0, i32 1 + %4 = load i32, ptr %3, align 4 + %5 = insertvalue { i32, i32 } poison, i32 %2, 0 + %6 = insertvalue { i32, i32 } %5, i32 %4, 1 + ret { i32, i32 } %6 +} + +; Function Attrs: uwtable +define dso_local i32 @input() unnamed_addr #1 { +start: + ret i32 0 +} + +; Function Attrs: uwtable +define 
dso_local void @log(i32 %i) unnamed_addr #1 { +start: + ret void +} + +; Function Attrs: uwtable +define dso_local void @app() unnamed_addr #1 { +start: + %_5 = alloca { i32, i32 }, align 4 + %iter = alloca { i32, i32 }, align 4 + %_3 = alloca { i32, i32 }, align 4 + call void @atomic_start() + %x = call i32 @input() + store i32 0, ptr %_3, align 4 + %0 = getelementptr inbounds { i32, i32 }, ptr %_3, i32 0, i32 1 + store i32 10, ptr %0, align 4 + %1 = getelementptr inbounds { i32, i32 }, ptr %_3, i32 0, i32 0 + %2 = load i32, ptr %1, align 4, !noundef !4 + %3 = getelementptr inbounds { i32, i32 }, ptr %_3, i32 0, i32 1 + %4 = load i32, ptr %3, align 4, !noundef !4 + %5 = call { i32, i32 } @"_ZN63_$LT$I$u20$as$u20$core..iter..traits..collect..IntoIterator$GT$9into_iter17h9c831713eeb3e5efE"(i32 %2, i32 %4) + %_2.0 = extractvalue { i32, i32 } %5, 0 + %_2.1 = extractvalue { i32, i32 } %5, 1 + %6 = getelementptr inbounds { i32, i32 }, ptr %iter, i32 0, i32 0 + store i32 %_2.0, ptr %6, align 4 + %7 = getelementptr inbounds { i32, i32 }, ptr %iter, i32 0, i32 1 + store i32 %_2.1, ptr %7, align 4 + br label %bb3 + +bb3: ; preds = %bb5, %start + %8 = call { i32, i32 } @"_ZN4core4iter5range101_$LT$impl$u20$core..iter..traits..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h3186fffc13203a36E"(ptr align 4 %iter) + store { i32, i32 } %8, ptr %_5, align 4 + %9 = load i32, ptr %_5, align 4, !range !5, !noundef !4 + %_7 = zext i32 %9 to i64 + %10 = icmp eq i64 %_7, 0 + br i1 %10, label %bb7, label %bb5 + +bb7: ; preds = %bb3 + call void @atomic_end() + ret void + +bb5: ; preds = %bb3 + call void @log(i32 1) + call void @log(i32 %x) + br label %bb3 + +bb6: ; No predecessors! 
+ unreachable +} + +; Function Attrs: uwtable +define internal void @_ZN9example124main17h35539225bd174e48E() unnamed_addr #1 { +start: + call void @app() + ret void +} + +; Function Attrs: uwtable +define dso_local void @atomic_start() unnamed_addr #1 { +start: + %local = load i16, ptr @atomic_depth, align 2, !noundef !4 + call void @start_atomic() + ret void +} + +; Function Attrs: uwtable +define dso_local void @atomic_end() unnamed_addr #1 { +start: + call void @end_atomic() + ret void +} + +; Function Attrs: uwtable +declare i64 @_ZN3std2rt19lang_start_internal17hadaf077a6dd0140bE(ptr align 1, ptr align 8, i64, ptr, i8) unnamed_addr #1 + +; Function Attrs: uwtable +declare i32 @rust_eh_personality(i32, i32, i64, ptr, ptr) unnamed_addr #1 + +; Function Attrs: uwtable +declare void @start_atomic() unnamed_addr #1 + +; Function Attrs: uwtable +declare void @end_atomic() unnamed_addr #1 + +define i32 @main(i32 %0, ptr %1) unnamed_addr #3 { +top: + %2 = sext i32 %0 to i64 + %3 = call i64 @_ZN3std2rt10lang_start17h04742dcfd5f87c29E(ptr @_ZN9example124main17h35539225bd174e48E, i64 %2, ptr %1, i8 0) + %4 = trunc i64 %3 to i32 + ret i32 %4 +} + +attributes #0 = { noinline uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" } +attributes #1 = { uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" } +attributes #2 = { inlinehint uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" } +attributes #3 = { "frame-pointer"="non-leaf" "target-cpu"="apple-m1" } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 8, !"PIC Level", i32 2} +!1 = !{i32 7, !"PIE Level", i32 2} +!2 = !{!"rustc version 1.73.0 (cc66ad468 2023-10-03)"} +!3 = !{i32 1453212} +!4 = !{} +!5 = !{i32 0, i32 2} diff --git a/benchmarks/ctests/example12.orig.ll b/benchmarks/ctests/example12.orig.ll new file mode 100644 index 0000000..a4c7d70 --- /dev/null +++ b/benchmarks/ctests/example12.orig.ll @@ -0,0 +1,279 @@ +; ModuleID = '../../benchmarks/ctests/example12.bc' 
+source_filename = "example12.2ec73fdcc3bed253-cgu.0" +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-macosx12.0.0" + +@vtable.0 = private unnamed_addr constant <{ ptr, [16 x i8], ptr, ptr, ptr }> <{ ptr @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17h47ea1b16ba3e87a4E", [16 x i8] c"\08\00\00\00\00\00\00\00\08\00\00\00\00\00\00\00", ptr @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h2a2856448793d4cbE", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h339710bdc8eea187E", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h339710bdc8eea187E" }>, align 8 +@IO_NAME = constant <{ ptr }> <{ ptr @input }>, align 8 +@atomic_depth = external global i16 + +; Function Attrs: noinline uwtable +define internal void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17h9c77676ca687ad52E(ptr %f) unnamed_addr #0 { +start: + call void @_ZN4core3ops8function6FnOnce9call_once17hc6a9dc29a00ac63eE(ptr %f) + call void asm sideeffect "", "~{memory}"(), !srcloc !3 + ret void +} + +; Function Attrs: uwtable +define hidden i64 @_ZN3std2rt10lang_start17h04742dcfd5f87c29E(ptr %main, i64 %argc, ptr %argv, i8 %sigpipe) unnamed_addr #1 { +start: + %_8 = alloca ptr, align 8 + %_5 = alloca i64, align 8 + store ptr %main, ptr %_8, align 8 + %0 = call i64 @_ZN3std2rt19lang_start_internal17hadaf077a6dd0140bE(ptr align 1 %_8, ptr align 8 @vtable.0, i64 %argc, ptr %argv, i8 %sigpipe) + store i64 %0, ptr %_5, align 8 + %v = load i64, ptr %_5, align 8, !noundef !4 + ret i64 %v +} + +; Function Attrs: inlinehint uwtable +define internal i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h339710bdc8eea187E"(ptr align 8 %_1) unnamed_addr #2 { +start: + %_4 = load ptr, ptr %_1, align 8, !nonnull !4, !noundef !4 + call void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17h9c77676ca687ad52E(ptr %_4) + %self = call i8 
@"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17hc6cb452c4729c1f5E"() + %_0 = zext i8 %self to i32 + ret i32 %_0 +} + +; Function Attrs: inlinehint uwtable +define internal i32 @"_ZN47_$LT$i32$u20$as$u20$core..iter..range..Step$GT$17forward_unchecked17h3be66287c3fcbba4E"(i32 %start1, i64 %n) unnamed_addr #2 { +start: + %rhs = trunc i64 %n to i32 + %_0 = add nsw i32 %start1, %rhs + ret i32 %_0 +} + +; Function Attrs: inlinehint uwtable +define internal i32 @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h2a2856448793d4cbE"(ptr %_1) unnamed_addr #2 { +start: + %_2 = alloca {}, align 1 + %0 = load ptr, ptr %_1, align 8, !nonnull !4, !noundef !4 + %_0 = call i32 @_ZN4core3ops8function6FnOnce9call_once17had97088f55991c2cE(ptr %0) + ret i32 %_0 +} + +; Function Attrs: inlinehint uwtable +define internal i32 @_ZN4core3ops8function6FnOnce9call_once17had97088f55991c2cE(ptr %0) unnamed_addr #2 personality ptr @rust_eh_personality { +start: + %1 = alloca { ptr, i32 }, align 8 + %_2 = alloca {}, align 1 + %_1 = alloca ptr, align 8 + store ptr %0, ptr %_1, align 8 + %_0 = invoke i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h339710bdc8eea187E"(ptr align 8 %_1) + to label %bb1 unwind label %cleanup + +bb3: ; preds = %cleanup + %2 = load ptr, ptr %1, align 8, !noundef !4 + %3 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 1 + %4 = load i32, ptr %3, align 8, !noundef !4 + %5 = insertvalue { ptr, i32 } poison, ptr %2, 0 + %6 = insertvalue { ptr, i32 } %5, i32 %4, 1 + resume { ptr, i32 } %6 + +cleanup: ; preds = %start + %7 = landingpad { ptr, i32 } + cleanup + %8 = extractvalue { ptr, i32 } %7, 0 + %9 = extractvalue { ptr, i32 } %7, 1 + %10 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 0 + store ptr %8, ptr %10, align 8 + %11 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 1 + store i32 %9, ptr %11, align 8 + br label %bb3 + +bb1: ; preds = %start + ret i32 %_0 +} + +; Function Attrs: 
inlinehint uwtable +define internal void @_ZN4core3ops8function6FnOnce9call_once17hc6a9dc29a00ac63eE(ptr %_1) unnamed_addr #2 { +start: + %_2 = alloca {}, align 1 + call void %_1() + ret void +} + +; Function Attrs: inlinehint uwtable +define internal void @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17h47ea1b16ba3e87a4E"(ptr align 8 %_1) unnamed_addr #2 { +start: + ret void +} + +; Function Attrs: inlinehint uwtable +define internal { i32, i32 } @"_ZN4core4iter5range101_$LT$impl$u20$core..iter..traits..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h3186fffc13203a36E"(ptr align 4 %self) unnamed_addr #2 { +start: + %0 = call { i32, i32 } @"_ZN89_$LT$core..ops..range..Range$LT$T$GT$$u20$as$u20$core..iter..range..RangeIteratorImpl$GT$9spec_next17h1b9638ceb504bcf5E"(ptr align 4 %self) + %_0.0 = extractvalue { i32, i32 } %0, 0 + %_0.1 = extractvalue { i32, i32 } %0, 1 + %1 = insertvalue { i32, i32 } poison, i32 %_0.0, 0 + %2 = insertvalue { i32, i32 } %1, i32 %_0.1, 1 + ret { i32, i32 } %2 +} + +; Function Attrs: inlinehint uwtable +define internal i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17hc6cb452c4729c1f5E"() unnamed_addr #2 { +start: + ret i8 0 +} + +; Function Attrs: inlinehint uwtable +define internal { i32, i32 } @"_ZN63_$LT$I$u20$as$u20$core..iter..traits..collect..IntoIterator$GT$9into_iter17h9c831713eeb3e5efE"(i32 %self.0, i32 %self.1) unnamed_addr #2 { +start: + %0 = insertvalue { i32, i32 } poison, i32 %self.0, 0 + %1 = insertvalue { i32, i32 } %0, i32 %self.1, 1 + ret { i32, i32 } %1 +} + +; Function Attrs: inlinehint uwtable +define internal { i32, i32 } @"_ZN89_$LT$core..ops..range..Range$LT$T$GT$$u20$as$u20$core..iter..range..RangeIteratorImpl$GT$9spec_next17h1b9638ceb504bcf5E"(ptr align 4 %self) unnamed_addr #2 { +start: + %_0 = alloca { i32, i32 }, align 4 + %_4 = getelementptr inbounds { i32, i32 }, ptr %self, i32 0, i32 1 + %_3.i = load 
i32, ptr %self, align 4, !noundef !4 + %_4.i = load i32, ptr %_4, align 4, !noundef !4 + %_0.i = icmp slt i32 %_3.i, %_4.i + br i1 %_0.i, label %bb2, label %bb4 + +bb4: ; preds = %start + store i32 0, ptr %_0, align 4 + br label %bb5 + +bb2: ; preds = %start + %old = load i32, ptr %self, align 4, !noundef !4 + %_6 = call i32 @"_ZN47_$LT$i32$u20$as$u20$core..iter..range..Step$GT$17forward_unchecked17h3be66287c3fcbba4E"(i32 %old, i64 1) + store i32 %_6, ptr %self, align 4 + %0 = getelementptr inbounds { i32, i32 }, ptr %_0, i32 0, i32 1 + store i32 %old, ptr %0, align 4 + store i32 1, ptr %_0, align 4 + br label %bb5 + +bb5: ; preds = %bb2, %bb4 + %1 = getelementptr inbounds { i32, i32 }, ptr %_0, i32 0, i32 0 + %2 = load i32, ptr %1, align 4, !range !5, !noundef !4 + %3 = getelementptr inbounds { i32, i32 }, ptr %_0, i32 0, i32 1 + %4 = load i32, ptr %3, align 4 + %5 = insertvalue { i32, i32 } poison, i32 %2, 0 + %6 = insertvalue { i32, i32 } %5, i32 %4, 1 + ret { i32, i32 } %6 +} + +; Function Attrs: uwtable +define dso_local i32 @input() unnamed_addr #1 { +start: + ret i32 0 +} + +; Function Attrs: uwtable +define dso_local void @log(i32 %i) unnamed_addr #1 { +start: + ret void +} + +; Function Attrs: uwtable +define dso_local void @app() unnamed_addr #1 { +start: + %_5 = alloca { i32, i32 }, align 4 + %iter = alloca { i32, i32 }, align 4 + %_3 = alloca { i32, i32 }, align 4 + %x = call i32 @input() + store i32 0, ptr %_3, align 4 + %0 = getelementptr inbounds { i32, i32 }, ptr %_3, i32 0, i32 1 + store i32 10, ptr %0, align 4 + %1 = getelementptr inbounds { i32, i32 }, ptr %_3, i32 0, i32 0 + %2 = load i32, ptr %1, align 4, !noundef !4 + %3 = getelementptr inbounds { i32, i32 }, ptr %_3, i32 0, i32 1 + %4 = load i32, ptr %3, align 4, !noundef !4 + %5 = call { i32, i32 } @"_ZN63_$LT$I$u20$as$u20$core..iter..traits..collect..IntoIterator$GT$9into_iter17h9c831713eeb3e5efE"(i32 %2, i32 %4) + %_2.0 = extractvalue { i32, i32 } %5, 0 + %_2.1 = extractvalue { i32, i32 } 
%5, 1 + %6 = getelementptr inbounds { i32, i32 }, ptr %iter, i32 0, i32 0 + store i32 %_2.0, ptr %6, align 4 + %7 = getelementptr inbounds { i32, i32 }, ptr %iter, i32 0, i32 1 + store i32 %_2.1, ptr %7, align 4 + br label %bb3 + +bb3: ; preds = %bb5, %start + %8 = call { i32, i32 } @"_ZN4core4iter5range101_$LT$impl$u20$core..iter..traits..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h3186fffc13203a36E"(ptr align 4 %iter) + store { i32, i32 } %8, ptr %_5, align 4 + %9 = load i32, ptr %_5, align 4, !range !5, !noundef !4 + %_7 = zext i32 %9 to i64 + %10 = icmp eq i64 %_7, 0 + br i1 %10, label %bb7, label %bb5 + +bb7: ; preds = %bb3 + call void @Fresh(i32 %x) + ret void + +bb5: ; preds = %bb3 + call void @log(i32 1) + call void @log(i32 %x) + br label %bb3 + +bb6: ; No predecessors! + unreachable +} + +; Function Attrs: uwtable +define internal void @_ZN9example124main17h35539225bd174e48E() unnamed_addr #1 { +start: + call void @app() + ret void +} + +; Function Attrs: uwtable +define internal void @Fresh(i32 %_var) unnamed_addr #1 { +start: + ret void +} + +; Function Attrs: uwtable +define dso_local void @atomic_start() unnamed_addr #1 { +start: + %local = load i16, ptr @atomic_depth, align 2, !noundef !4 + call void @start_atomic() + ret void +} + +; Function Attrs: uwtable +define dso_local void @atomic_end() unnamed_addr #1 { +start: + call void @end_atomic() + ret void +} + +; Function Attrs: uwtable +declare i64 @_ZN3std2rt19lang_start_internal17hadaf077a6dd0140bE(ptr align 1, ptr align 8, i64, ptr, i8) unnamed_addr #1 + +; Function Attrs: uwtable +declare i32 @rust_eh_personality(i32, i32, i64, ptr, ptr) unnamed_addr #1 + +; Function Attrs: uwtable +declare void @start_atomic() unnamed_addr #1 + +; Function Attrs: uwtable +declare void @end_atomic() unnamed_addr #1 + +define i32 @main(i32 %0, ptr %1) unnamed_addr #3 { +top: + %2 = sext i32 %0 to i64 + %3 = call i64 @_ZN3std2rt10lang_start17h04742dcfd5f87c29E(ptr 
@_ZN9example124main17h35539225bd174e48E, i64 %2, ptr %1, i8 0) + %4 = trunc i64 %3 to i32 + ret i32 %4 +} + +attributes #0 = { noinline uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" } +attributes #1 = { uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" } +attributes #2 = { inlinehint uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" } +attributes #3 = { "frame-pointer"="non-leaf" "target-cpu"="apple-m1" } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 8, !"PIC Level", i32 2} +!1 = !{i32 7, !"PIE Level", i32 2} +!2 = !{!"rustc version 1.73.0 (cc66ad468 2023-10-03)"} +!3 = !{i32 1453212} +!4 = !{} +!5 = !{i32 0, i32 2} diff --git a/benchmarks/ctests/example12.rs b/benchmarks/ctests/example12.rs new file mode 100644 index 0000000..05d20c3 --- /dev/null +++ b/benchmarks/ctests/example12.rs @@ -0,0 +1,26 @@ +include!("../intermittent.rs"); + +#[no_mangle] +fn input() -> i32 { + 0 +} + +#[no_mangle] +pub static IO_NAME: fn() -> i32 = input; + +#[no_mangle] +fn log(i: i32) -> () {} + +#[no_mangle] +fn app() -> () { + let x = input(); + for _ in 0..10 { + log(1); + log(x); + } + Fresh(x); +} + +fn main() -> () { + app() +} diff --git a/benchmarks/intermittent.rs b/benchmarks/intermittent.rs index 6a14b83..8803b29 100644 --- a/benchmarks/intermittent.rs +++ b/benchmarks/intermittent.rs @@ -1,14 +1,14 @@ //#![no_std] //#![feature(core_panic)] //#![feature(const_in_array_repeat_expressions)] -extern crate panic_msp430; -extern { +// extern crate panic_msp430; +extern "C" { fn start_atomic(); fn end_atomic(); - //add any externs, as from drivers, here + //add any externs, as from drivers, here fn printf(format: *const u8, ...); //necessary to import as the intrumentation pass needs to see this - static mut atomic_depth:u16; + static mut atomic_depth: u16; } /* @@ -19,62 +19,59 @@ pub extern "C" fn _entry() { */ #[allow(dead_code)] #[allow(non_snake_case)] -fn Fresh(_var:T) -> (){} +#[no_mangle] +fn Fresh(_var: T) -> () {} 
#[allow(dead_code)] #[allow(non_snake_case)] -fn Consistent(_var:T, _id:u16) -> (){} +#[no_mangle] +fn Consistent(_var: T, _id: u16) -> () {} #[allow(dead_code)] #[allow(non_snake_case)] -fn FreshConsistent(_var:T, _id:u16) -> (){} +fn FreshConsistent(_var: T, _id: u16) -> () {} //#[inline(always)] #[no_mangle] -fn atomic_start() -> (){ +fn atomic_start() -> () { unsafe { - // variable must be visible to the omega pass - let local = atomic_depth; - start_atomic(); + // variable must be visible to the omega pass + let local = atomic_depth; + start_atomic(); } } #[no_mangle] -fn atomic_end() -> (){ +fn atomic_end() -> () { unsafe { - end_atomic(); - + end_atomic(); } } #[macro_export] macro_rules! nv { ($name:ident : $ty:ty = $expr:expr) => { - unsafe { - #[link_section = ".nv_vars"] - static mut $name: Option<$ty> = None; - - let used = $name.is_some(); - if used { - None - } else { - $name = Some($expr); - $name.as_mut() + unsafe { + #[link_section = ".nv_vars"] + static mut $name: Option<$ty> = None; - } - } + let used = $name.is_some(); + if used { + None + } else { + $name = Some($expr); + $name.as_mut() + } + } }; } - #[macro_export] macro_rules! 
big_nv { ($name:ident : $ty:ty = $expr:expr) => { - unsafe { - #[link_section = ".nv_vars"] - static mut $name:$ty = $expr; - & mut $name - - } + unsafe { + #[link_section = ".nv_vars"] + static mut $name: $ty = $expr; + &mut $name + } }; } - diff --git a/ocelot/AtomicRegionInference/Makefile b/ocelot/AtomicRegionInference/Makefile index 53d3c55..00d4b7e 100644 --- a/ocelot/AtomicRegionInference/Makefile +++ b/ocelot/AtomicRegionInference/Makefile @@ -1,4 +1,4 @@ -.PHONY: clean_tests clean eg1 eg2 eg3 eg4 eg5 eg6 eg7 eg8 run_eg1 run_eg2 run_eg3 run_eg4 run_eg5 run_eg6 run_eg7 run_eg8 +.PHONY: clean_tests clean eg1 eg2 eg3 eg4 eg5 eg6 eg7 eg8 eg9 eg10 egr run_eg1 run_eg2 run_eg3 run_eg4 run_eg5 run_eg6 run_eg7 run_eg8 run_eg9 run_eg10 all: make eg1 @@ -9,7 +9,11 @@ all: make eg6 make eg7 make eg8 - + make eg9 + make eg19 + make egr + make eg11 + eg1: TEST=example01 make test eg2: @@ -26,6 +30,16 @@ eg7: TEST=example07 make test eg8: TEST=example08 make test +eg9: + TEST=example09 make test +eg10: + TEST=example10 make test +egr: + TEST=example make testr +eg11: + TEST=example11 make testr +eg12: + TEST=example12 make testr run_eg1: TEST=example01 make run && ../../benchmarks/ctests/example01.out @@ -43,6 +57,10 @@ run_eg7: TEST=example07 make run && ../../benchmarks/ctests/example07.out run_eg8: TEST=example08 make run && ../../benchmarks/ctests/example08.out +run_eg9: + TEST=example09 make run && ../../benchmarks/ctests/example09.out +run_eg10: + TEST=example10 make run && ../../benchmarks/ctests/example10.out test: $(MAKE) -C build @@ -56,6 +74,19 @@ test: ../../benchmarks/ctests/$(TEST).c\ -o ../../benchmarks/ctests/$(TEST).ll +testr: + $(MAKE) -C build + rustc ../../benchmarks/ctests/$(TEST).rs --emit llvm-bc -o ../../benchmarks/ctests/$(TEST).bc + clang -S -emit-llvm\ + -fno-discard-value-names\ + ../../benchmarks/ctests/$(TEST).bc\ + -o ../../benchmarks/ctests/$(TEST).orig.ll + clang -S -emit-llvm\ + -fpass-plugin=build/src/InferAtomsPass.dylib\ + 
-fno-discard-value-names\ + ../../benchmarks/ctests/$(TEST).bc\ + -o ../../benchmarks/ctests/$(TEST).ll + run: $(MAKE) -C build clang -fpass-plugin=build/src/InferAtomsPass.dylib\ diff --git a/ocelot/AtomicRegionInference/src/InferAtoms.cpp b/ocelot/AtomicRegionInference/src/InferAtoms.cpp index 428adab..445b578 100644 --- a/ocelot/AtomicRegionInference/src/InferAtoms.cpp +++ b/ocelot/AtomicRegionInference/src/InferAtoms.cpp @@ -369,11 +369,14 @@ void InferAtomsPass::removeAnnotations(inst_vec& toDelete) { //* Remove args and their uses as well for (auto& arg : ci->args()) { if (auto* argInst = dyn_cast(arg)) { + auto argUsers = argInst->users(); + if (std::distance(argUsers.begin(), argUsers.end()) == 0) { #if DEBUG - errs() << "Remove call arg: " << *argInst << "\n"; + errs() << "No other users, remove call arg: " << *argInst << "\n"; #endif - argInst->eraseFromParent(); - argInst->replaceAllUsesWith(UndefValue::get(argInst->getType())); + argInst->eraseFromParent(); + argInst->replaceAllUsesWith(UndefValue::get(argInst->getType())); + } } } } diff --git a/ocelot/AtomicRegionInference/src/TaintTracker.cpp b/ocelot/AtomicRegionInference/src/TaintTracker.cpp index bea58e7..1ffffee 100644 --- a/ocelot/AtomicRegionInference/src/TaintTracker.cpp +++ b/ocelot/AtomicRegionInference/src/TaintTracker.cpp @@ -619,10 +619,20 @@ std::set findInputInsts(Module* M) { // Find IO_NAME annotations for (auto& gv : M->globals()) { if (gv.getName().starts_with("IO_NAME")) { - if (auto* ioFun = dyn_cast(gv.getInitializer())) { + Function* ioFun; + + auto* init = gv.getInitializer(); + if (isa(init)) { + ioFun = dyn_cast(init); + } else { + ioFun = dyn_cast(init->getOperand(0)); + } + + if (ioFun != nullptr) { #if DEBUG errs() << "Found IO fun: " << ioFun->getName() << "\n"; #endif + // Now, search for calls to those functions for (auto& F : *M) { for (auto& B : F) { From fd5d5b7ca4213f8faad063ad6d299e3883c772c1 Mon Sep 17 00:00:00 2001 From: Robert Zhang Date: Wed, 13 Mar 2024 
13:52:10 -0500 Subject: [PATCH 16/18] [InferAtomsPass] Rename unit test folder to "tests" --- .gitignore | 3 +- benchmarks/ctests/example.bc | Bin 6304 -> 0 bytes benchmarks/ctests/example11.bc | Bin 6352 -> 0 bytes benchmarks/ctests/example12.bc | Bin 8160 -> 0 bytes benchmarks/{ctests => tests}/example.ll | 2 +- benchmarks/{ctests => tests}/example.orig.ll | 2 +- benchmarks/{ctests => tests}/example.rs | 0 benchmarks/{ctests => tests}/example01.c | 0 benchmarks/{ctests => tests}/example01.ll | 4 +- .../{ctests => tests}/example01.orig.ll | 4 +- benchmarks/{ctests => tests}/example02.c | 0 benchmarks/{ctests => tests}/example02.ll | 4 +- .../{ctests => tests}/example02.orig.ll | 4 +- benchmarks/{ctests => tests}/example03.c | 0 benchmarks/{ctests => tests}/example03.ll | 4 +- .../{ctests => tests}/example03.orig.ll | 4 +- benchmarks/{ctests => tests}/example04.c | 0 benchmarks/{ctests => tests}/example04.ll | 4 +- .../{ctests => tests}/example04.orig.ll | 4 +- benchmarks/{ctests => tests}/example05.c | 0 benchmarks/{ctests => tests}/example05.ll | 4 +- .../{ctests => tests}/example05.orig.ll | 4 +- benchmarks/{ctests => tests}/example06.c | 0 benchmarks/{ctests => tests}/example06.ll | 4 +- .../{ctests => tests}/example06.orig.ll | 4 +- benchmarks/{ctests => tests}/example07.c | 0 benchmarks/{ctests => tests}/example07.ll | 4 +- .../{ctests => tests}/example07.orig.ll | 4 +- benchmarks/{ctests => tests}/example08.c | 0 benchmarks/{ctests => tests}/example08.ll | 4 +- .../{ctests => tests}/example08.orig.ll | 4 +- benchmarks/{ctests => tests}/example09.c | 0 benchmarks/{ctests => tests}/example09.ll | 4 +- .../{ctests => tests}/example09.orig.ll | 4 +- benchmarks/{ctests => tests}/example10.c | 0 benchmarks/{ctests => tests}/example10.ll | 4 +- .../{ctests => tests}/example10.orig.ll | 4 +- benchmarks/{ctests => tests}/example11.ll | 2 +- .../{ctests => tests}/example11.orig.ll | 2 +- benchmarks/{ctests => tests}/example11.rs | 0 benchmarks/{ctests => 
tests}/example12.ll | 0 .../{ctests => tests}/example12.orig.ll | 0 benchmarks/{ctests => tests}/example12.rs | 0 ocelot/AtomicRegionInference/Makefile | 46 +++++++++--------- .../src/InferFreshCons.cpp | 1 - 45 files changed, 69 insertions(+), 69 deletions(-) delete mode 100644 benchmarks/ctests/example.bc delete mode 100644 benchmarks/ctests/example11.bc delete mode 100644 benchmarks/ctests/example12.bc rename benchmarks/{ctests => tests}/example.ll (99%) rename benchmarks/{ctests => tests}/example.orig.ll (99%) rename benchmarks/{ctests => tests}/example.rs (100%) rename benchmarks/{ctests => tests}/example01.c (100%) rename benchmarks/{ctests => tests}/example01.ll (95%) rename benchmarks/{ctests => tests}/example01.orig.ll (96%) rename benchmarks/{ctests => tests}/example02.c (100%) rename benchmarks/{ctests => tests}/example02.ll (96%) rename benchmarks/{ctests => tests}/example02.orig.ll (96%) rename benchmarks/{ctests => tests}/example03.c (100%) rename benchmarks/{ctests => tests}/example03.ll (95%) rename benchmarks/{ctests => tests}/example03.orig.ll (96%) rename benchmarks/{ctests => tests}/example04.c (100%) rename benchmarks/{ctests => tests}/example04.ll (95%) rename benchmarks/{ctests => tests}/example04.orig.ll (96%) rename benchmarks/{ctests => tests}/example05.c (100%) rename benchmarks/{ctests => tests}/example05.ll (97%) rename benchmarks/{ctests => tests}/example05.orig.ll (96%) rename benchmarks/{ctests => tests}/example06.c (100%) rename benchmarks/{ctests => tests}/example06.ll (95%) rename benchmarks/{ctests => tests}/example06.orig.ll (96%) rename benchmarks/{ctests => tests}/example07.c (100%) rename benchmarks/{ctests => tests}/example07.ll (97%) rename benchmarks/{ctests => tests}/example07.orig.ll (96%) rename benchmarks/{ctests => tests}/example08.c (100%) rename benchmarks/{ctests => tests}/example08.ll (96%) rename benchmarks/{ctests => tests}/example08.orig.ll (96%) rename benchmarks/{ctests => tests}/example09.c (100%) rename 
benchmarks/{ctests => tests}/example09.ll (97%) rename benchmarks/{ctests => tests}/example09.orig.ll (96%) rename benchmarks/{ctests => tests}/example10.c (100%) rename benchmarks/{ctests => tests}/example10.ll (96%) rename benchmarks/{ctests => tests}/example10.orig.ll (96%) rename benchmarks/{ctests => tests}/example11.ll (99%) rename benchmarks/{ctests => tests}/example11.orig.ll (99%) rename benchmarks/{ctests => tests}/example11.rs (100%) rename benchmarks/{ctests => tests}/example12.ll (100%) rename benchmarks/{ctests => tests}/example12.orig.ll (100%) rename benchmarks/{ctests => tests}/example12.rs (100%) diff --git a/.gitignore b/.gitignore index fa78942..b86fab8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ .vscode ocelot/AtomicRegionInference/build -benchmarks/ctests/*.out +benchmarks/tests/*.out +benchmarks/tests/*.bc .DS_Store \ No newline at end of file diff --git a/benchmarks/ctests/example.bc b/benchmarks/ctests/example.bc deleted file mode 100644 index 6513a4190f5bf1e1cc2d48f15cd4b9bc4661b52b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6304 zcmcgv4RBM}mA=nRkovSQle|LFj6`^HvtG?A5+Fym>c!auj%9VG#n=QGuUi4%@rdXV=1G z_qK$yDs#c|%2Q%{?Q*K*^+Ib-!TAgd30Wgb!jR!4MOP^6pn_tVE7ZsUmD$JkSr^3* z=JdN+#84-l8#L(jfFh6Lglyq;Yyzr*NuGrF(yQ#3*iS^Ti({y8Z!{Q)h619twrF!u z+EFZRM-x-3iNUP%gUUo)iMrGYoBGV4^6FsLDMI}jK}p*LX=`!R7>JsYakO&=b2%7E z22bB)=<80p0m*;cOW&}{FSBx??0BT?2vOGOF!zhB;|}v^($XiYI1)FHI4aN-lFy9L zm&Ya78R>{*P2PV2)5|Ev%E*LrTBE!$sJwvG7d5JhPW5S<8f|ZrZV!lH>L4c-2BN{< zXt-F~5r}LPq8%tfs3$vBrw6kxXlNlzoJ|sk2aCH{%Ba}MkfS?I<`g+QW>Y5yvDoC4 z>TFzfbx=7?1f$!7QDa>+L`lPl>Y$|U=4fajYRr?i9h9=lNkR>-xGNCPpL2&nv({nJnm{BdNZWun5NV~N+uEY-BT;iqWD?*P z$>PCN<8;Ri-Ox+7TBRLz(&ll=r;IctNIO!`hDkci&f@Q-kPUJwdyj2rk0PHi*!%Z) z_+C7gwPoM4eBK%3fggULQ(9UVUaEiR^Y?FbUGGQUyyM*nv7iveAT~in<-Ky*A|z;j zbthAbWCTK1_=ZT7A$#kbEh`5%DFH(iBJ*lLB3G_L9HapPNyH$E8~Qlw*-fa9g)WXK z2(!8>-?ux0Xv7L#5^gJEM|v~=_EsGsbV8Sc`yjFkn!Oh$k_1XZH{~EgK#oJ)VX*{B 
zfQSVc2#K8=e`heH2N5YrnDOkJaROM=sDP=y`)HOtH9tnv%3EKRsZk!xFJLI8{O<2g zp%E5(_q1C1PCu+05MwkwUCTd~|G{*>|FK07P;p8Ci-CcUxSk?O0k`4CQ#cOgc+@N= z7E%cI)ye*wh+v@$C|ux#=RTkoAOr=3O(BRVGExe^3Zc;q0@o#-x?ho>UESNWVEJRG zNJp)>hP>>2eNT)a*P*JS?)0F!8IV=<+KDGttmQVLcOm|c!&f~eC{Ii&{R*NY<}i0B zd+dU=y-gbK-zK|)6oZu)KSqRsu*kQfoy*PV!bLF zgtQNeTHB&QHkv<{O<(J!r#N{dC%-g9&p2s4b3%1dqdc!sC3v-;dj+TTxWjTpRDL{C z-YqH{n^CmZNnZ#^O}LIoO#;r$U>(#5Y3r9_pxrJ+T@e3y(NKT1y}xtSjT!pJBz=2? zzQ&2%qmY4v%CXdQf>)l_C{I8d^O9fpIxM3OOMmKlENO-c5)+m6CI1>Gz1vlGPEF~AsQr00=3&R0{gJXUXVI)7by27_ZT&UXw|eE+1M270r+MX7jp|}t^?gE(71pR` zc*({c5%XBGY}io&4!N5spLUeRpquPDnF*Fgw}UmwShDg3laB{Qu2GKVYd9Vgw$(3dzGx?fC6KAoheo%EMn;m6l_WujAcerm}zjp~Xv5ajx4yzBwLe=Z2!gVnJ-#(##%gLuE=})clPX}O@Bq0hP zl!7g;eh3xp;pTBz%Fb?^Wikn_tALdnvz{1$s;Zi_sV~Nr{vr=YPvP~I1IcC1p^3EQ zNyAl9*Ff||0mi=DN0j#@%lne%amN}6r!nxDtV*-U&Z|MyGjUaIr^2t?!pU!P^tBoK z5^84=S_yFt5^naet-errYf~euEivg!IJVg1F&cTV!Dwc6oKC-5%dO`0D`1{N>U3$> zKwTlV!I-p?CuI?hLQf`TLJzsTxX@_)7vsL8ro(HF>Xzt_4!`$)4SA_*$F2{5^5G-9 z-X7c4*oTO>AWv5V&%(@5F~q;LV^gpne2(2^8BcayQJxyix(ZuS94}2et`-|E)zP0g z0t!-fah)E0$Ez_GL`xIbxzJ#{Ej(U;749uiYIF{PYfmD4>fyFss0?5;5wk3kDJ~f)Loe^3I(i0;(jsldX_Ct8$qqpiOAgW1iVL zDZ}S3eDQ3$l+MkG$%i&T9n2PVk|8d)v)E(JOiH?g%t*`@0%mJpon)O&wv@43Q4xFC zW@ZpRD6T8Gdwd6D^=@1qs)~Ki%K2~Ge9K4?yK67?awAJJ`3XXCmRy?)qVx24<8j=_ z7tey-@XjR(vCxyJKvY+JN?a%*!K#tX`3KLlN(%c!QgHX!4kqVa#qxr?M_!^{ehKX` z5Fe5EIWz0l|8sh`NO3Moai;xw-?qLC$)HulrkyriAD2v7K~4$St3V5WDB>}D5Unzg7>mU(U_66b6Xhl z3yEV9I<49vEo+rbNWM3R;u+L885+#!A6Qq9BFGPN;LiGm6?D4Ucuel z3zP{v%DTOIgdu-7w_@0StlKj?-+26}?6G~&OkF%XduAhXw54V4Xzt#g8;6Ck44!9x zd@0sJoOg=|IHRC@!3k}ieOPwr8)Q@G#rbn*v-9avd|AMoOUBb#VK^7F;p{NiaBe4F zAbL>%;uqU?CwuclW2yOL%lD*g{9`&lq=)V9B1SbAJ!!k}*#u&{rLrdvj(lD5ButFs z;!e$~AD~tDr@j|!l7R}v~~ueBnEj7>;GFcgGlw{RH*Ox2!RlreJN1(b{WirohvE;yAl@XDmDCroiOe zYoBHcyxraBSZ}v?g0q(fGerkU6t`syrwLL(Y>oI$CsY_&abJ7|)HIT60f`uiteX89 z;x|Zvt8p%P6F#86#0u<>pQ22NB&$}iL&uplU`2>qRnGM#SAYk|tlPoVXPb~fYX#O? 
zlRdz~336kf*op|&%;ce!j94sLuozL|g=ic8hTu#HYIlpylU2?q1@S&{VB~2*niR*Z z&k75=B?rd?!oq&ZDEFL@^|s{d@biNFq=cHP7qTx(0-cR89KH7HUio7`TQ>5oCI9hq z-V49p`_;g?UxgP(^sOJ3W%%GAe*k21M13r-AK0JE;DObzh8v7Qo8ZP5elLMr$iO{_ z3y_2D47I~s4R5^0_?r8U+gKi7`#{DCZ@fR?|C;!}A>t7F1n|Ft{&R5SeH`Nioc43T znd_n72Y1yhj^&>L{2Ay^!2R?*{3zgflQM$BYv$pf&*H&9z*m8>Z6J3D@a^;DhXDT@ z=xe~n@6E&i6L6qH+W`la)B1y+U(Mnj^Kb!h5fpnY|I$1h0WJmn5x}vJispF0u2q1; z4-Ekhl=u9j0}$uLTY%<q(Z+&!d3jee0)yXU@Yn01ltgzXFc)0n1YWd>?QH^lt*5 zI}hJ8%P$6_=J>JvKg{xH13t%(@k4-P{gwib?VHo*p8&^l_`-3J!XY@`2ORhDjq6Mb zhkpQk5D*5K7hOHi#*B5LK zxSsxwKHTipa^d!{%hS}*(9~#g^PcCML%heQGrL^)bmwyWwrp*5g#%3?XwQAsngUv_ zrBvtf8?`=dsn+asSI*MuLd{yPo^RaZ3ODo6HU)USyVT(4-CDm6B;A&;k%SqGT1@Vu z)WcKX6mALmpzXzgw+MKA24HdGUfZFqp0Gy2`7ew~gtl)AMhT}s2RWwzWS2Ay<1v(N13O?rdL z%WHMqH|q4jNTVJ+Xz&`nZm-dxGtZkd*c>vKdP7Y?7dSpx4~tP$y|D;Zs-z^;TvBr1 zw-_#}b`}-Y!u{CBqLPyPrat6AEC3374F=w#*Lgku(o)`_Ghu~G4Xy{MT6A0yA1=bL z)U3f!lgAehmy~Sug&MXt^4QIo&KU9qAuNEE(;2;5x8Cn9HCarY$zR#r5Ug+7!Uuyu zZ}PSA(DfM__^pi~%5&I>BbeZ#>q&Aw10 zUys?myq`0fc%#?LX$_paG8MJHfGY_9N}51sYjb;w)Rkt%r0G^#of57@w;GRGo&Ni> z8iXNZSg(OIc-*|fr?+_hrIu2YmNOeI7PHyw^O`(vpH|Bm+@(4{A8IffR)eMWzSRx9 zrzzZ~)dD9sYjg<@!}JEfM`!kGO}yE-+OwslgmcXr_P>^V^Bnc3vhRaF z|HJ(ICh7;$=-~wXvxh#Mf%k^~Kc^tf`+tywE=WQbWT6YEAx!Zn^U#$_#M$gfm+!d_ t&KjV=`vorWe{L?Ow~z;u@nPcGbjSHv?V8)LT(djYoCc{aE49U;{|1He4g3HA diff --git a/benchmarks/ctests/example11.bc b/benchmarks/ctests/example11.bc deleted file mode 100644 index 076c5ac57cc33f1dd31c34eaedd8b91b7efcc204..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6352 zcmcgv4R8}zet&DNw6ZO))>s5Pk;KaOafX7s`miivd#VpiY+?@KbM7diBd@*?i9duS z8^eWUZ5c~ROllZs&P}=2PMA4JFRcTFftJizlHq)ulXG&JmT(@%J`5o#yCE!z2Cm~`+vOu|NovYjV#MXa4Fy(&PV9K;Ii3AE|md51Ll%(O6yOv ze$trzCPVF-k_4tC!3oJthx`kRyqTd|0#pdV2?=zkS^Bzzwkg=}!aR)(q0=ISvNJR& z1Nx7^O?4I>u#S^|6}3sHSM$ zl4vhuJ(%GclfI>3?dxOg&j?+8cmAr9RejoLhqoPI&yE4_3oJsQIx6&$%rRS6ZEih0 z4(v=gYO*p{RG$;u>Q_ihUoEmMDZG#&MMBPqk}zaANYPcPWk?}qnXA%~L1|V$+izJK 
zKeA-N#Ue&}*0Di@&JQZ`rJRr}yoyagn?cAXct3WHy^H-s1iCmz8tRJ%{L!Fa)X@xU}RGc6x`t7Cxk!8Ye8cUk{MO7!_rV)D;nnv>3 z5&Fu6{69+;j-F;+$2L&s?HDPT-4A)jyRVjjt!M`u~MU=haty$%1tS9cHF8?4B=r@ z)2a{Rs%t~a86ptf6^I%eqd_SZLef1_YPTsG9E=+Csg5HQtDGX#;EIcbs$U~i3qGX4 zhk0F%HnxtDxNKZqes^MkQRd+%gZ-R044SnMi`M!h0fFiOUVo%r8r|6u-8~XD#YE)- z{31C#cxr;)GfOx3(d`y$Pa_qckbJ^WL4n$ndNxndA$A@=oH|{WU5Z$$-cLh((MtWJjl}AuA09#y8)6ktr6>UY1KP0I(FAQUTqczlWwtf>JD%5Pmt zBN-4|07WF$>f(#L)27@L_saLre@WaM@4qUNN(C6f;(4HLPs5$EYD>y1L<#}TH)yXC z85ZbaUx5fg^W^@Oat1Ak1B+xwpX=>Ih=b2q90<0!B9`fo2qdfq<8W=#sRtAVxpjTL znJXSYN80Pfo5?GVxq~r+T#sssyHm5~T?lQ9=%vw(E7x&b&|46LX8_Sl1?A|p(x)J* zVs=w^vezb1yE~}Rz)sm!B)`GXw**=*v#Aqt6{N3^6Q_nn9vc@`joGVuMOEV+y^!%g zfOxNo1|anVqV|qxfQ=T6=hD~v=xI*g!pSes(z6a)&y1=rX_OZVzD~j|K;#y9YW~e>O{h zHbvhap|5iy*BE5ska9froaB|~HOf&)WnS{HeRlJh-8_(bo=Tdaio`?}{mDNEp>tgo zABrl5ld_-ghz7PqgMmockCQE&9}N#i3*P&#{4*rK>7Zx&=*tVmUC)-NCgZrCeV|c7 zQJI`l&P=O5CDgwB&8A`d+C!0waYymIA&pU}Hys0;>u&YQu?N&otIzYwYZ}$1xav7V zjTP3YW_d}&zKCf&Sut#{0*A~cDrf8!G3X|H&t`$8(OsaA6qnWDKyD+V~1Pas?R18O|PDCE-PMUh{=J90ZsknJ82{u-a*PPfXr9i8l;J_`> zf?u7ZuQ$qXOi5;0`4tXV**gxpg`qEVG<3h1l6*2n&p7D2T+y$u^U6f0>caH0>l)Qn zjcQb*f)%``S?*(hRB^^$bxLIJkC?}|oPpH`W1;fxslm0d;O8IHzv1N5Q}ibm`6q)Q zB}s@vM<}qxIRK%8Jsh5ZrR?msnx~TBx+++yam(l+R94lLRedS0^cA}~dK#~<97wKk zc1@%$p9m(gw(p;Zv~L3IslkI4!$gSr~FsI&LLf%n%9G9{i$tU1O677DxBc5g7WqqT+AmX_;FIkv>@HX3=4!DwQ2oKC++%dO$`D?v_? 
z^n6*@U}KSVgE46#&&nbig`P~xgkEw*Ns-a`FUB99EI+pPq;8r1wNp2{@Lv1P&!lu&*ADRP`q_o`J+vPEIQ) zS>+6mpVvCo30^rdq#R2J03Rphw^;eKLk=||JC42B_v>95|=@qg>wYF^GGW$XQ{E|xo7?k)y(nB@xDE+ zn$ax(&s4?y9Eu>Wfq7*0ub9=~SBV&PH3mG17(obZer0Emh=8LOx|6G*95s2&Mrae- z^qi-9reyf+g)g2>SLFqnnEdbtsDrtJPI8#b>nw3wvXYYS05cM^27%eq-zZscl|9DT zEU1`0W;HPg9~Rda&YjuESUe3Yf;F+vSvmi0t9LmmV)wryeZGYynSunN_<&rO2SXR> z@!Au(k1w7DTi~5X5@Mk@UxB1u@o909gaoTbwiO(?z$&HKcap-nQ~Q`DZz)z3&YjpR zeSR<6XCU4u?{j7~*8Vfz!{FjsgqFhlVcYsMBtsSvn|9i8ATF70!2-=4?G?K!nerRF;y^?|bXI6 z_V^E>nYwruAvv_Z+BcjFjsQ~0F9xwF*yz)KS>$d&(*8}I+ejR3X*qW^bGH{{FO0#u zE|#NM4{_ctB5?fP3(k4IFONO^HDe!=oy#t!Lp%#|fp@WN9NX!rnrKKb|5QUFX2m&U zdXjTFu-Pvw{1DY?dn8M?J;aF*D0yUjKGP1SZ2u}-9@5V`x0F#WL{-{<_{;(^-&Wn5 z59hb8coIZof8W`>`en5GzUX@~C>eYpXCBn$#XqP^PD)O?uHYQUYhY#zv=+pWk(qz! zvW{6&S&*`Wt9uW}48Dd~h-G-cPX5e6ux}uZtvkkOY@^Z_9<{_|n!B> z=oxlhPb_!IO@S$})o*1AJ>A_OvYu|wBxfrNWQmSImfCWK^8_g%cC+}$4yZh`lK%Kg z@HI)b1+o;eB`C4ZsyUQEg#>bB{unPpe_|!}$J;0i)V62^8+05wgD{y}&A~>u5|3-X3z5)23LH}vE z@&1l+!T`MB%v$L8!(B6vVS61t5q(&jJpnq;LW7?0Nk8MK}Un4*0iW zTo>Rg7I?reBjE5uM;7w~UI`%1i8leqf1noR)&h?Av(tcQEy5cB2QerKIL-$=UJ5`A za0T@50FGS&hsw?e1RY1CjiIxF9#gky`ax&!0|YI;W$X)5FAOs zaUb8f&ZKad_;bLqUAVU2lZy%A`YQlV!@59Bo>D=-J{(nYx54b z)8h?<{m!l5(TBnwEf?Aya=Kfan_FAVF5dl2ILN!bI+N3hPkm08cl(YOXUN|gg!aN$ zZMnak)0gQ@THaUY)*E~tbM-u(E*REwO?=CCXDG}=yWDT)%|>^b%Tva2Wj?*u^#?|R z#NxJcS8?j$ZfXs+1-;PrV8BxhJYH?N(PQLvT7${sahVO6$KY-adiAY=kjdB9;ts>< z)VRK7Q;XYc;M{yuld~0?_gUwi5%TY7et=o)Dsy?u^#-%S=+S#!2)rh z&czwsUZ2ZYX5#+`GhIHeoHywVCXl%t-rAkYK z;nLFkzQu5Houjz89`45*ic3rHoBEIg@c>ZR_EoPBdPDyC=u8F9eB0aNSrl02 z`^JNF!rovD-vn&DhX-et^G1({(;7Hebt-bberEvwv9!XN9pT+6QfHbKlcrl~bxOHX z-5NY+P5Q6R8W;>&!+H#y!R_J=UcK4lD>IjsYdMqAY&MxZUQfB(1@?f&WjY@pY&II! 
zfUQm5HO;)cHPoTi0w;IR>{1hF@|5YloSt`^%Xpt@jeC1rDd(Iw?f)zV7rE+><=|Hj z{a+*@5dEpL@aw4mNE&|gsSoAhy{Z4ti3oE41epkV0;%YPTy)}O1R>ub8=a|ioX?ST yAz$d=+yN52bKp|{O*1mRnLL=14-?NPJJ(*N6b{?Uhv=YI1lP0n%k(?#oBki000?aPARJ}zm@!oW5%y& z$>vegH%j_P$ZIzF*Cu%#EvffO0sxMX&>drBS8Xy&I&%-qQ%VuqMj$jJL5UKeKN~Jd zbIxw_5K&IjYU7iluykP}m6=UE=H$~?Bv_uDVcAcs(+yO{Pm|SEZDqbeWlqV=NIPwQ zGQmAW9ZYAerESb_MCz(=#*(}x9jb?WAKcBnIRtWdFbK_q=K?Q5AF{NRr&hsj_r?KR zS#r{%6(bP@3(h1^h)?NLh_nedLVTGr^`uh_eVOX(rjk3&i`~TYJ4Z}O zmdvNC7TQ*@FB30hP>8UoI;$vNotH=^NF7A?3+OVHfGRT(s)RM?U17e(J`?#`SegoS zM10i|f3>)&DH8Naw&qE;pmSqc=Xz4k^e6`U6{sb9z?}VVkK#&C$_Y{SXCg|{#7i3U zBD(5`0qKUC$FVFcO(t-%Yqacho2(AWzwD5GVUk~Bi{&DCO9+Q&-x?uf4=63CNmtqyJEBU{mcD0{Ry>ts*LM@ku=l9)<}4)x@=iGs4*7UVlLH(bYNR3?n<88KB8E1xo`+8;g#)jR$hMBl>N;eNCdpQZBsfBT zN=y8_WUFwm8?!_?6-@1PPTwO-U`f-V#i z2xR&?mUyo_M0_)JvBP?i30C~3IMB0@ksK5Xc(|=KBUYg(= z6NUuGI@Dd_WD1EV&4dZ9YxZSU%%Wk2pbDNVeW_OCSj1o#@^~i9&8XV3zPrLCsDRA` zW!U}B-}1c(8K47`q0uI1$iyv&C@;n8%i&UIcV%RzR&=x{En0bkuvR5j5|?bp4jmNb zAfgnN<%aP*XuNO-6~r?Z7VT`INSJ&Oa{M^p8j4p8jw!t9qT)8IAslVD@RBV}l0er+ z>18DUoR(eZWg4j^d!RoHXu``T3d2J4hQ-B0*5YbuHv+@h$vT>VCLl0)1S1QgZvj(_qFS`TN#|RXvxafGOC@d}<9#3y{ zNPb=|(PPe%=y}WnJ_qoTr19HC5WAU=*rC2MBL1$(magWdxT;5GH~M5(S+R4dM{%)7 zF)ZBAaf*{l#UM}yM}E*@1x7G-3HM`B1F%&aRD1MqV9|+|!qeiy-l+7gnuu?0#P16Q zt8pO*Ga|w6Nanlu$iG1HYc|=%4%vmN4MoG`Wu5ECy!oC|0pxXVRB>@E>oZZdH>1+f zYhAuKR5)zQoixM|0bXnBs;s!)A;%ub-jIEgQ(RGIo$t@uCd$SNE3?Kqa?SRTVK`dY zYb^$cObirVv=+8OH`@MYGFTef4Ej)*zWrUz4_=^USAke4*BP!lKJ*0vM*Wr*r@DNll;?eSS2bF2VRtbE%q)* z73|^Q2!ygZY&MQY!F9zDsbSM#HxPZ+s5$$5zrvgAVr64EzH$(9iM1+2n=&MUvWUGq z@-Po`4|a-*+M`9CQNxIJIi%AtcuZQRoMqv%LDhHrv#OfYy^8g${2D8}Ixf3_wlD}S zg}nD?moe5hX=ajNUB=`zx9{G4s-&CFa~G-9md87k`wVmsDnCRbTe+2~@HBdAZRIOs zFaAw|po?kr_yaW!^^7WCugPbbJeNzS;&P!>8S>QH2mN@ay3#e>@n~%>4V_6rK^Z!$z z#;@2IA*K;;o~5!@@t z^l{^u7)wvBiG}Z91$>yww~+hUwC22OQ*x9H`{=$la|=keJ=IH=nx*q-iwWg2hs*{V z;e+bmFPJ#Khc>y_H2TZhzGCFuQnM$Q5Hq`8q8_bZNYI%BqV)HO64)x@^>_{+NrlES zv9oR(Axh-iGtv>&(yt=>W7ZTsKrYcy61)LmpWM4(;@BQ~=E3yF1rtYhQ;$A^wrfSF 
zh@18$J1UpMj*1({AcXgGu#ZgkQ6@*o$4H>>RhLBExvem7&DkB}@83I9JMAX;Ha2e- zJa6?&R~6Y~5;HM}_wwdm>;itI#+mw&&j7FRasq7>azq8!v^o zcEKlcAp#`kTO5Nl0%vJ#ol?+aYVD~pI9cM?IbAA-Jl~$s;uv)dR9jH{BC5YutVD-X zn(10vM&R5iwUiB3ok{xY{lBft8(jU}Ei$qF@iNm(q2|J35spE825?O5cUJsZy#C>o ze(ZBAjyjHcsT0=w_Q+EikdE7?5)p7bUkA^r7-L_CSMh6-Z1Ym!72>?hN$wjtYs(X7 zZs_Pf;S15-{YyhM5hY8;q!UuIuvBtj%)|r@SNN1J(_ni#|{r-u5L}=`%6IPFN<6Jdpu07kspK=lF#=EFsz^ zw$c}mS|*M?(e>E)?_RJ>y!r&z562(dw~p+A_*)OuW_H~DVU=m{Jxu1%I3`w$X+-~Q z9PHcq@0J+PWa3>c$S46>X~c@@Itaso?I}|w_D>bLk4-z3=V74^gpN~4u2Z^N6E%#k z>!@}WRK^K-limf^zmf9hFKpc>+L|%{nOrO}Sq*Qb%n+&p`>#T%1{POCo4E(Z88(c7 zM_04x`ir|oJI7l2gwXvYN0myG5z%i_hgPnPy{!^&80 zV;#94{JZKv?cycQPw|g^XdDwehJ$Y>+58tmo%|plTP9}FSySv2lZA5ucy)(4HHRc1 zqu7GEGQ2jvN~@MWj+Rbo6~|X(_xHFN|17wv&trd7ADrCrHG}KR$YiQah$cc|=_?Le zZNvE~*uqx4D~9p7^)LwGO>!Uc8=HuE87VE{Hd<*Jq;||TwMmr=O4?|ZrAkIGaD@kt zGbOEUsWY$fbf%^10eXQu96rss!|rperNEagevv?CODca-MDWN_nRvhkJDim5>|c!c zfla<-Fk56%?oCL1j^KTjwxk0rUxrZUz+&u=fA%E&_{v$`4E$nocakO~V#??Y zw1iGflqAnaR3ZdrBYcTDP7P*-#orQTw)^?SR}-s89^@rw6WbU&pVULX$UMZ)c%2+V zb$rTM@=9+#FW)Dj4sPXBk4W-*9_7=*RN1LEe%3Fk7n^q?^jPxg(y!BIPdqbN`6#nR z{pxp))t~HsUi|xiD?d61mO>tI!t3yyDefG_^^N^M4u)fkD&PY95ZEd-{KWs?j3&hT zYM21WFSH7t3_Nj8;5PL=e#hf++W=$0@j`z!Jn>%=2z>b(;CJHh;aW0@i{Sin3Gj00 zcftkHm>!=G(RdL0=ipiwhwlU&_w}Ig19A8zzzdfIkKOhXKbrP3d0^M&Ad08XR|T9R4qWyP)p{+!=>2fDPvn=nnz@a2)ZbA&0RXj4uZq%l{xw zKE_u9j^%$GCm-WgfMflx0uG;PeeMGsK0?EJCg=&t#{r1@_<|6{aF|#FI9?ZX-IQGv z76|QU0Q5i{A?LqY3jM|)=dAVQv*oMpcbo58!37)YYFu`=#}}-&KXA7u5Ok~9z?OjB z)lgU0P;Yc{u7`qt&gD@X>~{RC$L{p3uc@~OsvG?9KJ~3iUu|^h^*X)I#pw)s&Rw8h zF)2>%531N&u715e5ai%pU(IUuTD9Bdbr*OIdY9VxM@GVmxtsLPT;b-bZ3t}gd*I!T z0e3FQ(P)f%73*}n3?2`sGU%}!t=^tnu_m`hqt4x=W^=iK@NhNwJ^A@HL61K_-w&!l z=boU)m%DNe$niG#H*rI6x{T!&Ssr#PNYEEq^&}p>>y;0-Vx}1MfvT<54=)}2= zde&g@78DqbDwo>z!)Lm5oY4)A=2(_<=`^03Gkrn7p}_5L@Yx}zzFLSg_8uew_$ip5 ze=}?t&aJTJ=2pQ4Zp+WVY3ePpiTSl!tB0^SRXQi9@jy=8bfOkVqCg0QidDq|R_Cj| zB~*}jHNikEaFD1FymFX`--MwMxVR}0nCakF;TU6;%~%zDY3n^rIQKLvgU;*qx>Oo9 
ztKl>{?1}=dpu)6yafu^%!sl;rc>*APjmKYCQ_tbhW0^X?#|KpaQe8TiQ(K^RY4vKO zOXV%VQgs?ZYPq1|ZSru+I9Ee$t;Yqz3m%;I#ujkIO#kGnLb*(->SHX925qy1=@c zdf%pCZNqxb=L0p2o+b{ac~oj`9amG2om^0$F{;%CPADah}byC#gnYfysAI zy&D8z(!nfYyqRv0j$n5^R}0cOH|J&bdQRtdvnnm?Tp`eqr`qm=zh4?)Oigf$Ak-d{ ziiO7BV(R3x`RZkO&a&A5G|ONxP$1*hvQU4V)}t}Hy#>Ys;1Yw*Xfzny9=G1*^r%!| zZh_j%`RjDrWngQqXIUNRY6vu`R3MX`HalPKap^T)x61{z;Z_%Dmbun%%4hA9rv0Ds zX`HM69HHKR=>LLILC}vzsaW=p#HoKu^q(QsA3XI|R-K;u7pB#zb^l-SDk$}15G#oN zv6&T?_^VMX5Fc=>9mv&=*%egz0rYAY_;r%>V#lnh4kkko@;{~6*irBM9DA$aNfyU6 XTVbC%SlTB~mQxxCM@!)-I(_>u&eATE diff --git a/benchmarks/ctests/example.ll b/benchmarks/tests/example.ll similarity index 99% rename from benchmarks/ctests/example.ll rename to benchmarks/tests/example.ll index 1fce17b..edb717f 100644 --- a/benchmarks/ctests/example.ll +++ b/benchmarks/tests/example.ll @@ -1,4 +1,4 @@ -; ModuleID = '../../benchmarks/ctests/example.bc' +; ModuleID = '../../benchmarks/tests/example.bc' source_filename = "example.a08634fc28d17a86-cgu.0" target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-macosx12.0.0" diff --git a/benchmarks/ctests/example.orig.ll b/benchmarks/tests/example.orig.ll similarity index 99% rename from benchmarks/ctests/example.orig.ll rename to benchmarks/tests/example.orig.ll index 7cbde04..921c0c6 100644 --- a/benchmarks/ctests/example.orig.ll +++ b/benchmarks/tests/example.orig.ll @@ -1,4 +1,4 @@ -; ModuleID = '../../benchmarks/ctests/example.bc' +; ModuleID = '../../benchmarks/tests/example.bc' source_filename = "example.a08634fc28d17a86-cgu.0" target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-macosx12.0.0" diff --git a/benchmarks/ctests/example.rs b/benchmarks/tests/example.rs similarity index 100% rename from benchmarks/ctests/example.rs rename to benchmarks/tests/example.rs diff --git a/benchmarks/ctests/example01.c b/benchmarks/tests/example01.c similarity index 100% rename from benchmarks/ctests/example01.c rename to 
benchmarks/tests/example01.c diff --git a/benchmarks/ctests/example01.ll b/benchmarks/tests/example01.ll similarity index 95% rename from benchmarks/ctests/example01.ll rename to benchmarks/tests/example01.ll index c38981e..1c44e5f 100644 --- a/benchmarks/ctests/example01.ll +++ b/benchmarks/tests/example01.ll @@ -1,5 +1,5 @@ -; ModuleID = '../../benchmarks/ctests/example01.c' -source_filename = "../../benchmarks/ctests/example01.c" +; ModuleID = '../../benchmarks/tests/example01.c' +source_filename = "../../benchmarks/tests/example01.c" target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-macosx12.0.0" diff --git a/benchmarks/ctests/example01.orig.ll b/benchmarks/tests/example01.orig.ll similarity index 96% rename from benchmarks/ctests/example01.orig.ll rename to benchmarks/tests/example01.orig.ll index 68b2445..9692ddb 100644 --- a/benchmarks/ctests/example01.orig.ll +++ b/benchmarks/tests/example01.orig.ll @@ -1,5 +1,5 @@ -; ModuleID = '../../benchmarks/ctests/example01.c' -source_filename = "../../benchmarks/ctests/example01.c" +; ModuleID = '../../benchmarks/tests/example01.c' +source_filename = "../../benchmarks/tests/example01.c" target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-macosx12.0.0" diff --git a/benchmarks/ctests/example02.c b/benchmarks/tests/example02.c similarity index 100% rename from benchmarks/ctests/example02.c rename to benchmarks/tests/example02.c diff --git a/benchmarks/ctests/example02.ll b/benchmarks/tests/example02.ll similarity index 96% rename from benchmarks/ctests/example02.ll rename to benchmarks/tests/example02.ll index 5a557b7..c6886ac 100644 --- a/benchmarks/ctests/example02.ll +++ b/benchmarks/tests/example02.ll @@ -1,5 +1,5 @@ -; ModuleID = '../../benchmarks/ctests/example02.c' -source_filename = "../../benchmarks/ctests/example02.c" +; ModuleID = '../../benchmarks/tests/example02.c' +source_filename = "../../benchmarks/tests/example02.c" target datalayout = 
"e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-macosx12.0.0" diff --git a/benchmarks/ctests/example02.orig.ll b/benchmarks/tests/example02.orig.ll similarity index 96% rename from benchmarks/ctests/example02.orig.ll rename to benchmarks/tests/example02.orig.ll index 550dc07..9ec0125 100644 --- a/benchmarks/ctests/example02.orig.ll +++ b/benchmarks/tests/example02.orig.ll @@ -1,5 +1,5 @@ -; ModuleID = '../../benchmarks/ctests/example02.c' -source_filename = "../../benchmarks/ctests/example02.c" +; ModuleID = '../../benchmarks/tests/example02.c' +source_filename = "../../benchmarks/tests/example02.c" target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-macosx12.0.0" diff --git a/benchmarks/ctests/example03.c b/benchmarks/tests/example03.c similarity index 100% rename from benchmarks/ctests/example03.c rename to benchmarks/tests/example03.c diff --git a/benchmarks/ctests/example03.ll b/benchmarks/tests/example03.ll similarity index 95% rename from benchmarks/ctests/example03.ll rename to benchmarks/tests/example03.ll index f642b6b..a156f6a 100644 --- a/benchmarks/ctests/example03.ll +++ b/benchmarks/tests/example03.ll @@ -1,5 +1,5 @@ -; ModuleID = '../../benchmarks/ctests/example03.c' -source_filename = "../../benchmarks/ctests/example03.c" +; ModuleID = '../../benchmarks/tests/example03.c' +source_filename = "../../benchmarks/tests/example03.c" target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-macosx12.0.0" diff --git a/benchmarks/ctests/example03.orig.ll b/benchmarks/tests/example03.orig.ll similarity index 96% rename from benchmarks/ctests/example03.orig.ll rename to benchmarks/tests/example03.orig.ll index 89a0869..5a4464d 100644 --- a/benchmarks/ctests/example03.orig.ll +++ b/benchmarks/tests/example03.orig.ll @@ -1,5 +1,5 @@ -; ModuleID = '../../benchmarks/ctests/example03.c' -source_filename = "../../benchmarks/ctests/example03.c" +; ModuleID = 
'../../benchmarks/tests/example03.c' +source_filename = "../../benchmarks/tests/example03.c" target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-macosx12.0.0" diff --git a/benchmarks/ctests/example04.c b/benchmarks/tests/example04.c similarity index 100% rename from benchmarks/ctests/example04.c rename to benchmarks/tests/example04.c diff --git a/benchmarks/ctests/example04.ll b/benchmarks/tests/example04.ll similarity index 95% rename from benchmarks/ctests/example04.ll rename to benchmarks/tests/example04.ll index a3a1d72..1185b60 100644 --- a/benchmarks/ctests/example04.ll +++ b/benchmarks/tests/example04.ll @@ -1,5 +1,5 @@ -; ModuleID = '../../benchmarks/ctests/example04.c' -source_filename = "../../benchmarks/ctests/example04.c" +; ModuleID = '../../benchmarks/tests/example04.c' +source_filename = "../../benchmarks/tests/example04.c" target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-macosx12.0.0" diff --git a/benchmarks/ctests/example04.orig.ll b/benchmarks/tests/example04.orig.ll similarity index 96% rename from benchmarks/ctests/example04.orig.ll rename to benchmarks/tests/example04.orig.ll index 32405f4..c177c2f 100644 --- a/benchmarks/ctests/example04.orig.ll +++ b/benchmarks/tests/example04.orig.ll @@ -1,5 +1,5 @@ -; ModuleID = '../../benchmarks/ctests/example04.c' -source_filename = "../../benchmarks/ctests/example04.c" +; ModuleID = '../../benchmarks/tests/example04.c' +source_filename = "../../benchmarks/tests/example04.c" target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-macosx12.0.0" diff --git a/benchmarks/ctests/example05.c b/benchmarks/tests/example05.c similarity index 100% rename from benchmarks/ctests/example05.c rename to benchmarks/tests/example05.c diff --git a/benchmarks/ctests/example05.ll b/benchmarks/tests/example05.ll similarity index 97% rename from benchmarks/ctests/example05.ll rename to benchmarks/tests/example05.ll index 
aee5708..2902ef0 100644 --- a/benchmarks/ctests/example05.ll +++ b/benchmarks/tests/example05.ll @@ -1,5 +1,5 @@ -; ModuleID = '../../benchmarks/ctests/example05.c' -source_filename = "../../benchmarks/ctests/example05.c" +; ModuleID = '../../benchmarks/tests/example05.c' +source_filename = "../../benchmarks/tests/example05.c" target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-macosx12.0.0" diff --git a/benchmarks/ctests/example05.orig.ll b/benchmarks/tests/example05.orig.ll similarity index 96% rename from benchmarks/ctests/example05.orig.ll rename to benchmarks/tests/example05.orig.ll index c9e181a..ccf8289 100644 --- a/benchmarks/ctests/example05.orig.ll +++ b/benchmarks/tests/example05.orig.ll @@ -1,5 +1,5 @@ -; ModuleID = '../../benchmarks/ctests/example05.c' -source_filename = "../../benchmarks/ctests/example05.c" +; ModuleID = '../../benchmarks/tests/example05.c' +source_filename = "../../benchmarks/tests/example05.c" target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-macosx12.0.0" diff --git a/benchmarks/ctests/example06.c b/benchmarks/tests/example06.c similarity index 100% rename from benchmarks/ctests/example06.c rename to benchmarks/tests/example06.c diff --git a/benchmarks/ctests/example06.ll b/benchmarks/tests/example06.ll similarity index 95% rename from benchmarks/ctests/example06.ll rename to benchmarks/tests/example06.ll index fad0c8b..3cf6d2b 100644 --- a/benchmarks/ctests/example06.ll +++ b/benchmarks/tests/example06.ll @@ -1,5 +1,5 @@ -; ModuleID = '../../benchmarks/ctests/example06.c' -source_filename = "../../benchmarks/ctests/example06.c" +; ModuleID = '../../benchmarks/tests/example06.c' +source_filename = "../../benchmarks/tests/example06.c" target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-macosx12.0.0" diff --git a/benchmarks/ctests/example06.orig.ll b/benchmarks/tests/example06.orig.ll similarity index 96% rename from 
benchmarks/ctests/example06.orig.ll rename to benchmarks/tests/example06.orig.ll index 4aea90e..e2cc907 100644 --- a/benchmarks/ctests/example06.orig.ll +++ b/benchmarks/tests/example06.orig.ll @@ -1,5 +1,5 @@ -; ModuleID = '../../benchmarks/ctests/example06.c' -source_filename = "../../benchmarks/ctests/example06.c" +; ModuleID = '../../benchmarks/tests/example06.c' +source_filename = "../../benchmarks/tests/example06.c" target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-macosx12.0.0" diff --git a/benchmarks/ctests/example07.c b/benchmarks/tests/example07.c similarity index 100% rename from benchmarks/ctests/example07.c rename to benchmarks/tests/example07.c diff --git a/benchmarks/ctests/example07.ll b/benchmarks/tests/example07.ll similarity index 97% rename from benchmarks/ctests/example07.ll rename to benchmarks/tests/example07.ll index e12917a..ef3a2c6 100644 --- a/benchmarks/ctests/example07.ll +++ b/benchmarks/tests/example07.ll @@ -1,5 +1,5 @@ -; ModuleID = '../../benchmarks/ctests/example07.c' -source_filename = "../../benchmarks/ctests/example07.c" +; ModuleID = '../../benchmarks/tests/example07.c' +source_filename = "../../benchmarks/tests/example07.c" target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-macosx12.0.0" diff --git a/benchmarks/ctests/example07.orig.ll b/benchmarks/tests/example07.orig.ll similarity index 96% rename from benchmarks/ctests/example07.orig.ll rename to benchmarks/tests/example07.orig.ll index 299b165..8b10b06 100644 --- a/benchmarks/ctests/example07.orig.ll +++ b/benchmarks/tests/example07.orig.ll @@ -1,5 +1,5 @@ -; ModuleID = '../../benchmarks/ctests/example07.c' -source_filename = "../../benchmarks/ctests/example07.c" +; ModuleID = '../../benchmarks/tests/example07.c' +source_filename = "../../benchmarks/tests/example07.c" target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-macosx12.0.0" diff --git 
a/benchmarks/ctests/example08.c b/benchmarks/tests/example08.c similarity index 100% rename from benchmarks/ctests/example08.c rename to benchmarks/tests/example08.c diff --git a/benchmarks/ctests/example08.ll b/benchmarks/tests/example08.ll similarity index 96% rename from benchmarks/ctests/example08.ll rename to benchmarks/tests/example08.ll index 142b165..315670d 100644 --- a/benchmarks/ctests/example08.ll +++ b/benchmarks/tests/example08.ll @@ -1,5 +1,5 @@ -; ModuleID = '../../benchmarks/ctests/example08.c' -source_filename = "../../benchmarks/ctests/example08.c" +; ModuleID = '../../benchmarks/tests/example08.c' +source_filename = "../../benchmarks/tests/example08.c" target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-macosx12.0.0" diff --git a/benchmarks/ctests/example08.orig.ll b/benchmarks/tests/example08.orig.ll similarity index 96% rename from benchmarks/ctests/example08.orig.ll rename to benchmarks/tests/example08.orig.ll index f0dbf25..39e141a 100644 --- a/benchmarks/ctests/example08.orig.ll +++ b/benchmarks/tests/example08.orig.ll @@ -1,5 +1,5 @@ -; ModuleID = '../../benchmarks/ctests/example08.c' -source_filename = "../../benchmarks/ctests/example08.c" +; ModuleID = '../../benchmarks/tests/example08.c' +source_filename = "../../benchmarks/tests/example08.c" target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-macosx12.0.0" diff --git a/benchmarks/ctests/example09.c b/benchmarks/tests/example09.c similarity index 100% rename from benchmarks/ctests/example09.c rename to benchmarks/tests/example09.c diff --git a/benchmarks/ctests/example09.ll b/benchmarks/tests/example09.ll similarity index 97% rename from benchmarks/ctests/example09.ll rename to benchmarks/tests/example09.ll index 5ff5b64..02f1d07 100644 --- a/benchmarks/ctests/example09.ll +++ b/benchmarks/tests/example09.ll @@ -1,5 +1,5 @@ -; ModuleID = '../../benchmarks/ctests/example09.c' -source_filename = 
"../../benchmarks/ctests/example09.c" +; ModuleID = '../../benchmarks/tests/example09.c' +source_filename = "../../benchmarks/tests/example09.c" target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-macosx12.0.0" diff --git a/benchmarks/ctests/example09.orig.ll b/benchmarks/tests/example09.orig.ll similarity index 96% rename from benchmarks/ctests/example09.orig.ll rename to benchmarks/tests/example09.orig.ll index 03d06bb..9694cbd 100644 --- a/benchmarks/ctests/example09.orig.ll +++ b/benchmarks/tests/example09.orig.ll @@ -1,5 +1,5 @@ -; ModuleID = '../../benchmarks/ctests/example09.c' -source_filename = "../../benchmarks/ctests/example09.c" +; ModuleID = '../../benchmarks/tests/example09.c' +source_filename = "../../benchmarks/tests/example09.c" target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-macosx12.0.0" diff --git a/benchmarks/ctests/example10.c b/benchmarks/tests/example10.c similarity index 100% rename from benchmarks/ctests/example10.c rename to benchmarks/tests/example10.c diff --git a/benchmarks/ctests/example10.ll b/benchmarks/tests/example10.ll similarity index 96% rename from benchmarks/ctests/example10.ll rename to benchmarks/tests/example10.ll index a2df8f1..6741975 100644 --- a/benchmarks/ctests/example10.ll +++ b/benchmarks/tests/example10.ll @@ -1,5 +1,5 @@ -; ModuleID = '../../benchmarks/ctests/example10.c' -source_filename = "../../benchmarks/ctests/example10.c" +; ModuleID = '../../benchmarks/tests/example10.c' +source_filename = "../../benchmarks/tests/example10.c" target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-macosx12.0.0" diff --git a/benchmarks/ctests/example10.orig.ll b/benchmarks/tests/example10.orig.ll similarity index 96% rename from benchmarks/ctests/example10.orig.ll rename to benchmarks/tests/example10.orig.ll index bbe99ff..7f3c08e 100644 --- a/benchmarks/ctests/example10.orig.ll +++ b/benchmarks/tests/example10.orig.ll @@ -1,5 
+1,5 @@ -; ModuleID = '../../benchmarks/ctests/example10.c' -source_filename = "../../benchmarks/ctests/example10.c" +; ModuleID = '../../benchmarks/tests/example10.c' +source_filename = "../../benchmarks/tests/example10.c" target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-macosx12.0.0" diff --git a/benchmarks/ctests/example11.ll b/benchmarks/tests/example11.ll similarity index 99% rename from benchmarks/ctests/example11.ll rename to benchmarks/tests/example11.ll index 82eea4a..5cc8424 100644 --- a/benchmarks/ctests/example11.ll +++ b/benchmarks/tests/example11.ll @@ -1,4 +1,4 @@ -; ModuleID = '../../benchmarks/ctests/example11.bc' +; ModuleID = '../../benchmarks/tests/example11.bc' source_filename = "example11.808d53e03ac95af8-cgu.0" target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-macosx12.0.0" diff --git a/benchmarks/ctests/example11.orig.ll b/benchmarks/tests/example11.orig.ll similarity index 99% rename from benchmarks/ctests/example11.orig.ll rename to benchmarks/tests/example11.orig.ll index 49a1d31..06b5fb9 100644 --- a/benchmarks/ctests/example11.orig.ll +++ b/benchmarks/tests/example11.orig.ll @@ -1,4 +1,4 @@ -; ModuleID = '../../benchmarks/ctests/example11.bc' +; ModuleID = '../../benchmarks/tests/example11.bc' source_filename = "example11.808d53e03ac95af8-cgu.0" target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-macosx12.0.0" diff --git a/benchmarks/ctests/example11.rs b/benchmarks/tests/example11.rs similarity index 100% rename from benchmarks/ctests/example11.rs rename to benchmarks/tests/example11.rs diff --git a/benchmarks/ctests/example12.ll b/benchmarks/tests/example12.ll similarity index 100% rename from benchmarks/ctests/example12.ll rename to benchmarks/tests/example12.ll diff --git a/benchmarks/ctests/example12.orig.ll b/benchmarks/tests/example12.orig.ll similarity index 100% rename from benchmarks/ctests/example12.orig.ll rename to 
benchmarks/tests/example12.orig.ll diff --git a/benchmarks/ctests/example12.rs b/benchmarks/tests/example12.rs similarity index 100% rename from benchmarks/ctests/example12.rs rename to benchmarks/tests/example12.rs diff --git a/ocelot/AtomicRegionInference/Makefile b/ocelot/AtomicRegionInference/Makefile index 00d4b7e..bacbb17 100644 --- a/ocelot/AtomicRegionInference/Makefile +++ b/ocelot/AtomicRegionInference/Makefile @@ -10,7 +10,7 @@ all: make eg7 make eg8 make eg9 - make eg19 + make eg10 make egr make eg11 @@ -42,59 +42,59 @@ eg12: TEST=example12 make testr run_eg1: - TEST=example01 make run && ../../benchmarks/ctests/example01.out + TEST=example01 make run && ../../benchmarks/tests/example01.out run_eg2: - TEST=example02 make run && ../../benchmarks/ctests/example02.out + TEST=example02 make run && ../../benchmarks/tests/example02.out run_eg3: - TEST=example03 make run && ../../benchmarks/ctests/example03.out + TEST=example03 make run && ../../benchmarks/tests/example03.out run_eg4: - TEST=example04 make run && ../../benchmarks/ctests/example04.out + TEST=example04 make run && ../../benchmarks/tests/example04.out run_eg5: - TEST=example05 make run && ../../benchmarks/ctests/example05.out + TEST=example05 make run && ../../benchmarks/tests/example05.out run_eg6: - TEST=example06 make run && ../../benchmarks/ctests/example06.out + TEST=example06 make run && ../../benchmarks/tests/example06.out run_eg7: - TEST=example07 make run && ../../benchmarks/ctests/example07.out + TEST=example07 make run && ../../benchmarks/tests/example07.out run_eg8: - TEST=example08 make run && ../../benchmarks/ctests/example08.out + TEST=example08 make run && ../../benchmarks/tests/example08.out run_eg9: - TEST=example09 make run && ../../benchmarks/ctests/example09.out + TEST=example09 make run && ../../benchmarks/tests/example09.out run_eg10: - TEST=example10 make run && ../../benchmarks/ctests/example10.out + TEST=example10 make run && ../../benchmarks/tests/example10.out test: 
$(MAKE) -C build clang -S -emit-llvm\ -fno-discard-value-names\ - ../../benchmarks/ctests/$(TEST).c\ - -o ../../benchmarks/ctests/$(TEST).orig.ll + ../../benchmarks/tests/$(TEST).c\ + -o ../../benchmarks/tests/$(TEST).orig.ll clang -S -emit-llvm\ -fpass-plugin=build/src/InferAtomsPass.dylib\ -fno-discard-value-names\ - ../../benchmarks/ctests/$(TEST).c\ - -o ../../benchmarks/ctests/$(TEST).ll + ../../benchmarks/tests/$(TEST).c\ + -o ../../benchmarks/tests/$(TEST).ll testr: $(MAKE) -C build - rustc ../../benchmarks/ctests/$(TEST).rs --emit llvm-bc -o ../../benchmarks/ctests/$(TEST).bc + rustc ../../benchmarks/tests/$(TEST).rs --emit llvm-bc -o ../../benchmarks/tests/$(TEST).bc clang -S -emit-llvm\ -fno-discard-value-names\ - ../../benchmarks/ctests/$(TEST).bc\ - -o ../../benchmarks/ctests/$(TEST).orig.ll + ../../benchmarks/tests/$(TEST).bc\ + -o ../../benchmarks/tests/$(TEST).orig.ll clang -S -emit-llvm\ -fpass-plugin=build/src/InferAtomsPass.dylib\ -fno-discard-value-names\ - ../../benchmarks/ctests/$(TEST).bc\ - -o ../../benchmarks/ctests/$(TEST).ll + ../../benchmarks/tests/$(TEST).bc\ + -o ../../benchmarks/tests/$(TEST).ll run: $(MAKE) -C build clang -fpass-plugin=build/src/InferAtomsPass.dylib\ - ../../benchmarks/ctests/$(TEST).c\ - -o ../../benchmarks/ctests/$(TEST).out + ../../benchmarks/tests/$(TEST).c\ + -o ../../benchmarks/tests/$(TEST).out clean_tests: - find ../../benchmarks/ctests -name "*.ll" -exec rm -rf {} \; + find ../../benchmarks/tests -name "*.ll" -exec rm -rf {} \; clean: rm -rf build diff --git a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp index b70b112..64f4cf3 100644 --- a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp +++ b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp @@ -241,7 +241,6 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v isAtomicBoundary = true; } - // TODO: Exception with the entry block to a loop (prepone untainted insts 
instead) if (find(targetInsts.begin(), targetInsts.end(), &I) == targetInsts.end() && !isa(&I) && !inExistingSet && !isAtomicBoundary) { #if DEBUG errs() << "__Should be delayed__\n"; From 883cb9c8d3fe36e8fe3f5f90d07e60b67d28323b Mon Sep 17 00:00:00 2001 From: Robert Zhang Date: Fri, 15 Mar 2024 19:58:05 -0700 Subject: [PATCH 17/18] [InferAtomsPass] Slight tweak to support Rust programs with loops Only small changes are required for the optimization to work on Rust programs involving loops. See tests `example.rs`, `example11.rs`, and `example12.rs`. --- benchmarks/intermittent.rs | 1 + benchmarks/tests/example.ll | 2 +- benchmarks/tests/example.orig.ll | 2 +- benchmarks/tests/example11.ll | 2 +- benchmarks/tests/example11.orig.ll | 2 +- benchmarks/tests/example12.ll | 274 ------------------ benchmarks/tests/example12.orig.ll | 4 +- ocelot/AtomicRegionInference/src/Helpers.cpp | 4 + .../src/InferFreshCons.cpp | 13 +- 9 files changed, 18 insertions(+), 286 deletions(-) delete mode 100644 benchmarks/tests/example12.ll diff --git a/benchmarks/intermittent.rs b/benchmarks/intermittent.rs index 8803b29..de3631d 100644 --- a/benchmarks/intermittent.rs +++ b/benchmarks/intermittent.rs @@ -29,6 +29,7 @@ fn Consistent(_var: T, _id: u16) -> () {} #[allow(dead_code)] #[allow(non_snake_case)] +#[no_mangle] fn FreshConsistent(_var: T, _id: u16) -> () {} //#[inline(always)] diff --git a/benchmarks/tests/example.ll b/benchmarks/tests/example.ll index edb717f..40607b5 100644 --- a/benchmarks/tests/example.ll +++ b/benchmarks/tests/example.ll @@ -174,5 +174,5 @@ attributes #3 = { "frame-pointer"="non-leaf" "target-cpu"="apple-m1" } !0 = !{i32 8, !"PIC Level", i32 2} !1 = !{i32 7, !"PIE Level", i32 2} !2 = !{!"rustc version 1.73.0 (cc66ad468 2023-10-03)"} -!3 = !{i32 1115277} +!3 = !{i32 1115290} !4 = !{} diff --git a/benchmarks/tests/example.orig.ll b/benchmarks/tests/example.orig.ll index 921c0c6..7118a00 100644 --- a/benchmarks/tests/example.orig.ll +++ 
b/benchmarks/tests/example.orig.ll @@ -179,5 +179,5 @@ attributes #3 = { "frame-pointer"="non-leaf" "target-cpu"="apple-m1" } !0 = !{i32 8, !"PIC Level", i32 2} !1 = !{i32 7, !"PIE Level", i32 2} !2 = !{!"rustc version 1.73.0 (cc66ad468 2023-10-03)"} -!3 = !{i32 1115277} +!3 = !{i32 1115290} !4 = !{} diff --git a/benchmarks/tests/example11.ll b/benchmarks/tests/example11.ll index 5cc8424..05a924d 100644 --- a/benchmarks/tests/example11.ll +++ b/benchmarks/tests/example11.ll @@ -175,5 +175,5 @@ attributes #3 = { "frame-pointer"="non-leaf" "target-cpu"="apple-m1" } !0 = !{i32 8, !"PIC Level", i32 2} !1 = !{i32 7, !"PIE Level", i32 2} !2 = !{!"rustc version 1.73.0 (cc66ad468 2023-10-03)"} -!3 = !{i32 1115326} +!3 = !{i32 1115339} !4 = !{} diff --git a/benchmarks/tests/example11.orig.ll b/benchmarks/tests/example11.orig.ll index 06b5fb9..fff931a 100644 --- a/benchmarks/tests/example11.orig.ll +++ b/benchmarks/tests/example11.orig.ll @@ -180,5 +180,5 @@ attributes #3 = { "frame-pointer"="non-leaf" "target-cpu"="apple-m1" } !0 = !{i32 8, !"PIC Level", i32 2} !1 = !{i32 7, !"PIE Level", i32 2} !2 = !{!"rustc version 1.73.0 (cc66ad468 2023-10-03)"} -!3 = !{i32 1115326} +!3 = !{i32 1115339} !4 = !{} diff --git a/benchmarks/tests/example12.ll b/benchmarks/tests/example12.ll deleted file mode 100644 index 7438e4d..0000000 --- a/benchmarks/tests/example12.ll +++ /dev/null @@ -1,274 +0,0 @@ -; ModuleID = '../../benchmarks/ctests/example12.bc' -source_filename = "example12.2ec73fdcc3bed253-cgu.0" -target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" -target triple = "arm64-apple-macosx12.0.0" - -@vtable.0 = private unnamed_addr constant <{ ptr, [16 x i8], ptr, ptr, ptr }> <{ ptr @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17h47ea1b16ba3e87a4E", [16 x i8] c"\08\00\00\00\00\00\00\00\08\00\00\00\00\00\00\00", ptr @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h2a2856448793d4cbE", ptr 
@"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h339710bdc8eea187E", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h339710bdc8eea187E" }>, align 8 -@IO_NAME = constant <{ ptr }> <{ ptr @input }>, align 8 -@atomic_depth = external global i16 - -; Function Attrs: noinline uwtable -define internal void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17h9c77676ca687ad52E(ptr %f) unnamed_addr #0 { -start: - call void @_ZN4core3ops8function6FnOnce9call_once17hc6a9dc29a00ac63eE(ptr %f) - call void asm sideeffect "", "~{memory}"(), !srcloc !3 - ret void -} - -; Function Attrs: uwtable -define hidden i64 @_ZN3std2rt10lang_start17h04742dcfd5f87c29E(ptr %main, i64 %argc, ptr %argv, i8 %sigpipe) unnamed_addr #1 { -start: - %_8 = alloca ptr, align 8 - %_5 = alloca i64, align 8 - store ptr %main, ptr %_8, align 8 - %0 = call i64 @_ZN3std2rt19lang_start_internal17hadaf077a6dd0140bE(ptr align 1 %_8, ptr align 8 @vtable.0, i64 %argc, ptr %argv, i8 %sigpipe) - store i64 %0, ptr %_5, align 8 - %v = load i64, ptr %_5, align 8, !noundef !4 - ret i64 %v -} - -; Function Attrs: inlinehint uwtable -define internal i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h339710bdc8eea187E"(ptr align 8 %_1) unnamed_addr #2 { -start: - %_4 = load ptr, ptr %_1, align 8, !nonnull !4, !noundef !4 - call void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17h9c77676ca687ad52E(ptr %_4) - %self = call i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17hc6cb452c4729c1f5E"() - %_0 = zext i8 %self to i32 - ret i32 %_0 -} - -; Function Attrs: inlinehint uwtable -define internal i32 @"_ZN47_$LT$i32$u20$as$u20$core..iter..range..Step$GT$17forward_unchecked17h3be66287c3fcbba4E"(i32 %start1, i64 %n) unnamed_addr #2 { -start: - %rhs = trunc i64 %n to i32 - %_0 = add nsw i32 %start1, %rhs - ret i32 %_0 -} - -; Function Attrs: inlinehint uwtable -define internal i32 
@"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h2a2856448793d4cbE"(ptr %_1) unnamed_addr #2 { -start: - %_2 = alloca {}, align 1 - %0 = load ptr, ptr %_1, align 8, !nonnull !4, !noundef !4 - %_0 = call i32 @_ZN4core3ops8function6FnOnce9call_once17had97088f55991c2cE(ptr %0) - ret i32 %_0 -} - -; Function Attrs: inlinehint uwtable -define internal i32 @_ZN4core3ops8function6FnOnce9call_once17had97088f55991c2cE(ptr %0) unnamed_addr #2 personality ptr @rust_eh_personality { -start: - %1 = alloca { ptr, i32 }, align 8 - %_2 = alloca {}, align 1 - %_1 = alloca ptr, align 8 - store ptr %0, ptr %_1, align 8 - %_0 = invoke i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h339710bdc8eea187E"(ptr align 8 %_1) - to label %bb1 unwind label %cleanup - -bb3: ; preds = %cleanup - %2 = load ptr, ptr %1, align 8, !noundef !4 - %3 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 1 - %4 = load i32, ptr %3, align 8, !noundef !4 - %5 = insertvalue { ptr, i32 } poison, ptr %2, 0 - %6 = insertvalue { ptr, i32 } %5, i32 %4, 1 - resume { ptr, i32 } %6 - -cleanup: ; preds = %start - %7 = landingpad { ptr, i32 } - cleanup - %8 = extractvalue { ptr, i32 } %7, 0 - %9 = extractvalue { ptr, i32 } %7, 1 - %10 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 0 - store ptr %8, ptr %10, align 8 - %11 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 1 - store i32 %9, ptr %11, align 8 - br label %bb3 - -bb1: ; preds = %start - ret i32 %_0 -} - -; Function Attrs: inlinehint uwtable -define internal void @_ZN4core3ops8function6FnOnce9call_once17hc6a9dc29a00ac63eE(ptr %_1) unnamed_addr #2 { -start: - %_2 = alloca {}, align 1 - call void %_1() - ret void -} - -; Function Attrs: inlinehint uwtable -define internal void @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17h47ea1b16ba3e87a4E"(ptr align 8 %_1) unnamed_addr #2 { -start: - ret void -} - -; Function Attrs: inlinehint uwtable -define 
internal { i32, i32 } @"_ZN4core4iter5range101_$LT$impl$u20$core..iter..traits..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h3186fffc13203a36E"(ptr align 4 %self) unnamed_addr #2 { -start: - %0 = call { i32, i32 } @"_ZN89_$LT$core..ops..range..Range$LT$T$GT$$u20$as$u20$core..iter..range..RangeIteratorImpl$GT$9spec_next17h1b9638ceb504bcf5E"(ptr align 4 %self) - %_0.0 = extractvalue { i32, i32 } %0, 0 - %_0.1 = extractvalue { i32, i32 } %0, 1 - %1 = insertvalue { i32, i32 } poison, i32 %_0.0, 0 - %2 = insertvalue { i32, i32 } %1, i32 %_0.1, 1 - ret { i32, i32 } %2 -} - -; Function Attrs: inlinehint uwtable -define internal i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17hc6cb452c4729c1f5E"() unnamed_addr #2 { -start: - ret i8 0 -} - -; Function Attrs: inlinehint uwtable -define internal { i32, i32 } @"_ZN63_$LT$I$u20$as$u20$core..iter..traits..collect..IntoIterator$GT$9into_iter17h9c831713eeb3e5efE"(i32 %self.0, i32 %self.1) unnamed_addr #2 { -start: - %0 = insertvalue { i32, i32 } poison, i32 %self.0, 0 - %1 = insertvalue { i32, i32 } %0, i32 %self.1, 1 - ret { i32, i32 } %1 -} - -; Function Attrs: inlinehint uwtable -define internal { i32, i32 } @"_ZN89_$LT$core..ops..range..Range$LT$T$GT$$u20$as$u20$core..iter..range..RangeIteratorImpl$GT$9spec_next17h1b9638ceb504bcf5E"(ptr align 4 %self) unnamed_addr #2 { -start: - %_0 = alloca { i32, i32 }, align 4 - %_4 = getelementptr inbounds { i32, i32 }, ptr %self, i32 0, i32 1 - %_3.i = load i32, ptr %self, align 4, !noundef !4 - %_4.i = load i32, ptr %_4, align 4, !noundef !4 - %_0.i = icmp slt i32 %_3.i, %_4.i - br i1 %_0.i, label %bb2, label %bb4 - -bb4: ; preds = %start - store i32 0, ptr %_0, align 4 - br label %bb5 - -bb2: ; preds = %start - %old = load i32, ptr %self, align 4, !noundef !4 - %_6 = call i32 @"_ZN47_$LT$i32$u20$as$u20$core..iter..range..Step$GT$17forward_unchecked17h3be66287c3fcbba4E"(i32 %old, i64 1) - store i32 %_6, ptr %self, align 4 - %0 = 
getelementptr inbounds { i32, i32 }, ptr %_0, i32 0, i32 1 - store i32 %old, ptr %0, align 4 - store i32 1, ptr %_0, align 4 - br label %bb5 - -bb5: ; preds = %bb2, %bb4 - %1 = getelementptr inbounds { i32, i32 }, ptr %_0, i32 0, i32 0 - %2 = load i32, ptr %1, align 4, !range !5, !noundef !4 - %3 = getelementptr inbounds { i32, i32 }, ptr %_0, i32 0, i32 1 - %4 = load i32, ptr %3, align 4 - %5 = insertvalue { i32, i32 } poison, i32 %2, 0 - %6 = insertvalue { i32, i32 } %5, i32 %4, 1 - ret { i32, i32 } %6 -} - -; Function Attrs: uwtable -define dso_local i32 @input() unnamed_addr #1 { -start: - ret i32 0 -} - -; Function Attrs: uwtable -define dso_local void @log(i32 %i) unnamed_addr #1 { -start: - ret void -} - -; Function Attrs: uwtable -define dso_local void @app() unnamed_addr #1 { -start: - %_5 = alloca { i32, i32 }, align 4 - %iter = alloca { i32, i32 }, align 4 - %_3 = alloca { i32, i32 }, align 4 - call void @atomic_start() - %x = call i32 @input() - store i32 0, ptr %_3, align 4 - %0 = getelementptr inbounds { i32, i32 }, ptr %_3, i32 0, i32 1 - store i32 10, ptr %0, align 4 - %1 = getelementptr inbounds { i32, i32 }, ptr %_3, i32 0, i32 0 - %2 = load i32, ptr %1, align 4, !noundef !4 - %3 = getelementptr inbounds { i32, i32 }, ptr %_3, i32 0, i32 1 - %4 = load i32, ptr %3, align 4, !noundef !4 - %5 = call { i32, i32 } @"_ZN63_$LT$I$u20$as$u20$core..iter..traits..collect..IntoIterator$GT$9into_iter17h9c831713eeb3e5efE"(i32 %2, i32 %4) - %_2.0 = extractvalue { i32, i32 } %5, 0 - %_2.1 = extractvalue { i32, i32 } %5, 1 - %6 = getelementptr inbounds { i32, i32 }, ptr %iter, i32 0, i32 0 - store i32 %_2.0, ptr %6, align 4 - %7 = getelementptr inbounds { i32, i32 }, ptr %iter, i32 0, i32 1 - store i32 %_2.1, ptr %7, align 4 - br label %bb3 - -bb3: ; preds = %bb5, %start - %8 = call { i32, i32 } @"_ZN4core4iter5range101_$LT$impl$u20$core..iter..traits..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h3186fffc13203a36E"(ptr align 4 %iter) 
- store { i32, i32 } %8, ptr %_5, align 4 - %9 = load i32, ptr %_5, align 4, !range !5, !noundef !4 - %_7 = zext i32 %9 to i64 - %10 = icmp eq i64 %_7, 0 - br i1 %10, label %bb7, label %bb5 - -bb7: ; preds = %bb3 - call void @atomic_end() - ret void - -bb5: ; preds = %bb3 - call void @log(i32 1) - call void @log(i32 %x) - br label %bb3 - -bb6: ; No predecessors! - unreachable -} - -; Function Attrs: uwtable -define internal void @_ZN9example124main17h35539225bd174e48E() unnamed_addr #1 { -start: - call void @app() - ret void -} - -; Function Attrs: uwtable -define dso_local void @atomic_start() unnamed_addr #1 { -start: - %local = load i16, ptr @atomic_depth, align 2, !noundef !4 - call void @start_atomic() - ret void -} - -; Function Attrs: uwtable -define dso_local void @atomic_end() unnamed_addr #1 { -start: - call void @end_atomic() - ret void -} - -; Function Attrs: uwtable -declare i64 @_ZN3std2rt19lang_start_internal17hadaf077a6dd0140bE(ptr align 1, ptr align 8, i64, ptr, i8) unnamed_addr #1 - -; Function Attrs: uwtable -declare i32 @rust_eh_personality(i32, i32, i64, ptr, ptr) unnamed_addr #1 - -; Function Attrs: uwtable -declare void @start_atomic() unnamed_addr #1 - -; Function Attrs: uwtable -declare void @end_atomic() unnamed_addr #1 - -define i32 @main(i32 %0, ptr %1) unnamed_addr #3 { -top: - %2 = sext i32 %0 to i64 - %3 = call i64 @_ZN3std2rt10lang_start17h04742dcfd5f87c29E(ptr @_ZN9example124main17h35539225bd174e48E, i64 %2, ptr %1, i8 0) - %4 = trunc i64 %3 to i32 - ret i32 %4 -} - -attributes #0 = { noinline uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" } -attributes #1 = { uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" } -attributes #2 = { inlinehint uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" } -attributes #3 = { "frame-pointer"="non-leaf" "target-cpu"="apple-m1" } - -!llvm.module.flags = !{!0, !1} -!llvm.ident = !{!2} - -!0 = !{i32 8, !"PIC Level", i32 2} -!1 = !{i32 7, !"PIE Level", i32 2} -!2 = 
!{!"rustc version 1.73.0 (cc66ad468 2023-10-03)"} -!3 = !{i32 1453212} -!4 = !{} -!5 = !{i32 0, i32 2} diff --git a/benchmarks/tests/example12.orig.ll b/benchmarks/tests/example12.orig.ll index a4c7d70..3ccefe2 100644 --- a/benchmarks/tests/example12.orig.ll +++ b/benchmarks/tests/example12.orig.ll @@ -1,4 +1,4 @@ -; ModuleID = '../../benchmarks/ctests/example12.bc' +; ModuleID = '../../benchmarks/tests/example12.bc' source_filename = "example12.2ec73fdcc3bed253-cgu.0" target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-macosx12.0.0" @@ -274,6 +274,6 @@ attributes #3 = { "frame-pointer"="non-leaf" "target-cpu"="apple-m1" } !0 = !{i32 8, !"PIC Level", i32 2} !1 = !{i32 7, !"PIE Level", i32 2} !2 = !{!"rustc version 1.73.0 (cc66ad468 2023-10-03)"} -!3 = !{i32 1453212} +!3 = !{i32 1453225} !4 = !{} !5 = !{i32 0, i32 2} diff --git a/ocelot/AtomicRegionInference/src/Helpers.cpp b/ocelot/AtomicRegionInference/src/Helpers.cpp index e446001..d71bf11 100644 --- a/ocelot/AtomicRegionInference/src/Helpers.cpp +++ b/ocelot/AtomicRegionInference/src/Helpers.cpp @@ -80,6 +80,10 @@ void patchClonedBlock(BasicBlock* block, inst_inst_map clonedInsts) { auto* cond = dyn_cast(ci->getOperand(0)); inst_inst_map::iterator it = clonedInsts.find(cond); if (it != clonedInsts.end()) ci->setOperand(0, it->second); + } else if (auto* ei = dyn_cast(&I)) { + auto* operand = dyn_cast(ei->getOperand(0)); + inst_inst_map::iterator it = clonedInsts.find(operand); + if (it != clonedInsts.end()) ei->setOperand(0, it->second); } } } diff --git a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp index 64f4cf3..eb53e0f 100644 --- a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp +++ b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp @@ -314,7 +314,7 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v inst_inst_map instClones; auto loopBlocks = taintedLoop->getBlocks(); - 
assert(loopBlocks.size() == 3); + // assert(loopBlocks.size() == 3); for (int i = 0; i < loopBlocks.size(); i++) { auto* block = loopBlocks[i]; @@ -366,7 +366,8 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v assert(bi->isConditional()); bi->setCondition(prev); - if (auto* B = dyn_cast(bi->getOperand(1))) { + if (auto* B = dyn_cast(bi->getOperand(2))) { + // errs() << "ayo: " << *B << "\n"; forEnd = B; } } @@ -403,7 +404,7 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v clonedLoop.push_back(clonedBlock); } - BasicBlock* forEndClone = BasicBlock::Create(forEnd->getContext(), forEnd->getName(), homeFun); + auto* forEndClone = BasicBlock::Create(forEnd->getContext(), forEnd->getName(), homeFun); IRBuilder builder(forEndClone); for (auto& I : *forEnd) { if (!isa(I) && !isa(I)) { @@ -438,7 +439,8 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v } // for.body else if (i == 1) { - bi->setSuccessor(0, clonedLoop[2]); + // bi->setSuccessor(0, clonedLoop[2]); + bi->setSuccessor(0, clonedLoop[0]); } // for.inc else if (i == 2) { @@ -653,8 +655,7 @@ Function* InferFreshCons::findCandidate(std::map bloc if (funList.size() == 1) return funList.at(0); /* Algo goal: get the deepest function that still calls (or is) all funcs in funcList. - * Consider: multiple calls? Should be dealt with in the addRegion -- eventually each caller - * gets its own region + * Consider: multiple calls? 
Should be dealt with in the addRegion -- eventually each caller gets its own region */ Function* goal = nullptr; #if DEBUG From ca0e66775ec38425251766e6fbf5761b2f10016d Mon Sep 17 00:00:00 2001 From: Robert Zhang Date: Sun, 17 Mar 2024 00:03:24 -0400 Subject: [PATCH 18/18] [InferAtomsPass] More Rust loop tests --- benchmarks/tests/example12.ll | 290 ++++++++++++++++++++++++++ benchmarks/tests/example13.ll | 275 ++++++++++++++++++++++++ benchmarks/tests/example13.orig.ll | 280 +++++++++++++++++++++++++ benchmarks/tests/example13.rs | 25 +++ ocelot/AtomicRegionInference/Makefile | 2 + 5 files changed, 872 insertions(+) create mode 100644 benchmarks/tests/example12.ll create mode 100644 benchmarks/tests/example13.ll create mode 100644 benchmarks/tests/example13.orig.ll create mode 100644 benchmarks/tests/example13.rs diff --git a/benchmarks/tests/example12.ll b/benchmarks/tests/example12.ll new file mode 100644 index 0000000..8ad00f3 --- /dev/null +++ b/benchmarks/tests/example12.ll @@ -0,0 +1,290 @@ +; ModuleID = '../../benchmarks/tests/example12.bc' +source_filename = "example12.2ec73fdcc3bed253-cgu.0" +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-macosx12.0.0" + +@vtable.0 = private unnamed_addr constant <{ ptr, [16 x i8], ptr, ptr, ptr }> <{ ptr @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17h47ea1b16ba3e87a4E", [16 x i8] c"\08\00\00\00\00\00\00\00\08\00\00\00\00\00\00\00", ptr @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h2a2856448793d4cbE", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h339710bdc8eea187E", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h339710bdc8eea187E" }>, align 8 +@IO_NAME = constant <{ ptr }> <{ ptr @input }>, align 8 +@atomic_depth = external global i16 + +; Function Attrs: noinline uwtable +define internal void 
@_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17h9c77676ca687ad52E(ptr %f) unnamed_addr #0 { +start: + call void @_ZN4core3ops8function6FnOnce9call_once17hc6a9dc29a00ac63eE(ptr %f) + call void asm sideeffect "", "~{memory}"(), !srcloc !3 + ret void +} + +; Function Attrs: uwtable +define hidden i64 @_ZN3std2rt10lang_start17h04742dcfd5f87c29E(ptr %main, i64 %argc, ptr %argv, i8 %sigpipe) unnamed_addr #1 { +start: + %_8 = alloca ptr, align 8 + %_5 = alloca i64, align 8 + store ptr %main, ptr %_8, align 8 + %0 = call i64 @_ZN3std2rt19lang_start_internal17hadaf077a6dd0140bE(ptr align 1 %_8, ptr align 8 @vtable.0, i64 %argc, ptr %argv, i8 %sigpipe) + store i64 %0, ptr %_5, align 8 + %v = load i64, ptr %_5, align 8, !noundef !4 + ret i64 %v +} + +; Function Attrs: inlinehint uwtable +define internal i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h339710bdc8eea187E"(ptr align 8 %_1) unnamed_addr #2 { +start: + %_4 = load ptr, ptr %_1, align 8, !nonnull !4, !noundef !4 + call void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17h9c77676ca687ad52E(ptr %_4) + %self = call i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17hc6cb452c4729c1f5E"() + %_0 = zext i8 %self to i32 + ret i32 %_0 +} + +; Function Attrs: inlinehint uwtable +define internal i32 @"_ZN47_$LT$i32$u20$as$u20$core..iter..range..Step$GT$17forward_unchecked17h3be66287c3fcbba4E"(i32 %start1, i64 %n) unnamed_addr #2 { +start: + %rhs = trunc i64 %n to i32 + %_0 = add nsw i32 %start1, %rhs + ret i32 %_0 +} + +; Function Attrs: inlinehint uwtable +define internal i32 @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h2a2856448793d4cbE"(ptr %_1) unnamed_addr #2 { +start: + %_2 = alloca {}, align 1 + %0 = load ptr, ptr %_1, align 8, !nonnull !4, !noundef !4 + %_0 = call i32 @_ZN4core3ops8function6FnOnce9call_once17had97088f55991c2cE(ptr %0) + ret i32 %_0 +} + +; Function Attrs: inlinehint uwtable +define internal i32 
@_ZN4core3ops8function6FnOnce9call_once17had97088f55991c2cE(ptr %0) unnamed_addr #2 personality ptr @rust_eh_personality { +start: + %1 = alloca { ptr, i32 }, align 8 + %_2 = alloca {}, align 1 + %_1 = alloca ptr, align 8 + store ptr %0, ptr %_1, align 8 + %_0 = invoke i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h339710bdc8eea187E"(ptr align 8 %_1) + to label %bb1 unwind label %cleanup + +bb3: ; preds = %cleanup + %2 = load ptr, ptr %1, align 8, !noundef !4 + %3 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 1 + %4 = load i32, ptr %3, align 8, !noundef !4 + %5 = insertvalue { ptr, i32 } poison, ptr %2, 0 + %6 = insertvalue { ptr, i32 } %5, i32 %4, 1 + resume { ptr, i32 } %6 + +cleanup: ; preds = %start + %7 = landingpad { ptr, i32 } + cleanup + %8 = extractvalue { ptr, i32 } %7, 0 + %9 = extractvalue { ptr, i32 } %7, 1 + %10 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 0 + store ptr %8, ptr %10, align 8 + %11 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 1 + store i32 %9, ptr %11, align 8 + br label %bb3 + +bb1: ; preds = %start + ret i32 %_0 +} + +; Function Attrs: inlinehint uwtable +define internal void @_ZN4core3ops8function6FnOnce9call_once17hc6a9dc29a00ac63eE(ptr %_1) unnamed_addr #2 { +start: + %_2 = alloca {}, align 1 + call void %_1() + ret void +} + +; Function Attrs: inlinehint uwtable +define internal void @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17h47ea1b16ba3e87a4E"(ptr align 8 %_1) unnamed_addr #2 { +start: + ret void +} + +; Function Attrs: inlinehint uwtable +define internal { i32, i32 } @"_ZN4core4iter5range101_$LT$impl$u20$core..iter..traits..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h3186fffc13203a36E"(ptr align 4 %self) unnamed_addr #2 { +start: + %0 = call { i32, i32 } @"_ZN89_$LT$core..ops..range..Range$LT$T$GT$$u20$as$u20$core..iter..range..RangeIteratorImpl$GT$9spec_next17h1b9638ceb504bcf5E"(ptr 
align 4 %self) + %_0.0 = extractvalue { i32, i32 } %0, 0 + %_0.1 = extractvalue { i32, i32 } %0, 1 + %1 = insertvalue { i32, i32 } poison, i32 %_0.0, 0 + %2 = insertvalue { i32, i32 } %1, i32 %_0.1, 1 + ret { i32, i32 } %2 +} + +; Function Attrs: inlinehint uwtable +define internal i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17hc6cb452c4729c1f5E"() unnamed_addr #2 { +start: + ret i8 0 +} + +; Function Attrs: inlinehint uwtable +define internal { i32, i32 } @"_ZN63_$LT$I$u20$as$u20$core..iter..traits..collect..IntoIterator$GT$9into_iter17h9c831713eeb3e5efE"(i32 %self.0, i32 %self.1) unnamed_addr #2 { +start: + %0 = insertvalue { i32, i32 } poison, i32 %self.0, 0 + %1 = insertvalue { i32, i32 } %0, i32 %self.1, 1 + ret { i32, i32 } %1 +} + +; Function Attrs: inlinehint uwtable +define internal { i32, i32 } @"_ZN89_$LT$core..ops..range..Range$LT$T$GT$$u20$as$u20$core..iter..range..RangeIteratorImpl$GT$9spec_next17h1b9638ceb504bcf5E"(ptr align 4 %self) unnamed_addr #2 { +start: + %_0 = alloca { i32, i32 }, align 4 + %_4 = getelementptr inbounds { i32, i32 }, ptr %self, i32 0, i32 1 + %_3.i = load i32, ptr %self, align 4, !noundef !4 + %_4.i = load i32, ptr %_4, align 4, !noundef !4 + %_0.i = icmp slt i32 %_3.i, %_4.i + br i1 %_0.i, label %bb2, label %bb4 + +bb4: ; preds = %start + store i32 0, ptr %_0, align 4 + br label %bb5 + +bb2: ; preds = %start + %old = load i32, ptr %self, align 4, !noundef !4 + %_6 = call i32 @"_ZN47_$LT$i32$u20$as$u20$core..iter..range..Step$GT$17forward_unchecked17h3be66287c3fcbba4E"(i32 %old, i64 1) + store i32 %_6, ptr %self, align 4 + %0 = getelementptr inbounds { i32, i32 }, ptr %_0, i32 0, i32 1 + store i32 %old, ptr %0, align 4 + store i32 1, ptr %_0, align 4 + br label %bb5 + +bb5: ; preds = %bb2, %bb4 + %1 = getelementptr inbounds { i32, i32 }, ptr %_0, i32 0, i32 0 + %2 = load i32, ptr %1, align 4, !range !5, !noundef !4 + %3 = getelementptr inbounds { i32, i32 }, ptr %_0, i32 0, i32 1 + %4 = load i32, ptr 
%3, align 4 + %5 = insertvalue { i32, i32 } poison, i32 %2, 0 + %6 = insertvalue { i32, i32 } %5, i32 %4, 1 + ret { i32, i32 } %6 +} + +; Function Attrs: uwtable +define dso_local i32 @input() unnamed_addr #1 { +start: + ret i32 0 +} + +; Function Attrs: uwtable +define dso_local void @log(i32 %i) unnamed_addr #1 { +start: + ret void +} + +; Function Attrs: uwtable +define dso_local void @app() unnamed_addr #1 { +start: + %0 = alloca { i32, i32 }, align 8 + %_5 = alloca { i32, i32 }, align 4 + %iter = alloca { i32, i32 }, align 4 + %_3 = alloca { i32, i32 }, align 4 + call void @atomic_start() + %x = call i32 @input() + store i32 0, ptr %_3, align 4 + %1 = getelementptr inbounds { i32, i32 }, ptr %_3, i32 0, i32 1 + store i32 10, ptr %1, align 4 + %2 = getelementptr inbounds { i32, i32 }, ptr %_3, i32 0, i32 0 + %3 = load i32, ptr %2, align 4, !noundef !4 + %4 = getelementptr inbounds { i32, i32 }, ptr %_3, i32 0, i32 1 + %5 = load i32, ptr %4, align 4, !noundef !4 + %6 = call { i32, i32 } @"_ZN63_$LT$I$u20$as$u20$core..iter..traits..collect..IntoIterator$GT$9into_iter17h9c831713eeb3e5efE"(i32 %3, i32 %5) + %7 = extractvalue { i32, i32 } %6, 0 + %8 = extractvalue { i32, i32 } %6, 1 + %9 = getelementptr inbounds { i32, i32 }, ptr %iter, i32 0, i32 0 + store i32 %7, ptr %9, align 4 + %10 = getelementptr inbounds { i32, i32 }, ptr %iter, i32 0, i32 1 + store i32 %8, ptr %10, align 4 + br label %bb3 + +bb3: ; preds = %bb5, %start, , + %11 = call { i32, i32 } @"_ZN4core4iter5range101_$LT$impl$u20$core..iter..traits..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h3186fffc13203a36E"(ptr align 4 %iter) + store { i32, i32 } %11, ptr %_5, align 4 + %12 = load i32, ptr %_5, align 4, !range !5, !noundef !4 + %_7 = zext i32 %12 to i64 + %13 = icmp eq i64 %_7, 0 + br i1 %13, label %bb7, label %bb5 + +bb7: ; preds = %bb3 + call void @atomic_end() + store { i32, i32 } %11, ptr %0, align 4 + br label %bb31 + +bb5: ; preds = %bb3 + call void @log(i32 %x) 
+ br label %bb3 + +bb6: ; No predecessors! + unreachable + +bb31: ; preds = %bb52, %bb7 + %14 = call { i32, i32 } @"_ZN4core4iter5range101_$LT$impl$u20$core..iter..traits..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h3186fffc13203a36E"(ptr align 4 %iter) + store { i32, i32 } %11, ptr %_5, align 4 + %15 = load i32, ptr %0, align 4, !range !5, !noundef !4 + %16 = zext i32 %12 to i64 + %17 = icmp eq i64 %_7, 0 + br i1 %17, label %bb52, label %bb73 + +bb52: ; preds = %bb31 + call void @log(i32 1) + br label %bb31 + +bb73: ; preds = %bb31 + ret void +} + +; Function Attrs: uwtable +define internal void @_ZN9example124main17h35539225bd174e48E() unnamed_addr #1 { +start: + call void @app() + ret void +} + +; Function Attrs: uwtable +define dso_local void @atomic_start() unnamed_addr #1 { +start: + %local = load i16, ptr @atomic_depth, align 2, !noundef !4 + call void @start_atomic() + ret void +} + +; Function Attrs: uwtable +define dso_local void @atomic_end() unnamed_addr #1 { +start: + call void @end_atomic() + ret void +} + +; Function Attrs: uwtable +declare i64 @_ZN3std2rt19lang_start_internal17hadaf077a6dd0140bE(ptr align 1, ptr align 8, i64, ptr, i8) unnamed_addr #1 + +; Function Attrs: uwtable +declare i32 @rust_eh_personality(i32, i32, i64, ptr, ptr) unnamed_addr #1 + +; Function Attrs: uwtable +declare void @start_atomic() unnamed_addr #1 + +; Function Attrs: uwtable +declare void @end_atomic() unnamed_addr #1 + +define i32 @main(i32 %0, ptr %1) unnamed_addr #3 { +top: + %2 = sext i32 %0 to i64 + %3 = call i64 @_ZN3std2rt10lang_start17h04742dcfd5f87c29E(ptr @_ZN9example124main17h35539225bd174e48E, i64 %2, ptr %1, i8 0) + %4 = trunc i64 %3 to i32 + ret i32 %4 +} + +attributes #0 = { noinline uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" } +attributes #1 = { uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" } +attributes #2 = { inlinehint uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" } +attributes 
#3 = { "frame-pointer"="non-leaf" "target-cpu"="apple-m1" } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 8, !"PIC Level", i32 2} +!1 = !{i32 7, !"PIE Level", i32 2} +!2 = !{!"rustc version 1.73.0 (cc66ad468 2023-10-03)"} +!3 = !{i32 1453225} +!4 = !{} +!5 = !{i32 0, i32 2} diff --git a/benchmarks/tests/example13.ll b/benchmarks/tests/example13.ll new file mode 100644 index 0000000..1a22fb2 --- /dev/null +++ b/benchmarks/tests/example13.ll @@ -0,0 +1,275 @@ +; ModuleID = '../../benchmarks/tests/example13.bc' +source_filename = "example13.a75a82856bfae51d-cgu.0" +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-macosx12.0.0" + +@vtable.0 = private unnamed_addr constant <{ ptr, [16 x i8], ptr, ptr, ptr }> <{ ptr @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17hf0f1c0343a3d5304E", [16 x i8] c"\08\00\00\00\00\00\00\00\08\00\00\00\00\00\00\00", ptr @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h4dfc989e8a89cebeE", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h51a796a89a29b131E", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h51a796a89a29b131E" }>, align 8 +@IO_NAME = constant <{ ptr }> <{ ptr @input }>, align 8 +@atomic_depth = external global i16 + +; Function Attrs: noinline uwtable +define internal void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17hf4f9b68dd936cd73E(ptr %f) unnamed_addr #0 { +start: + call void @_ZN4core3ops8function6FnOnce9call_once17h6b12a0453d0fac53E(ptr %f) + call void asm sideeffect "", "~{memory}"(), !srcloc !3 + ret void +} + +; Function Attrs: uwtable +define hidden i64 @_ZN3std2rt10lang_start17h431fe12d2c8c1de6E(ptr %main, i64 %argc, ptr %argv, i8 %sigpipe) unnamed_addr #1 { +start: + %_8 = alloca ptr, align 8 + %_5 = alloca i64, align 8 + store ptr %main, ptr %_8, align 8 + %0 = call i64 @_ZN3std2rt19lang_start_internal17hadaf077a6dd0140bE(ptr align 1 
%_8, ptr align 8 @vtable.0, i64 %argc, ptr %argv, i8 %sigpipe) + store i64 %0, ptr %_5, align 8 + %v = load i64, ptr %_5, align 8, !noundef !4 + ret i64 %v +} + +; Function Attrs: inlinehint uwtable +define internal i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h51a796a89a29b131E"(ptr align 8 %_1) unnamed_addr #2 { +start: + %_4 = load ptr, ptr %_1, align 8, !nonnull !4, !noundef !4 + call void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17hf4f9b68dd936cd73E(ptr %_4) + %self = call i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17h2db0f42e6485e7e4E"() + %_0 = zext i8 %self to i32 + ret i32 %_0 +} + +; Function Attrs: inlinehint uwtable +define internal i32 @"_ZN47_$LT$i32$u20$as$u20$core..iter..range..Step$GT$17forward_unchecked17h4944c1e1e44c8861E"(i32 %start1, i64 %n) unnamed_addr #2 { +start: + %rhs = trunc i64 %n to i32 + %_0 = add nsw i32 %start1, %rhs + ret i32 %_0 +} + +; Function Attrs: inlinehint uwtable +define internal i32 @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h4dfc989e8a89cebeE"(ptr %_1) unnamed_addr #2 { +start: + %_2 = alloca {}, align 1 + %0 = load ptr, ptr %_1, align 8, !nonnull !4, !noundef !4 + %_0 = call i32 @_ZN4core3ops8function6FnOnce9call_once17hde4d0e94a62ddc18E(ptr %0) + ret i32 %_0 +} + +; Function Attrs: inlinehint uwtable +define internal void @_ZN4core3ops8function6FnOnce9call_once17h6b12a0453d0fac53E(ptr %_1) unnamed_addr #2 { +start: + %_2 = alloca {}, align 1 + call void %_1() + ret void +} + +; Function Attrs: inlinehint uwtable +define internal i32 @_ZN4core3ops8function6FnOnce9call_once17hde4d0e94a62ddc18E(ptr %0) unnamed_addr #2 personality ptr @rust_eh_personality { +start: + %1 = alloca { ptr, i32 }, align 8 + %_2 = alloca {}, align 1 + %_1 = alloca ptr, align 8 + store ptr %0, ptr %_1, align 8 + %_0 = invoke i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h51a796a89a29b131E"(ptr align 8 %_1) + to label %bb1 unwind label 
%cleanup + +bb3: ; preds = %cleanup + %2 = load ptr, ptr %1, align 8, !noundef !4 + %3 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 1 + %4 = load i32, ptr %3, align 8, !noundef !4 + %5 = insertvalue { ptr, i32 } poison, ptr %2, 0 + %6 = insertvalue { ptr, i32 } %5, i32 %4, 1 + resume { ptr, i32 } %6 + +cleanup: ; preds = %start + %7 = landingpad { ptr, i32 } + cleanup + %8 = extractvalue { ptr, i32 } %7, 0 + %9 = extractvalue { ptr, i32 } %7, 1 + %10 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 0 + store ptr %8, ptr %10, align 8 + %11 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 1 + store i32 %9, ptr %11, align 8 + br label %bb3 + +bb1: ; preds = %start + ret i32 %_0 +} + +; Function Attrs: inlinehint uwtable +define internal void @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17hf0f1c0343a3d5304E"(ptr align 8 %_1) unnamed_addr #2 { +start: + ret void +} + +; Function Attrs: inlinehint uwtable +define internal { i32, i32 } @"_ZN4core4iter5range101_$LT$impl$u20$core..iter..traits..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h81d1ae0fa0da4546E"(ptr align 4 %self) unnamed_addr #2 { +start: + %0 = call { i32, i32 } @"_ZN89_$LT$core..ops..range..Range$LT$T$GT$$u20$as$u20$core..iter..range..RangeIteratorImpl$GT$9spec_next17h3a2fc0cbb86bcd54E"(ptr align 4 %self) + %_0.0 = extractvalue { i32, i32 } %0, 0 + %_0.1 = extractvalue { i32, i32 } %0, 1 + %1 = insertvalue { i32, i32 } poison, i32 %_0.0, 0 + %2 = insertvalue { i32, i32 } %1, i32 %_0.1, 1 + ret { i32, i32 } %2 +} + +; Function Attrs: inlinehint uwtable +define internal i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17h2db0f42e6485e7e4E"() unnamed_addr #2 { +start: + ret i8 0 +} + +; Function Attrs: inlinehint uwtable +define internal { i32, i32 } @"_ZN63_$LT$I$u20$as$u20$core..iter..traits..collect..IntoIterator$GT$9into_iter17h80dc70a24d0c93edE"(i32 %self.0, i32 %self.1) 
unnamed_addr #2 { +start: + %0 = insertvalue { i32, i32 } poison, i32 %self.0, 0 + %1 = insertvalue { i32, i32 } %0, i32 %self.1, 1 + ret { i32, i32 } %1 +} + +; Function Attrs: inlinehint uwtable +define internal { i32, i32 } @"_ZN89_$LT$core..ops..range..Range$LT$T$GT$$u20$as$u20$core..iter..range..RangeIteratorImpl$GT$9spec_next17h3a2fc0cbb86bcd54E"(ptr align 4 %self) unnamed_addr #2 { +start: + %_0 = alloca { i32, i32 }, align 4 + %_4 = getelementptr inbounds { i32, i32 }, ptr %self, i32 0, i32 1 + %_3.i = load i32, ptr %self, align 4, !noundef !4 + %_4.i = load i32, ptr %_4, align 4, !noundef !4 + %_0.i = icmp slt i32 %_3.i, %_4.i + br i1 %_0.i, label %bb2, label %bb4 + +bb4: ; preds = %start + store i32 0, ptr %_0, align 4 + br label %bb5 + +bb2: ; preds = %start + %old = load i32, ptr %self, align 4, !noundef !4 + %_6 = call i32 @"_ZN47_$LT$i32$u20$as$u20$core..iter..range..Step$GT$17forward_unchecked17h4944c1e1e44c8861E"(i32 %old, i64 1) + store i32 %_6, ptr %self, align 4 + %0 = getelementptr inbounds { i32, i32 }, ptr %_0, i32 0, i32 1 + store i32 %old, ptr %0, align 4 + store i32 1, ptr %_0, align 4 + br label %bb5 + +bb5: ; preds = %bb2, %bb4 + %1 = getelementptr inbounds { i32, i32 }, ptr %_0, i32 0, i32 0 + %2 = load i32, ptr %1, align 4, !range !5, !noundef !4 + %3 = getelementptr inbounds { i32, i32 }, ptr %_0, i32 0, i32 1 + %4 = load i32, ptr %3, align 4 + %5 = insertvalue { i32, i32 } poison, i32 %2, 0 + %6 = insertvalue { i32, i32 } %5, i32 %4, 1 + ret { i32, i32 } %6 +} + +; Function Attrs: uwtable +define dso_local i32 @input() unnamed_addr #1 { +start: + ret i32 0 +} + +; Function Attrs: uwtable +define dso_local void @log(i32 %i) unnamed_addr #1 { +start: + ret void +} + +; Function Attrs: uwtable +define dso_local void @app() unnamed_addr #1 { +start: + %_6 = alloca { i32, i32 }, align 4 + %iter = alloca { i32, i32 }, align 4 + %_3 = alloca { i32, i32 }, align 4 + call void @atomic_start() + %x = call i32 @input() + store i32 %x, ptr %_3, 
align 4 + %0 = getelementptr inbounds { i32, i32 }, ptr %_3, i32 0, i32 1 + store i32 10, ptr %0, align 4 + %1 = getelementptr inbounds { i32, i32 }, ptr %_3, i32 0, i32 0 + %2 = load i32, ptr %1, align 4, !noundef !4 + %3 = getelementptr inbounds { i32, i32 }, ptr %_3, i32 0, i32 1 + %4 = load i32, ptr %3, align 4, !noundef !4 + %5 = call { i32, i32 } @"_ZN63_$LT$I$u20$as$u20$core..iter..traits..collect..IntoIterator$GT$9into_iter17h80dc70a24d0c93edE"(i32 %2, i32 %4) + %6 = extractvalue { i32, i32 } %5, 0 + %7 = extractvalue { i32, i32 } %5, 1 + %8 = getelementptr inbounds { i32, i32 }, ptr %iter, i32 0, i32 0 + store i32 %6, ptr %8, align 4 + %9 = getelementptr inbounds { i32, i32 }, ptr %iter, i32 0, i32 1 + store i32 %7, ptr %9, align 4 + br label %bb3 + +bb3: ; preds = %start, %bb5, + %10 = call { i32, i32 } @"_ZN4core4iter5range101_$LT$impl$u20$core..iter..traits..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h81d1ae0fa0da4546E"(ptr align 4 %iter) + store { i32, i32 } %10, ptr %_6, align 4 + %11 = load i32, ptr %_6, align 4, !range !5, !noundef !4 + %_8 = zext i32 %11 to i64 + %12 = icmp eq i64 %_8, 0 + br i1 %12, label %bb7, label %bb5 + +bb7: ; preds = %bb3 + call void @atomic_end() + ret void + +bb5: ; preds = %bb3 + %13 = getelementptr inbounds { i32, i32 }, ptr %_6, i32 0, i32 1 + %i = load i32, ptr %13, align 4, !noundef !4 + call void @log(i32 %i) + br label %bb3 + +bb6: ; No predecessors! 
+ unreachable +} + +; Function Attrs: uwtable +define internal void @_ZN9example134main17haba30008cc3025a3E() unnamed_addr #1 { +start: + call void @app() + ret void +} + +; Function Attrs: uwtable +define dso_local void @atomic_start() unnamed_addr #1 { +start: + %local = load i16, ptr @atomic_depth, align 2, !noundef !4 + call void @start_atomic() + ret void +} + +; Function Attrs: uwtable +define dso_local void @atomic_end() unnamed_addr #1 { +start: + call void @end_atomic() + ret void +} + +; Function Attrs: uwtable +declare i64 @_ZN3std2rt19lang_start_internal17hadaf077a6dd0140bE(ptr align 1, ptr align 8, i64, ptr, i8) unnamed_addr #1 + +; Function Attrs: uwtable +declare i32 @rust_eh_personality(i32, i32, i64, ptr, ptr) unnamed_addr #1 + +; Function Attrs: uwtable +declare void @start_atomic() unnamed_addr #1 + +; Function Attrs: uwtable +declare void @end_atomic() unnamed_addr #1 + +define i32 @main(i32 %0, ptr %1) unnamed_addr #3 { +top: + %2 = sext i32 %0 to i64 + %3 = call i64 @_ZN3std2rt10lang_start17h431fe12d2c8c1de6E(ptr @_ZN9example134main17haba30008cc3025a3E, i64 %2, ptr %1, i8 0) + %4 = trunc i64 %3 to i32 + ret i32 %4 +} + +attributes #0 = { noinline uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" } +attributes #1 = { uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" } +attributes #2 = { inlinehint uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" } +attributes #3 = { "frame-pointer"="non-leaf" "target-cpu"="apple-m1" } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 8, !"PIC Level", i32 2} +!1 = !{i32 7, !"PIE Level", i32 2} +!2 = !{!"rustc version 1.73.0 (cc66ad468 2023-10-03)"} +!3 = !{i32 1453209} +!4 = !{} +!5 = !{i32 0, i32 2} diff --git a/benchmarks/tests/example13.orig.ll b/benchmarks/tests/example13.orig.ll new file mode 100644 index 0000000..564dab2 --- /dev/null +++ b/benchmarks/tests/example13.orig.ll @@ -0,0 +1,280 @@ +; ModuleID = '../../benchmarks/tests/example13.bc' +source_filename 
= "example13.a75a82856bfae51d-cgu.0" +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-macosx12.0.0" + +@vtable.0 = private unnamed_addr constant <{ ptr, [16 x i8], ptr, ptr, ptr }> <{ ptr @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17hf0f1c0343a3d5304E", [16 x i8] c"\08\00\00\00\00\00\00\00\08\00\00\00\00\00\00\00", ptr @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h4dfc989e8a89cebeE", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h51a796a89a29b131E", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h51a796a89a29b131E" }>, align 8 +@IO_NAME = constant <{ ptr }> <{ ptr @input }>, align 8 +@atomic_depth = external global i16 + +; Function Attrs: noinline uwtable +define internal void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17hf4f9b68dd936cd73E(ptr %f) unnamed_addr #0 { +start: + call void @_ZN4core3ops8function6FnOnce9call_once17h6b12a0453d0fac53E(ptr %f) + call void asm sideeffect "", "~{memory}"(), !srcloc !3 + ret void +} + +; Function Attrs: uwtable +define hidden i64 @_ZN3std2rt10lang_start17h431fe12d2c8c1de6E(ptr %main, i64 %argc, ptr %argv, i8 %sigpipe) unnamed_addr #1 { +start: + %_8 = alloca ptr, align 8 + %_5 = alloca i64, align 8 + store ptr %main, ptr %_8, align 8 + %0 = call i64 @_ZN3std2rt19lang_start_internal17hadaf077a6dd0140bE(ptr align 1 %_8, ptr align 8 @vtable.0, i64 %argc, ptr %argv, i8 %sigpipe) + store i64 %0, ptr %_5, align 8 + %v = load i64, ptr %_5, align 8, !noundef !4 + ret i64 %v +} + +; Function Attrs: inlinehint uwtable +define internal i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h51a796a89a29b131E"(ptr align 8 %_1) unnamed_addr #2 { +start: + %_4 = load ptr, ptr %_1, align 8, !nonnull !4, !noundef !4 + call void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17hf4f9b68dd936cd73E(ptr %_4) + %self = call i8 
@"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17h2db0f42e6485e7e4E"() + %_0 = zext i8 %self to i32 + ret i32 %_0 +} + +; Function Attrs: inlinehint uwtable +define internal i32 @"_ZN47_$LT$i32$u20$as$u20$core..iter..range..Step$GT$17forward_unchecked17h4944c1e1e44c8861E"(i32 %start1, i64 %n) unnamed_addr #2 { +start: + %rhs = trunc i64 %n to i32 + %_0 = add nsw i32 %start1, %rhs + ret i32 %_0 +} + +; Function Attrs: inlinehint uwtable +define internal i32 @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h4dfc989e8a89cebeE"(ptr %_1) unnamed_addr #2 { +start: + %_2 = alloca {}, align 1 + %0 = load ptr, ptr %_1, align 8, !nonnull !4, !noundef !4 + %_0 = call i32 @_ZN4core3ops8function6FnOnce9call_once17hde4d0e94a62ddc18E(ptr %0) + ret i32 %_0 +} + +; Function Attrs: inlinehint uwtable +define internal void @_ZN4core3ops8function6FnOnce9call_once17h6b12a0453d0fac53E(ptr %_1) unnamed_addr #2 { +start: + %_2 = alloca {}, align 1 + call void %_1() + ret void +} + +; Function Attrs: inlinehint uwtable +define internal i32 @_ZN4core3ops8function6FnOnce9call_once17hde4d0e94a62ddc18E(ptr %0) unnamed_addr #2 personality ptr @rust_eh_personality { +start: + %1 = alloca { ptr, i32 }, align 8 + %_2 = alloca {}, align 1 + %_1 = alloca ptr, align 8 + store ptr %0, ptr %_1, align 8 + %_0 = invoke i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h51a796a89a29b131E"(ptr align 8 %_1) + to label %bb1 unwind label %cleanup + +bb3: ; preds = %cleanup + %2 = load ptr, ptr %1, align 8, !noundef !4 + %3 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 1 + %4 = load i32, ptr %3, align 8, !noundef !4 + %5 = insertvalue { ptr, i32 } poison, ptr %2, 0 + %6 = insertvalue { ptr, i32 } %5, i32 %4, 1 + resume { ptr, i32 } %6 + +cleanup: ; preds = %start + %7 = landingpad { ptr, i32 } + cleanup + %8 = extractvalue { ptr, i32 } %7, 0 + %9 = extractvalue { ptr, i32 } %7, 1 + %10 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 
0 + store ptr %8, ptr %10, align 8 + %11 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 1 + store i32 %9, ptr %11, align 8 + br label %bb3 + +bb1: ; preds = %start + ret i32 %_0 +} + +; Function Attrs: inlinehint uwtable +define internal void @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17hf0f1c0343a3d5304E"(ptr align 8 %_1) unnamed_addr #2 { +start: + ret void +} + +; Function Attrs: inlinehint uwtable +define internal { i32, i32 } @"_ZN4core4iter5range101_$LT$impl$u20$core..iter..traits..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h81d1ae0fa0da4546E"(ptr align 4 %self) unnamed_addr #2 { +start: + %0 = call { i32, i32 } @"_ZN89_$LT$core..ops..range..Range$LT$T$GT$$u20$as$u20$core..iter..range..RangeIteratorImpl$GT$9spec_next17h3a2fc0cbb86bcd54E"(ptr align 4 %self) + %_0.0 = extractvalue { i32, i32 } %0, 0 + %_0.1 = extractvalue { i32, i32 } %0, 1 + %1 = insertvalue { i32, i32 } poison, i32 %_0.0, 0 + %2 = insertvalue { i32, i32 } %1, i32 %_0.1, 1 + ret { i32, i32 } %2 +} + +; Function Attrs: inlinehint uwtable +define internal i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17h2db0f42e6485e7e4E"() unnamed_addr #2 { +start: + ret i8 0 +} + +; Function Attrs: inlinehint uwtable +define internal { i32, i32 } @"_ZN63_$LT$I$u20$as$u20$core..iter..traits..collect..IntoIterator$GT$9into_iter17h80dc70a24d0c93edE"(i32 %self.0, i32 %self.1) unnamed_addr #2 { +start: + %0 = insertvalue { i32, i32 } poison, i32 %self.0, 0 + %1 = insertvalue { i32, i32 } %0, i32 %self.1, 1 + ret { i32, i32 } %1 +} + +; Function Attrs: inlinehint uwtable +define internal { i32, i32 } @"_ZN89_$LT$core..ops..range..Range$LT$T$GT$$u20$as$u20$core..iter..range..RangeIteratorImpl$GT$9spec_next17h3a2fc0cbb86bcd54E"(ptr align 4 %self) unnamed_addr #2 { +start: + %_0 = alloca { i32, i32 }, align 4 + %_4 = getelementptr inbounds { i32, i32 }, ptr %self, i32 0, i32 1 + %_3.i = load i32, 
ptr %self, align 4, !noundef !4 + %_4.i = load i32, ptr %_4, align 4, !noundef !4 + %_0.i = icmp slt i32 %_3.i, %_4.i + br i1 %_0.i, label %bb2, label %bb4 + +bb4: ; preds = %start + store i32 0, ptr %_0, align 4 + br label %bb5 + +bb2: ; preds = %start + %old = load i32, ptr %self, align 4, !noundef !4 + %_6 = call i32 @"_ZN47_$LT$i32$u20$as$u20$core..iter..range..Step$GT$17forward_unchecked17h4944c1e1e44c8861E"(i32 %old, i64 1) + store i32 %_6, ptr %self, align 4 + %0 = getelementptr inbounds { i32, i32 }, ptr %_0, i32 0, i32 1 + store i32 %old, ptr %0, align 4 + store i32 1, ptr %_0, align 4 + br label %bb5 + +bb5: ; preds = %bb2, %bb4 + %1 = getelementptr inbounds { i32, i32 }, ptr %_0, i32 0, i32 0 + %2 = load i32, ptr %1, align 4, !range !5, !noundef !4 + %3 = getelementptr inbounds { i32, i32 }, ptr %_0, i32 0, i32 1 + %4 = load i32, ptr %3, align 4 + %5 = insertvalue { i32, i32 } poison, i32 %2, 0 + %6 = insertvalue { i32, i32 } %5, i32 %4, 1 + ret { i32, i32 } %6 +} + +; Function Attrs: uwtable +define dso_local i32 @input() unnamed_addr #1 { +start: + ret i32 0 +} + +; Function Attrs: uwtable +define dso_local void @log(i32 %i) unnamed_addr #1 { +start: + ret void +} + +; Function Attrs: uwtable +define dso_local void @app() unnamed_addr #1 { +start: + %_6 = alloca { i32, i32 }, align 4 + %iter = alloca { i32, i32 }, align 4 + %_3 = alloca { i32, i32 }, align 4 + %x = call i32 @input() + store i32 %x, ptr %_3, align 4 + %0 = getelementptr inbounds { i32, i32 }, ptr %_3, i32 0, i32 1 + store i32 10, ptr %0, align 4 + %1 = getelementptr inbounds { i32, i32 }, ptr %_3, i32 0, i32 0 + %2 = load i32, ptr %1, align 4, !noundef !4 + %3 = getelementptr inbounds { i32, i32 }, ptr %_3, i32 0, i32 1 + %4 = load i32, ptr %3, align 4, !noundef !4 + %5 = call { i32, i32 } @"_ZN63_$LT$I$u20$as$u20$core..iter..traits..collect..IntoIterator$GT$9into_iter17h80dc70a24d0c93edE"(i32 %2, i32 %4) + %_2.0 = extractvalue { i32, i32 } %5, 0 + %_2.1 = extractvalue { i32, i32 } %5, 
1 + %6 = getelementptr inbounds { i32, i32 }, ptr %iter, i32 0, i32 0 + store i32 %_2.0, ptr %6, align 4 + %7 = getelementptr inbounds { i32, i32 }, ptr %iter, i32 0, i32 1 + store i32 %_2.1, ptr %7, align 4 + br label %bb3 + +bb3: ; preds = %bb5, %start + %8 = call { i32, i32 } @"_ZN4core4iter5range101_$LT$impl$u20$core..iter..traits..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h81d1ae0fa0da4546E"(ptr align 4 %iter) + store { i32, i32 } %8, ptr %_6, align 4 + %9 = load i32, ptr %_6, align 4, !range !5, !noundef !4 + %_8 = zext i32 %9 to i64 + %10 = icmp eq i64 %_8, 0 + br i1 %10, label %bb7, label %bb5 + +bb7: ; preds = %bb3 + call void @Fresh(i32 %x) + ret void + +bb5: ; preds = %bb3 + %11 = getelementptr inbounds { i32, i32 }, ptr %_6, i32 0, i32 1 + %i = load i32, ptr %11, align 4, !noundef !4 + call void @log(i32 %i) + br label %bb3 + +bb6: ; No predecessors! + unreachable +} + +; Function Attrs: uwtable +define internal void @_ZN9example134main17haba30008cc3025a3E() unnamed_addr #1 { +start: + call void @app() + ret void +} + +; Function Attrs: uwtable +define internal void @Fresh(i32 %_var) unnamed_addr #1 { +start: + ret void +} + +; Function Attrs: uwtable +define dso_local void @atomic_start() unnamed_addr #1 { +start: + %local = load i16, ptr @atomic_depth, align 2, !noundef !4 + call void @start_atomic() + ret void +} + +; Function Attrs: uwtable +define dso_local void @atomic_end() unnamed_addr #1 { +start: + call void @end_atomic() + ret void +} + +; Function Attrs: uwtable +declare i64 @_ZN3std2rt19lang_start_internal17hadaf077a6dd0140bE(ptr align 1, ptr align 8, i64, ptr, i8) unnamed_addr #1 + +; Function Attrs: uwtable +declare i32 @rust_eh_personality(i32, i32, i64, ptr, ptr) unnamed_addr #1 + +; Function Attrs: uwtable +declare void @start_atomic() unnamed_addr #1 + +; Function Attrs: uwtable +declare void @end_atomic() unnamed_addr #1 + +define i32 @main(i32 %0, ptr %1) unnamed_addr #3 { +top: + %2 = sext i32 %0 to 
i64 + %3 = call i64 @_ZN3std2rt10lang_start17h431fe12d2c8c1de6E(ptr @_ZN9example134main17haba30008cc3025a3E, i64 %2, ptr %1, i8 0) + %4 = trunc i64 %3 to i32 + ret i32 %4 +} + +attributes #0 = { noinline uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" } +attributes #1 = { uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" } +attributes #2 = { inlinehint uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" } +attributes #3 = { "frame-pointer"="non-leaf" "target-cpu"="apple-m1" } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 8, !"PIC Level", i32 2} +!1 = !{i32 7, !"PIE Level", i32 2} +!2 = !{!"rustc version 1.73.0 (cc66ad468 2023-10-03)"} +!3 = !{i32 1453209} +!4 = !{} +!5 = !{i32 0, i32 2} diff --git a/benchmarks/tests/example13.rs b/benchmarks/tests/example13.rs new file mode 100644 index 0000000..3748f61 --- /dev/null +++ b/benchmarks/tests/example13.rs @@ -0,0 +1,25 @@ +include!("../intermittent.rs"); + +#[no_mangle] +fn input() -> i32 { + 0 +} + +#[no_mangle] +pub static IO_NAME: fn() -> i32 = input; + +#[no_mangle] +fn log(i: i32) -> () {} + +#[no_mangle] +fn app() -> () { + let x = input(); + for i in x..10 { + log(i); + } + Fresh(x); +} + +fn main() -> () { + app() +} diff --git a/ocelot/AtomicRegionInference/Makefile b/ocelot/AtomicRegionInference/Makefile index bacbb17..4dbe6ea 100644 --- a/ocelot/AtomicRegionInference/Makefile +++ b/ocelot/AtomicRegionInference/Makefile @@ -40,6 +40,8 @@ eg11: TEST=example11 make testr eg12: TEST=example12 make testr +eg13: + TEST=example13 make testr run_eg1: TEST=example01 make run && ../../benchmarks/tests/example01.out