From 7f8db32b6a672cce2e02beac14f0d16e58b32ee9 Mon Sep 17 00:00:00 2001
From: Robert Zhang <jiaxuanzhang066@gmail.com>
Date: Tue, 21 Nov 2023 17:12:04 -0500
Subject: [PATCH 01/18] [InferAtomsPass] Revamp pass to support the latest LLVM
 and fix/refactor code

Below are the key changes:
- Use LLVM's new pass manager, a major improvement from the legacy one.
- Fix a shortcoming of the inference algorithm so that it actually collects
all uses of a fresh/consistent variable.
- Optimize the inference cleanup algorithm to remove all instructions
associated with the arguments of fresh/consistent annotations.
- Thoroughly log debug messages throughout the components of the pass
for a clearer view of the process.
- Rename files, structs, functions, variables, etc. to be more
descriptive and consistent.
- General code style refactoring (e.g., use `auto` and structured
bindings (destructuring) where possible).
- Add simple C tests to `benchmarks/ctests`.
---
 .gitignore                                    |    2 +
 readme.md => README.md                        |    0
 benchmarks/ctests/example01.c                 |   21 +
 benchmarks/ctests/example02.c                 |   24 +
 ocelot/AtomicRegionInference/CMakeLists.txt   |   18 +-
 ocelot/AtomicRegionInference/README.md        |   22 +-
 .../AtomicRegionInference/src/CMakeLists.txt  |   24 +-
 .../src/ConsistentInference.cpp               |  505 --------
 ocelot/AtomicRegionInference/src/Helpers.cpp  |   36 +
 .../src/InferAtomicPass.cpp                   |  516 --------
 .../AtomicRegionInference/src/InferAtoms.cpp  |  529 ++++++++
 .../src/InferFreshCons.cpp                    |  565 ++++++++
 .../src/TaintTracker.cpp                      | 1138 +++++++++--------
 .../src/include/ConsistentInference.h         |   38 -
 .../src/include/HelperTypes.h                 |   93 +-
 .../src/include/Helpers.h                     |   15 +
 .../src/include/InferAtomicPass.h             |   68 -
 .../src/include/InferAtoms.h                  |   54 +
 .../src/include/InferFreshCons.h              |   36 +
 .../src/include/TaintTracker.h                |   39 +-
 20 files changed, 1961 insertions(+), 1782 deletions(-)
 create mode 100644 .gitignore
 rename readme.md => README.md (100%)
 create mode 100644 benchmarks/ctests/example01.c
 create mode 100644 benchmarks/ctests/example02.c
 delete mode 100644 ocelot/AtomicRegionInference/src/ConsistentInference.cpp
 create mode 100644 ocelot/AtomicRegionInference/src/Helpers.cpp
 delete mode 100644 ocelot/AtomicRegionInference/src/InferAtomicPass.cpp
 create mode 100644 ocelot/AtomicRegionInference/src/InferAtoms.cpp
 create mode 100644 ocelot/AtomicRegionInference/src/InferFreshCons.cpp
 delete mode 100644 ocelot/AtomicRegionInference/src/include/ConsistentInference.h
 create mode 100644 ocelot/AtomicRegionInference/src/include/Helpers.h
 delete mode 100644 ocelot/AtomicRegionInference/src/include/InferAtomicPass.h
 create mode 100644 ocelot/AtomicRegionInference/src/include/InferAtoms.h
 create mode 100644 ocelot/AtomicRegionInference/src/include/InferFreshCons.h

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..5326aab
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+.vscode
+ocelot/AtomicRegionInference/build
\ No newline at end of file
diff --git a/readme.md b/README.md
similarity index 100%
rename from readme.md
rename to README.md
diff --git a/benchmarks/ctests/example01.c b/benchmarks/ctests/example01.c
new file mode 100644
index 0000000..0e61a67
--- /dev/null
+++ b/benchmarks/ctests/example01.c
@@ -0,0 +1,21 @@
+#include <stdio.h>
+
+// Test input: `app` obtains a value from `tmp`, passes it to the `Fresh`
+// annotation function, and then passes the same value to `log`.
+
+void Fresh(int x) { printf("Fresh\n"); }  // annotation stub; only prints its own name
+void Consistent(int x, int id) { printf("Consistent\n"); }  // annotation stub; only prints its own name
+
+void atomic_start() {}  // empty; presumably the region-start marker used by the pass -- confirm
+void atomic_end() {}    // empty; presumably the region-end marker used by the pass -- confirm
+
+int tmp() { return 0; }   // always returns 0
+int (*IO_NAME1)() = tmp;  // global function pointer initialised to `tmp`
+void log(int x) {}        // empty body; gives `x` a use after the annotation call
+
+int app() {
+  int x = tmp();
+  Fresh(x);  // annotate `x`
+  log(x);    // later use of the annotated value
+  return 0;
+}
\ No newline at end of file
diff --git a/benchmarks/ctests/example02.c b/benchmarks/ctests/example02.c
new file mode 100644
index 0000000..1047d9e
--- /dev/null
+++ b/benchmarks/ctests/example02.c
@@ -0,0 +1,24 @@
+void Fresh(int x) {}               // annotation stub (empty body)
+void Consistent(int x, int id) {}  // annotation stub (empty body)
+
+void atomic_start() {}  // empty; presumably the region-start marker used by the pass -- confirm
+void atomic_end() {}    // empty; presumably the region-end marker used by the pass -- confirm
+
+int sense() { return 0; }  // always returns 0
+int (*IO_NAME)() = sense;  // global function pointer initialised to `sense`
+
+int norm(int t) { return t; }  // returns its argument unchanged
+
+void log(int x) {}  // empty body; gives a value a use
+
+int tmp() {  // returns the result of sense() after passing it through norm()
+  int t = sense();
+  int t_norm = norm(t);
+  return t_norm;
+}
+
+void app() {  // passes tmp()'s result to Fresh, then to log
+  int x = tmp();
+  Fresh(x);
+  log(x);
+}
\ No newline at end of file
diff --git a/ocelot/AtomicRegionInference/CMakeLists.txt b/ocelot/AtomicRegionInference/CMakeLists.txt
index 24e2883..81d46f5 100644
--- a/ocelot/AtomicRegionInference/CMakeLists.txt
+++ b/ocelot/AtomicRegionInference/CMakeLists.txt
@@ -1,8 +1,22 @@
-cmake_minimum_required(VERSION 3.1)
+cmake_minimum_required(VERSION 3.6)
+project(InferAtoms)
 
+# LLVM uses C++17.
+set(CMAKE_CXX_STANDARD 17)
+
+# Load LLVMConfig.cmake. If this fails, consider setting `LLVM_DIR` to point
+# to your LLVM installation's `lib/cmake/llvm` directory.
 find_package(LLVM REQUIRED CONFIG)
+
+# Include the part of LLVM's CMake libraries that defines
+# `add_llvm_pass_plugin`.
+include(AddLLVM)
+
+# Use LLVM's preprocessor definitions, include directories, and library search
+# paths.
 add_definitions(${LLVM_DEFINITIONS})
 include_directories(${LLVM_INCLUDE_DIRS})
 link_directories(${LLVM_LIBRARY_DIRS})
 
-add_subdirectory(src)  # Use your pass name here.
+# Our pass lives in this subdirectory.
+add_subdirectory(src)
diff --git a/ocelot/AtomicRegionInference/README.md b/ocelot/AtomicRegionInference/README.md
index d9a1c3a..38c61e6 100644
--- a/ocelot/AtomicRegionInference/README.md
+++ b/ocelot/AtomicRegionInference/README.md
@@ -1,14 +1,18 @@
-# region-inference-pass
+# Atomic Region Inference
 
-LLVM Pass for inferring atomic regions
+LLVM Pass for inferring atomic regions. Tested to work with LLVM 17.
 
-Build:
+To build the pass:
 
-	$ mkdir build
-	$ cd build
-	$ cmake ..
-	$ make
+```sh
+mkdir build
+cd build
+cmake ..
+make
+```
 
-Run:
+You can have Clang load the pass plugin while compiling a C file like so (the `.dylib` suffix is for macOS; on Linux the plugin is built as `InferAtomsPass.so`):
 
-	$ opt -load build/src/libInferAtomicPass.so -atomize something.bc
+```sh
+clang -S -emit-llvm -fpass-plugin=src/InferAtomsPass.dylib -fno-discard-value-names ../../../benchmarks/ctests/example01.c
+```
diff --git a/ocelot/AtomicRegionInference/src/CMakeLists.txt b/ocelot/AtomicRegionInference/src/CMakeLists.txt
index 03033ee..e44f56e 100644
--- a/ocelot/AtomicRegionInference/src/CMakeLists.txt
+++ b/ocelot/AtomicRegionInference/src/CMakeLists.txt
@@ -1,23 +1,7 @@
-add_library(InferAtomicPass MODULE
+add_llvm_pass_plugin(InferAtomsPass
     # List your source files here.
-    InferAtomicPass.cpp
-    ConsistentInference.cpp
+    InferAtoms.cpp
     TaintTracker.cpp
+    InferFreshCons.cpp
+    Helpers.cpp
 )
-
-# Use C++11 to compile our pass (i.e., supply -std=c++11).
-target_compile_features(InferAtomicPass PRIVATE cxx_range_for cxx_auto_type)
-
-# LLVM is (typically) built with no C++ RTTI. We need to match that;
-# otherwise, we'll get linker errors about missing RTTI data.
-set_target_properties(InferAtomicPass PROPERTIES
-    COMPILE_FLAGS "-fno-rtti"
-)
-
-# Get proper shared-library behavior (where symbols are not necessarily
-# resolved when the shared library is linked) on OS X.
-if(APPLE)
-	set_target_properties(InferAtomicPass PROPERTIES
-        LINK_FLAGS "-undefined dynamic_lookup"
-    )
-endif(APPLE)
diff --git a/ocelot/AtomicRegionInference/src/ConsistentInference.cpp b/ocelot/AtomicRegionInference/src/ConsistentInference.cpp
deleted file mode 100644
index f59be4b..0000000
--- a/ocelot/AtomicRegionInference/src/ConsistentInference.cpp
+++ /dev/null
@@ -1,505 +0,0 @@
-#include "include/ConsistentInference.h"
-
-#define DEBUGINFER 0
-//Come back to this. it can crash and if pass not run with debug, shouldn't be needed
-#if 0
-namespace {
-
- // Find closest debug info. Note that LLVM throws fatal error if we don't add debug info
-// to call instructions that we insert (if the parent function has debug info).
-DebugLoc findClosestDebugLoc(Instruction *instr)
-{
-
-  DIScope *scope = instr->getFunction()->getSubprogram();
-  Instruction *instrWithDebugLoc = instr;
-  while (!instrWithDebugLoc->getDebugLoc() && instrWithDebugLoc->getPrevNode() != NULL)
-    instrWithDebugLoc = instrWithDebugLoc->getPrevNode();
-  if (instrWithDebugLoc->getDebugLoc()) // if found an instruction with info, use that info
-    return DebugLoc(instrWithDebugLoc->getDebugLoc());
-  else // use the parent function's info (can't see any better source)
-    return DebugLoc::get(instr->getFunction()->getSubprogram()->getLine(), /* col */ 0, scope);
-}
-
-} // namespace anon
-#endif
-using namespace std;
-using namespace llvm;
-Instruction* ConsistentInference::insertRegionInst(int toInsertType, Instruction* insertBefore) {
-
-  Instruction* call;
-  IRBuilder<> builder(insertBefore);
-  //build and insert a region start inst
-  if (toInsertType == 0) {
-    //Constant* c = M->getOrInsertFunction("");
-    call = builder.CreateCall(atomStart);
-    #if DEBUGINFER
-    errs() << "create start\n";
-    #endif
-  } else {
-  //build and insert a region start inst
-    #if DEBUGINFER
-      errs() << "Inserting end at: "<< *insertBefore<<"\n";
-    #endif
-    call = builder.CreateCall(atomEnd);
-    #if DEBUGINFER
-    errs() << "create end\n";
-    #endif
-  }
-  return call;
-}
-
-//if a direct pred is also a successor, then it's a for loop block
-bool ConsistentInference::loopCheck(BasicBlock* bb) {
-  StringRef bbname = bb->getName().drop_front(2);
-  if (!bb->hasNPredecessors(1)) {
-    for (auto it = pred_begin(bb), et = pred_end(bb); it != et; ++it) {
-       BasicBlock* predecessor = *it;
-       StringRef pname = predecessor->getName().drop_front(2);
-      // errs() << "comparing " << pname<< " and " <<bbname <<"\n";
-        if (pname.compare_numeric(bbname) > 0) {
-       //   errs() << "comparison is true\n";
-          return true;
-        }
-    }
-  }
-  return false;
-}
-
-
-//find the first block after a for loop
-BasicBlock* ConsistentInference::getLoopEnd(BasicBlock* bb) {
-  Instruction* ti = bb->getTerminator();
-  BasicBlock* end = ti->getSuccessor(0);
-  ti = end->getTerminator();
- // errs() << "end is " << end->getName() << "\n";
-  //for switch inst, succ 0 is the fall through
-  end = ti->getSuccessor(1);
- // errs() << "end is " << end->getName() << "\n";
-  return end;
-}
-
-/*Top level region inference function -- could flatten later*/
-void ConsistentInference::inferConsistent(std::map<int, inst_vec> allSets) 
-{
-  //TODO: start with pseudo code structure  from design doc
-  for( auto map : allSets ) {
-    #if DEBUGINFER
-    errs() << "DEBUGINFER: starting set " << map.first << "\n";
-    #endif
-    addRegion(map.second, 0);
-  }
-  
-}
-
-/*The only difference is outer map vs outer vec*/
-void ConsistentInference::inferFresh(inst_vec_vec allSets) 
-{
-  //TODO: start with pseudo code structure  from design doc
-  for( auto singleVec : allSets ) {
-    addRegion(singleVec, 1);
-  }
-  
-}
-
-//Region type: 0 for Con, 1 for fresh
-void ConsistentInference::addRegion(inst_vec conSet, int regionType) 
-{
-  //construct a map of set item to bb
-  map<Instruction*, BasicBlock*> blocks;
-  //a queue for regions that still need to be processed 
-  queue<map<Instruction*, BasicBlock*>> regionsNeeded;
-  
-  for(Instruction* item : conSet) {
-    blocks[item] = item->getParent();
-  }
-
-  regionsNeeded.push(blocks);
-
-  Function* root;
-  for (Function& f : *m) {
-    if (f.getName().equals("app")) {
-      root = &f;
-    }
-  }
-  
-  //iterate until no more possible regions
-  //THEN pick the best one 
-  vector<pair<Instruction*, Instruction*>> regionsFound;
-  while (!regionsNeeded.empty()) {
-    //need to raise all blocks in the map until 
-    //they are the same
-    map<Instruction*, BasicBlock*> blockMap = regionsNeeded.front();
-    regionsNeeded.pop();
-    //record which functions have been travelled through 
-    set<Function*> nested; 
-    
-    while (!sameFunction(blockMap)) {
-      //to think on: does this change?
-      Function* goal = commonPredecessor(blockMap, root);
-      for (Instruction* item : conSet) {
-        //not all blocks need to be moved up
-        Function* currFunc = blockMap[item]->getParent();
-        nested.insert(currFunc);
-        if(currFunc!=goal) {
-          
-          //if more than one call:
-          //callChain info is already in the starting set
-          //so only explore a caller if it's in conSet
-          bool first = true;
-          for(User* use : currFunc->users()) {
-            //if (regionType == 1) {
-              if(! (find(conSet.begin(), conSet.end(), use)!=conSet.end())) {
-                continue;
-              }
-              //errs() << "Use: "<< *use << " is in call chain\n"; 
-            //}  
-            Instruction* inst = dyn_cast<Instruction>(use);
-            #if DEBUGINFER
-            errs() << "DEBUGINFER: examining use: "<< *inst<<"\n";
-            #endif
-            if (inst == NULL) {
-              //errs () <<"ERROR: use " << *use << "not an instruction\n";
-              break;
-            }
-            //update the original map
-            if (first) {
-              blockMap[item] = inst->getParent();
-              first = false;
-            } else {
-              //copy the blockmap, update, add to queue
-              Instruction* inst = dyn_cast<Instruction>(use);
-              map<Instruction*, BasicBlock*> copy;
-              for(auto map : blockMap) {
-                copy[map.first] = map.second;
-              }
-              copy[item] = inst->getParent();
-              regionsNeeded.push(copy);
-            }
-          }//end forall uses
-        }//end currFunc check  
-      }//end forall items
-    }//end same function check
-    
-  
-  
-    /**Now, all bb in the map are in the same function, so we can run 
-     * dom or post-dom analysis on that function**/
-    #if DEBUGINFER
-    errs() << "DEBUGINFER: start dom tree analysis\n";
-    #endif
-    Function* home = blockMap.begin()->second->getParent();
-    if(home == nullptr) {
-      #if DEBUGINFER
-        errs() << "DEBUGINFER: no function found\n";
-      #endif
-      continue;
-    }
-    DominatorTree& domTree =  pass->getAnalysis<DominatorTreeWrapperPass>(*home).getDomTree();
-    //Find the closest point that dominates 
-    BasicBlock* startDom = blockMap.begin()->second;
-    for (auto map : blockMap) {
-      startDom = domTree.findNearestCommonDominator(map.second, startDom);
-    }
-    //TODO: if an inst in the set is in the bb, we can truncate?
-    #if DEBUGINFER
-    errs() << "DEBUGINFER: start post dom tree analysis\n";
-    #endif
-    //Flip directions for the region end
-    PostDominatorTree& postDomTree =  pass->getAnalysis<PostDominatorTreeWrapperPass>(*home).getPostDomTree();
-    //Find the closest point that dominates 
-    BasicBlock* endDom = blockMap.begin()->second;
-    for (auto map : blockMap) {
-      #if DEBUGINFER
-      if (endDom!=nullptr) {
-        errs() << "finding post dom of:" << map.second->getName()<< " and " << endDom->getName()<< "\n"; 
-      } else {
-        errs() << "endDom is null\n";
-      }
-      #endif
-      endDom = postDomTree.findNearestCommonDominator(map.second, endDom);
-    }
-    if (startDom==nullptr) {
-      errs() << "ERROR: null start\n";
-    } else if (endDom==nullptr) {
-      errs() << "ERROR: null end\n";
-    }
-    #if DEBUGINFER
-    errs() << "DEBUGINFER: match scope\n";
-    #endif
-    //need to make the start and end dominate each other as well.
-    startDom = domTree.findNearestCommonDominator(startDom, endDom);
-    endDom = postDomTree.findNearestCommonDominator(startDom, endDom);
-
-    //extra check to disallow loop conditional block as the end
-    if(loopCheck(endDom)) {
-      endDom = getLoopEnd(endDom);
-    }
-
-    
-
-    if (startDom==nullptr) {
-      errs() << "ERROR: null start after scope merge\n";
-    } else if (endDom==nullptr) {
-      errs() << "ERROR: null end after scope merge\n";
-    }
-#if DEBUGINFER
-    errs() << "DEBUGINFER: insert insts\n";
-#endif
-    //TODO: fallback if endDom is null? Need hyper-blocks, I think
-    //possibly can do a truncation check, to lessen the size a little, but could that interfere with compiler optimizations?
-    Instruction* regionStart = truncate(startDom, true, conSet, nested);
-    Instruction* regionEnd = truncate(endDom, false, conSet, nested);
-     if (regionStart==nullptr) {
-      errs() << "ERROR: null start after truncation\n";
-    } else if (regionEnd==nullptr) {
-      errs() << "ERROR: null end after truncation\n";
-    } else {
-       //errs() << "Region start is before " << *regionStart<<" and region end is before " << *regionEnd<<"\n";
-    }
-
-    //insert into regions found
-    regionsFound.push_back(make_pair(regionStart, regionEnd));
-  }//end while regions needed
-
-    //now see which region is smallest -- instruction count? they must dominate 
-    //each other, so there's no possibility of not running into the start from 
-    //the end
-    pair<Instruction*, Instruction*> smallestReg = findSmallest(regionsFound);
-    //errs() << "Smallest Region was " << *smallestReg.first<< " and " << *smallestReg.second <<"\n";
-    Instruction* regionStart = smallestReg.first;
-    Instruction* regionEnd = smallestReg.second;
-    insertRegionInst(0, regionStart);
-    insertRegionInst(1, regionEnd);
-  //}//end while regions needed
-}
-
-/*Function to truncate a bb if the instruction is in the bb */
-Instruction* ConsistentInference::truncate(BasicBlock* bb, bool forwards, inst_vec conSet, set<Function*> nested)
-{
-  //truncate the front
-  if(forwards) {
-    for (Instruction& inst : *bb) {
-      //stop at first inst in the basic block that is in the set.
-      if (find(conSet.begin(), conSet.end(), &inst)!=conSet.end()){
-        return &inst;
-      }
-      //need to stop at relevant callIsnsts as well
-      else if (CallInst* ci = dyn_cast<CallInst>(&inst)){
-        if (nested.find(ci->getCalledFunction())!=nested.end()) {
-          return &inst;
-        }
-      }
-
-    }
-    //otherwise just return the last inst
-    return &bb->back();
-  } 
-  //reverse directions if not forwards
-  Instruction* prev = NULL;
-  for(BasicBlock::reverse_iterator i = bb->rbegin(), e = bb->rend(); i!=e;++i) {
-		Instruction* inst = &*i;
-    if (find(conSet.begin(), conSet.end(), inst)!=conSet.end()){
-      //need to return the previous inst (next in fowards), as it should be inserted before the returned inst
-      
-      if (prev == NULL) {
-        //only happens if use is a ret inst, which is a scope use to make the branching 
-        //work, not an actual one, so this is safe
-        return inst;
-      } 
-      return prev;
-    }
-    else if (CallInst* ci = dyn_cast<CallInst>(inst)){
-      if (nested.find(ci->getCalledFunction())!=nested.end()) {
-        return prev;
-      }
-    }
-    prev = inst;
-  }
-  //otherwise just return first inst of the block
-  //errs() << "truncate returning " << bb->front() << "\n";
-  return &bb->front();
-}
-
-
-Function* ConsistentInference::commonPredecessor(map<Instruction*, BasicBlock*> blockMap, Function* root)
-{
-  vector<Function*> funcList;
-  //add the parents, without duplicates
-  for (auto map : blockMap) {
-    if(!(find(funcList.begin(), funcList.end(), map.second->getParent())!=funcList.end())) {
-      funcList.push_back(map.second->getParent());
-      #if DEBUGINFER
-      errs() << "DEBUGINFER: adding: " << map.second->getParent()->getName()<<"\n";
-      #endif
-    }
-  }
-  //easy case: everything is already in the same function
-  if(funcList.size()==1) {
-    return funcList.at(0);
-  }
-  /* Algo Goal: get the deepest function that still calls (or is) all funcs in funcList.
-   * Consider: multiple calls? Should be dealt with in the add region function -- eventually each caller
-   * gets its own region
-  */
-   Function* goal = nullptr;
-   //Function* root = m->getFunction("app");
-   #if DEBUGINFER
-   errs() << "DEBUGINFER: starting from " << root->getName() << "\n";
-   #endif
-   deepCaller(root, funcList, &goal);
-   if(goal == nullptr) {
-     errs() << "ERROR: deepCaller failed\n";
-   }
-   return goal;
-}
-
-/*Recursive: from a root, returns list of called funcs. */
-vector<Function*> ConsistentInference::deepCaller(Function* root, vector<Function*> funcList, Function** goal)
-{
-  vector<Function*> calledFuncs;
-  bool mustIncludeSelf = false;
-  
-  for (inst_iterator inst = inst_begin(root), E = inst_end(root); inst != E; ++inst) {
-    if(CallInst* ci = dyn_cast<CallInst>(&(*inst))) {
-      calledFuncs.push_back(ci->getCalledFunction());
-    }
-  }
-  vector<Function*> explorationList;
-  for (Function* item : funcList) {
-    
-    //skip over root or called funcs
-    if ((find(calledFuncs.begin(), calledFuncs.end(), item)!=calledFuncs.end()) || item == root) {
-      if (item == root) {
-        mustIncludeSelf = true;
-      }
-      continue;
-    }
-    explorationList.push_back(item);
-    #if DEBUGINFER
-    errs() << "need to find " << item->getName() <<"\n";
-    #endif
-  }
-  //this function is a root of a call tree that calls everything in the func List
-  if (explorationList.empty()) {
-    #if DEBUGINFER
-    errs() << "empty list\n";
-    #endif
-    *goal = root;
-    return calledFuncs;
-  }
-  //otherwise recurse
-  Function* candidate = nullptr;
-  for (Function* called : calledFuncs) {
-    vector<Function*> partial = deepCaller(called, explorationList, &candidate);
-    //if candidate is set, it means called is a root for everything in the explorationList
-    if (candidate!=nullptr) {
-      *goal = candidate;
-      #if DEBUGINFER
-      errs() << "New candidate: " << (*goal)->getName() << "\n";
-      #endif
-    }
-    //remove from explorationList, but add to calledFuncs
-    for (Function* item : partial) {
-      func_vec::iterator place = find(explorationList.begin(), explorationList.end(), item);
-      if(place!=explorationList.end()) {
-        explorationList.erase(place);
-      }
-      calledFuncs.push_back(item);
-    }
-
-  }
-  //current point is a root
-  if(explorationList.empty()) {
-    //not the deepest
-    if (candidate!=nullptr && !mustIncludeSelf) {
-      *goal = candidate;
-    } else {
-    //is the deepest  
-      *goal = root;
-    }
-  }
-  return calledFuncs;
-}
-
-
-
-
-/*Recursive: get the min of the maximum length of each regions*/
-inst_inst_pair ConsistentInference::findSmallest(vector<inst_inst_pair>regionsFound)
-{
-  inst_inst_pair best;
-  int best_count = 2147483647;
-
-  for (inst_inst_pair candidate : regionsFound) {
-    Function* root = candidate.first->getFunction();
-    int pre = 0 ;
-    int found = 0;
-    for (Instruction& inst : *candidate.first->getParent()) {
-      pre++;
-      if (&inst==candidate.first) {
-        break;
-        
-      }
-    }
-    //get the max length from the bb to the end instruction
-    vector<BasicBlock*> v;
-    int length = getSubLength(candidate.first->getParent(), candidate.second, v);
-    //substract the prefix before the start inst
-    length -= pre;
-    if (length < best_count) {
-      best_count = length;
-      best = candidate;
-      //errs() << "best candidate is " << *candidate.first << " and " <<
-      // *candidate.second << " with length " << length << "\n";
-    }
-
-  }
-  return best;
-}
-//helper func, recursive
-int ConsistentInference::getSubLength(BasicBlock* bb, Instruction* end, vector<BasicBlock*> visited){
-  int count = 0;
-  int max_ret = 0;
-  visited.push_back(bb);
-  for (Instruction& inst : *bb) {
-    count++;
-    if (&inst == end){
-      return count;
-    }
-    if(CallInst* ci = dyn_cast<CallInst>(&inst)){
-      Function* cf = ci->getCalledFunction();
-      if (!cf->empty() && cf!=NULL) {
-        //errs() <<"attempting function " << cf->getName() << "\n";
-        count+= cf->getInstructionCount();
-      }
-    }
-    if (inst.isTerminator()) {
-      int numS = inst.getNumSuccessors();
-      for (int i = 0; i < numS; i++) {
-        BasicBlock* next = inst.getSuccessor(i);
-        //already counted -- do something more fancy for loops?
-        if (find(visited.begin(), visited.end(), next)!=visited.end()) {
-          continue;
-        }
-        int intermed = getSubLength(inst.getSuccessor(i), end, visited);
-        if (intermed > max_ret) {
-          max_ret = intermed;
-        }
-      }
-    }
-  }
-  return count + max_ret;
-} 
-
-bool ConsistentInference::sameFunction(map<Instruction*, BasicBlock*> blockMap)
-{
-  Function* comp = blockMap.begin()->second->getParent();
-  for (auto map : blockMap) {
-    if (map.second->getParent()!= comp) {
-      return false;
-    }
-  }
-  return true;
-}
-
-
diff --git a/ocelot/AtomicRegionInference/src/Helpers.cpp b/ocelot/AtomicRegionInference/src/Helpers.cpp
new file mode 100644
index 0000000..a0f62cf
--- /dev/null
+++ b/ocelot/AtomicRegionInference/src/Helpers.cpp
@@ -0,0 +1,36 @@
+#include "include/Helpers.h"
+
+// Builds a short text label for `node`: the value's own name when it has one,
+// otherwise whatever printAsOperand writes for it with PrintType = false.
+std::string getSimpleNodeLabel(const Value* node) {
+  if (!node->hasName()) {
+    // Unnamed value: render it as an operand into a local string.
+    std::string operandText;
+    raw_string_ostream sink(operandText);
+    node->printAsOperand(sink, false);
+    return operandText;
+  }
+
+  // Named value: the name is the label.
+  return node->getName().str();
+}
+
+bool isAnnot(const StringRef annotName) {  // exact match on the three annotation names
+  return annotName == "Fresh" || annotName == "Consistent" || annotName == "FreshConsistent";
+}
+
+// Dumps `iim` to errs(): each key, then its mapped values; `onlyCalls` keeps only CallInst keys.
+void printInstInsts(const inst_insts_map& iim, bool onlyCalls) {
+  for (const auto& [key, mapped] : iim) {
+    if (onlyCalls && !isa<CallInst>(key)) continue;
+    errs() << *key << " ->\n";
+    for (auto* value : mapped) errs() << *value << "\n";
+    errs() << "\n";
+  }
+}
+
+// Debug helper: writes each element of `iv` to errs(), dereferenced, one per
+// line.
+void printInsts(const inst_vec& iv) {
+  for (const auto& entry : iv) errs() << *entry << "\n";
+}
diff --git a/ocelot/AtomicRegionInference/src/InferAtomicPass.cpp b/ocelot/AtomicRegionInference/src/InferAtomicPass.cpp
deleted file mode 100644
index a105841..0000000
--- a/ocelot/AtomicRegionInference/src/InferAtomicPass.cpp
+++ /dev/null
@@ -1,516 +0,0 @@
-#include "include/InferAtomicPass.h"
-#include "include/TaintTracker.h"
-
-#define CAPSIZE 1000
-#define PRINTMAPS 1
-#define FRESHDEBUG 1
-
-void InferAtomicModulePass::removeAnnotations(inst_vec* toDelete) 
-{
- //delete all the annotation function calls
- bool instsLeftToDelete = true;
-  Instruction* candidate;
-  while (instsLeftToDelete) {
-    instsLeftToDelete = false;
-    //can't delete while directly iterating through the module
-    for (Function& f : *this->m) {
-      for (BasicBlock& bb : f) {
-        for (Instruction& inst : bb) {
-
-          //for now, let's just delete unused core or compiler builtin functions
-          if(isa<CallInst>(&inst)) {
-            if (find(toDelete->begin(), toDelete->end(), &inst)!=toDelete->end()) {
-              candidate = &inst;
-              instsLeftToDelete = true;
-              break;
-              }
-          }
-        }
-      }
-    }
-      //recheck, as this could be the last iteration
-      if(instsLeftToDelete) {
-  #if DEBUG
-        errs() << "DEBUG: deleting: " << candidate->getName() <<"\n";
-  #endif
-        candidate->replaceAllUsesWith(UndefValue::get(candidate->getType()));
-        candidate->eraseFromParent();
-      }
-    
-  }
-    //now delete all the annotation functions
-    //vector<Function*> toDeleteF;
-    bool functionsLeftToDelete = true;
-    Function* candidatef;
-    while (functionsLeftToDelete) {
-      functionsLeftToDelete = false;
-      //can't delete while directly iterating through the module
-      for (Function& f : *this->m) {
-        if (f.hasName()) {
-          //for now, let's just delete unused core or compiler builtin functions
-          if(f.getName().contains("Fresh")||f.getName().contains("Consistent")) {
-            candidatef = &f;
-            functionsLeftToDelete = true;
-            break;
-            
-          }
-        }
-
-      }
-
-      //recheck, as this could be the last iteration
-      if(functionsLeftToDelete) {
-#if DEBUG
-      errs() << "DEBUG: deleting: " << candidatef->getName() <<"\n";
-#endif
-
-      candidatef->replaceAllUsesWith(UndefValue::get(candidatef->getType()));
-      candidatef->eraseFromParent();
-    }
-  }
-}
-
-/*
- * Top-level pass for atomic region inference 
- */
-bool InferAtomicModulePass::runOnModule(Module &M) {
-  m = &M;
-  capacitorSize = CAPSIZE;
-
-  //TODO: init atomStart/End with the proper functions
-  for (Function& F : M) {
-    if (F.getName().contains("atomic_start")) {
-      #if DEBUG
-      errs() << "DEBUG: found atom start\n";
-      #endif
-      atomStart = &F;
-    }
-    if (F.getName().contains("atomic_end")) {
-      #if DEBUG
-      errs() << "DEBUG: found atom end\n";
-      #endif
-      atomEnd = &F;
-    }
-  }
-
-  //Build the consistent set and fresh lists here, to only 
-  //go through all the declarations once. 
-  std::map<int,inst_vec> conVars;
-  inst_vec_vec freshVars;
-  inst_insts_map inputInfo = buildInputs(m);
-  inst_vec toDelete;
-  getAnnotations(&conVars, &freshVars, inputInfo, &toDelete);
-  //TODO: need to add unique point of call chain prefix to con set
-  #if PRINTMAPS
-    errs () << "Initial fresh is: \n";
-    for (inst_vec item : freshVars) {
-      for (Instruction* item2 : item) {
-        errs() << *item2 << "\n";
-      }
-    }
-    errs() << "End init fresh\n";
-  #endif
-
-  #if PRINTMAPS
-    errs () << "Initial consistent is: \n";
-    for (auto map : conVars) {
-      errs() << "Begin set\n";
-      for (Instruction* item2 : map.second) {
-        errs() << *item2 << "\n";
-      }
-    }
-    errs() << "End init Consistent\n";
-  #endif
-
-  #if PRINTMAPS
-    errs() << "Printing map:\n";
-    for (auto map : inputInfo) {
-      if (isa<CallInst>(map.first)) {
-      errs() << *(map.first) << "in map\n";
-      for (Value* l : map.second) {
-        errs() << *l << "\n";
-      }
-      }
-    }
-  #endif
-  map<int,inst_vec> allConSets = collectCon(conVars, inputInfo);
-  inst_vec_vec allFresh  = collectFresh(freshVars, inputInfo);
-
-  
-  
-  #if PRINTMAPS
-    errs () << "Fresh is: \n";
-    for (inst_vec item : allFresh) {
-      for (Instruction* item2 : item) {
-        errs() << *item2 << "\n";
-      }
-    }
-    errs() << "End fresh\n";
-  #endif
-
-  #if PRINTMAPS
-    errs () << "Consistent is: \n";
-    for (auto map : allConSets) {
-      for (Instruction* item2 : map.second) {
-        errs() << *item2 << "\n";
-      }
-    }
-    errs() << "End Consistent\n";
-  #endif
-
-  
-  
-  //will do consistency first
-  ConsistentInference* ci = new ConsistentInference(this, &M, atomStart, atomEnd);
-  
-  ci->inferConsistent(allConSets);
-  ci->inferFresh(allFresh);
-  
-  //delete annotations
-  removeAnnotations(&toDelete);
-  
-  return true;
-}
-
-
-/**This function finds annotated variables)**/
-void InferAtomicModulePass::getAnnotations(std::map<int,inst_vec>* conSets, inst_vec_vec* freshVars, 
-  inst_insts_map inputMap, inst_vec* toDelete) 
-{
-  //note: delete the annotation functions afterwards
-  map<Instruction*, int> recallSet;
-  
-  for (Function& f : *m) {
-    for (BasicBlock& bb : f) {
-      for (Instruction& inst : bb) {
-        if(CallInst* ci = dyn_cast<CallInst>(&inst)) {
-          Function* called = ci->getCalledFunction();
-          //various empty or null checks
-          if (called==NULL) {
-            continue;
-          }
-          if (called->empty()||!called->hasName()) {
-            continue;
-          }
-          //covers both Consistent and FreshConsistent
-          if (called->getName().contains("Consistent")) {
-            //first para is var, second is id
-            toDelete->push_back(ci);
-            int setID;
-            //v.push_back(ai); <<-- don't actually need this?
-            //bit cast use of x, then value operand of store
-            Instruction* var = dyn_cast<Instruction>(ci->getOperand(0));
-            
-            if (var==NULL) {
-              //errs() << "error casting with " << *ci <<"\n";
-              continue;
-            }
-	    // errs() << "New consistent annot. with " << *var<<"\n";
-            Value* id = ci->getOperand(1);
-            if(ConstantInt* cint = dyn_cast<ConstantInt>(id)) {
-              setID = cint->getSExtValue();
-            }
-            queue<Value*> customUsers;
-            set<Instruction*> v;
-            //v.emplace(ci);
-            //in case var itself is iOp
-            for (Instruction* iOp : inputMap[var]) {
-              v.emplace(iOp);
-            }
-	    
-            //customUsers.push(var);
-            for (Value* use : var->users()) {
-              //don't push the annotation
-              if (use == ci) {
-                continue;
-              }
-              //errs() << "DEBUG: pushing use of var: " << *use << "\n";
-              customUsers.push(use);
-            }
-            while(!customUsers.empty()) {
-              Value* use = customUsers.front();
-              customUsers.pop();
-        //errs() << "DEBUG: use is " << *use << " of var " << *var<<"\n";
-              if (Instruction* instUse = dyn_cast<Instruction>(use)) {
-                for (Instruction* iOp : inputMap[instUse]) {
-                      v.emplace(iOp);
-            //  errs() << "DEBUG: adding to v  " << *iOp << "\n";
-                }  
-              }
-              if(isa<BitCastInst>(use)||isa<ZExtInst>(use)) {
-                for (Value* use2 : use->users()) {
-                 // errs() << "DEBUG: use2 is " << *use2 << "\n";
-                  if(StoreInst* si = dyn_cast<StoreInst>(use2)){
-                    for (Instruction* iOp : inputMap[si]) {
-                      v.emplace(iOp);
-                  //    errs() << "DEBUG: adding to v  " << *iOp << "\n";
-                    }
-                  }
-                //  errs() << "DEBUG: pushing use2 of var: " << *use2 << "\n";
-                  customUsers.push(use2);
-                }
-              }
-
-              if(isa<GetElementPtrInst>(use)) {
-                for (Value* use2 : use->users()) {
-              //    errs() << "DEBUG: use2 is " << *use2 << "\n";
-                  if(StoreInst* si = dyn_cast<StoreInst>(use2)){
-                      //v.push_back(si);
-                    for (Instruction* iOp : inputMap[si]) {
-                      v.emplace(iOp);
-                //      errs() << "DEBUG: adding to v  " << *iOp << "\n";
-                    }
-                  }
-              //    errs() << "DEBUG: pushing use2 of var: " << *use2 << "\n";
-                  customUsers.push(use2);
-                }
-              }
-            }
-	    //last case
-	    if (v.empty()) {
-	      //some entries have a first link with ci, not var
-	    
-	      for (Instruction* iOp : inputMap[ci]) {
-		if (inputMap[ci].size() == 1) {
-		  for (Instruction* origLink : inputMap[iOp]) {
-		    v.emplace(origLink);
-		  }
-		} else {
-		  v.emplace(iOp);
-		}
-	      
-	      }
-	     
-
-	    }
-	     //for later deletion purposes
-	    inputMap.erase(ci);
-            
-              
-            if (!v.empty()) {
-              inst_vec temp;
-              for (Instruction* item : v) {
-                temp.push_back(item);
-              }
-              //add the collected list to the map
-              if(conSets->find(setID)!=conSets->end()) {
-                conSets->at(setID).insert(conSets->at(setID).end(), temp.begin(), temp.end());
-              } else {
-                conSets->emplace(setID, temp);
-              }
-            }
-            
-          }
-          if (called->getName().contains("Fresh")) {
-            set<Instruction*> v;
-            toDelete->push_back(ci);
-            inputMap.erase(ci);
-            Value* var = ci->getOperand(0);
-            if (Instruction* inst = dyn_cast<Instruction>(var)) {
-              v.emplace(inst);
-            } else {
-              //errs() << "error casting\n";
-            }
-	    //errs() << "New Fresh annot. with " << *var<<"\n";
-           // v.push_back(ci);
-              
-            for(Value* use : var->users()) {
-              if(StoreInst* si = dyn_cast<StoreInst>(use)){
-            //     errs() << "DEBUG: pushing " << *use << "\n";
-                v.emplace(si);
-              }
-              if(isa<GetElementPtrInst>(use)) {
-                for (Value* use2 : use->users()) {
-                //   errs() << "DEBUG: pushing " << *use2 << "\n";
-                  if(StoreInst* si = dyn_cast<StoreInst>(use2)){
-                    v.emplace(si);
-                  }
-                }
-              }
-            }
-            if (!v.empty()) {
-              inst_vec temp;
-              for (Instruction* item : v) {
-                temp.push_back(item);
-              }
-              freshVars->push_back(temp);
-            }
-          }
-            
-        }
-
-      }
-    }
-  }
-
-}
-  
-
-
-
-/*Given the starting point annotations of conSets, find the 
-deepest unique point of the call chain*/
-map<int, inst_vec> InferAtomicModulePass::collectCon(map<int, inst_vec> startingPoints, inst_insts_map inputMap)
-{
-  map<int, inst_vec> toReturn;
-  for (pair<int, inst_vec> iv : startingPoints ) {
-    set<Instruction*> unique;
-    map<Instruction*,set<Instruction*>> callChains;
-    //each item should be the starting point from a different annot
-    for(Instruction* item : iv.second) {
-      #if FRESHDEBUG
-        errs() << "Starting point: " << *item << "\n";
-      #endif
-      //add self to call chain
-      callChains[item].insert(item);
-
-      for (Instruction* iOp : inputMap[item]) {
-	//    unique.insert(iOp);
-        callChains[item].insert(iOp);
-        queue<Instruction*> toExplore;
-        toExplore.push(iOp);
-        while (!toExplore.empty()) {
-          Instruction* curr = toExplore.front();
-          toExplore.pop();
-          for (Instruction* intermed : inputMap[curr]) {
-            if (! (find(callChains[item].begin(), callChains[item].end(), intermed)
-            !=callChains[item].end())) {
-              callChains[item].insert(intermed);
-              toExplore.push(intermed);
-            }
-          }
-        }
-
-      }// finish constructing call chain for one annot. in the set      
-      
-    }//constructed call chains for ALL annot. in the set.
-    //now check the call chain
-    
-    //int index = 0;
-    //map<Instruction*,bool> foundUniquePoint;
-    //clean up the call chains
-    
-    for(auto ccmap : callChains) {
-      for (Instruction* possibility : ccmap.second) {
-        //if the link is in the same function, then continue
-	//errs() << "examining possibility: " << *possibility << "\n";
-	bool sf = false;
-	for (Instruction* link : inputMap[possibility]) {
-	  //errs() << "next link is" << *link << "\n";
-	  if ((link!=possibility) && link->getFunction() == possibility->getFunction()) {
-	      sf = true;
-	      
-	  }
-	}
-	if (sf) {
-	  continue;
-	}
-        bool isUnique = true;
-        for (auto ccmapNest : callChains) {
-          //if self then skip
-          if (ccmapNest == ccmap) {
-            continue;
-          }
-          //otherwise check if this map also contains the possibility
-          if (find(ccmapNest.second.begin(), ccmapNest.second.end(), possibility)
-          != ccmapNest.second.end())
-          {
-            isUnique = false;
-            break;
-          }
-        }
-        if (isUnique){
-          unique.insert(possibility);
-	  //  errs() << "Found unique!" << *possibility << "\n";
-	} else {
-          //try another poss.
-          continue;
-        }
-      }
-    }
-    
-
-    inst_vec v;
-    for (Instruction* item2 : unique) {
-      if (!isa<AllocaInst>(item2)) {
-        v.push_back(item2);
-      }
-    }
-    toReturn[iv.first] = v;  
-  }//end starting point check
-
-  return toReturn;
-}
-
-/*This function collects the input srcs and uses off of the fresh annotated vars*/
-inst_vec_vec InferAtomicModulePass::collectFresh(inst_vec_vec startingPoints, inst_insts_map inputMap)
-{
-  inst_vec_vec toReturn;
-  
-  for (inst_vec iv : startingPoints ) {
-    set<Instruction*> unique;
-    set<Instruction*> callChain;
-    for(Instruction* item : iv) {
-      #if FRESHDEBUG
-        errs() << "Starting point: " << *item << "\n";
-      #endif
-      //uses (forwards) are direct only (might need a little chaining for direct in rs to be direct in IR)
-      inst_vec uses = traverseDirectUses(item);
-
-      for (Instruction* use : uses) {
-        #if FRESHDEBUG
-        errs() << "Starting point use: " << *use << "\n";
-      #endif
-       // if (isa<StoreInst>(use)||isa<CallInst>(use)) {
-          unique.insert(use);
-        //}
-        for (Instruction* iOp : inputMap[use]) {
-          unique.insert(iOp);
-        }
-      }
-
-      for (Instruction* iOp : inputMap[item]) {
-        unique.insert(iOp);
-        callChain.insert(iOp);
-        queue<Instruction*> toExplore;
-        toExplore.push(iOp);
-        while (!toExplore.empty()) {
-          Instruction* curr = toExplore.front();
-          toExplore.pop();
-          for (Instruction* intermed : inputMap[curr]) {
-            if (! (find(callChain.begin(), callChain.end(), intermed)!=callChain.end())) {
-              callChain.insert(intermed);
-              toExplore.push(intermed);
-            }
-          }
-        }
-
-      }
-      //don't forget the item itself
-      if (isa<StoreInst>(item)||isa<CallInst>(item)) {
-        unique.insert(item);
-      }
-      
-      
-    }
-    //now construct the call chain
-    for (Instruction* vv : callChain) {
-      //   errs() << "call chain val: " << *vv <<"\n";
-      unique.insert(vv);
-    }
-    inst_vec v;
-    for (Instruction* item2 : unique) {
-      if (!isa<AllocaInst>(item2)) {
-        v.push_back(item2);
-      }
-    }
-    toReturn.push_back(v);  
-  }
-
-  
-  return toReturn;
-}
-
-char InferAtomicModulePass::ID = 0;
-
-RegisterPass<InferAtomicModulePass> X("atomize", "Infer Atomic Pass");
diff --git a/ocelot/AtomicRegionInference/src/InferAtoms.cpp b/ocelot/AtomicRegionInference/src/InferAtoms.cpp
new file mode 100644
index 0000000..3843383
--- /dev/null
+++ b/ocelot/AtomicRegionInference/src/InferAtoms.cpp
@@ -0,0 +1,529 @@
+#include "include/InferAtoms.h"
+
+#define CAPSIZE 1000
+
+// Top-level pass for atomic region inference: locates the region markers,
+// gathers the Fresh/Consistent annotations, infers regions for them and then
+// strips the annotations from the module.
+PreservedAnalyses InferAtomsPass::run(Module& M, ModuleAnalysisManager& AM) {
+  // Function analyses for InferFreshCons come from a manager local to this
+  // run; AM itself is not consulted.
+  PassBuilder PB;
+  FunctionAnalysisManager FAM;
+  PB.registerFunctionAnalyses(FAM);
+  setModule(&M);
+
+  // Remember the region marker functions if the module has them
+  for (auto& F : M) {
+    auto FName = F.getName();
+    if (FName == "atomic_start") {
+#if DEBUG
+      errs() << "Found atomic_start\n";
+#endif
+      atomStart = &F;
+    } else if (FName == "atomic_end") {
+#if DEBUG
+      errs() << "Found atomic_end\n";
+#endif
+      atomEnd = &F;
+    }
+  }
+
+  // Build the consistent set and fresh lists here,
+  // to only go through all the declarations once.
+  std::map<int, inst_vec> consVars;
+  inst_vec_vec freshVars;
+  inst_insts_map inputMap = buildInputs(this->M);
+  inst_vec toDelete;
+  getAnnotations(&consVars, &freshVars, inputMap, &toDelete);
+  // TODO: need to add unique point of call chain prefix to cons set
+
+#if DEBUG
+  errs() << "Initial Fresh:\n";
+  for (auto& insts : freshVars)
+    for (auto* inst : insts) errs() << *inst << "\n";
+
+  errs() << "Initial Consistent:\n";
+  for (auto& [_, insts] : consVars)
+    for (auto* inst : insts) errs() << *inst << "\n";
+
+  errs() << "Print inputMap CallInst entries:\n";
+  printInstInsts(inputMap, true);
+#endif
+
+  // Turn the annotation starting points into the sets inference works on
+  auto allConsSets = collectCons(consVars, inputMap);
+  auto allFresh = collectFresh(freshVars, inputMap);
+
+#if DEBUG
+  errs() << "Fresh after collect: \n";
+  for (auto& varSet : allFresh)
+    for (auto* var : varSet) errs() << *var << "\n";
+
+  errs() << "Consistent after collect: \n";
+  for (auto& [_, insts] : allConsSets)
+    for (auto* inst : insts) errs() << *inst << "\n";
+#endif
+
+  // The helper is only needed for this run, so it lives on the stack instead
+  // of being heap-allocated and never freed.
+  InferFreshCons inference(&FAM, &M, atomStart, atomEnd);
+
+  // Consistent first
+  inference.inferConsistent(allConsSets);
+  inference.inferFresh(allFresh);
+
+  // Delete annotations
+  removeAnnotations(&toDelete);
+
+  return PreservedAnalyses::none();
+}
+
+// Finds the annotation calls in the module and collects, for each of them,
+// the instructions that inference starts from.
+//
+// consVars  - out: set id -> instructions gathered from the
+//             Consistent/FreshConsistent annotations carrying that id.
+// freshVars - out: one instruction list per Fresh annotation.
+// inputMap  - per-instruction inputs from buildInputs(); passed by value, so
+//             the erase() below only changes this function's copy.
+// toDelete  - out: every annotation call encountered.
+//
+// A Consistent/FreshConsistent annotation whose first operand is not an
+// instruction, or whose id is not a constant, is still recorded in toDelete
+// but contributes nothing to consVars.
+void InferAtomsPass::getAnnotations(std::map<int, inst_vec>* consVars, inst_vec_vec* freshVars,
+                                    inst_insts_map inputMap, inst_vec* toDelete) {
+#if DEBUG
+  errs() << "=== getAnnotations ===\n";
+#endif
+  for (auto& F : *this->M) {
+    for (auto& B : F) {
+      for (auto& I : B) {
+        if (auto* ci = dyn_cast<CallInst>(&I)) {
+#if DEBUG
+          errs() << "[Loop Inst] cur inst = CallInst\n";
+#endif
+          auto* fun = ci->getCalledFunction();
+          // Skip indirect calls, body-less callees and unnamed callees
+          if (fun == nullptr || fun->empty() || !fun->hasName()) continue;
+          // Consistent and FreshConsistent
+          if (isAnnot(fun->getName()) && fun->getName() != "Fresh") {
+#if DEBUG
+            errs() << "[Loop Inst] Calls Consistent/FreshConsistent\n";
+#endif
+            toDelete->push_back(ci);
+            // First operand is the annotated variable, second is the set id
+            Instruction* var = dyn_cast<Instruction>(ci->getOperand(0));
+            if (var == nullptr) continue;
+
+            // A non-constant id names no set: skip the annotation instead of
+            // filing its instructions under an uninitialized id.
+            auto* cint = dyn_cast<ConstantInt>(ci->getOperand(1));
+            if (cint == nullptr) continue;
+            const int setID = cint->getSExtValue();
+
+            // Worklist of users still to inspect
+            std::queue<Value*> customUsers;
+            // Inputs collected for this annotation (deduplicated)
+            std::set<Instruction*> v;
+            // In case var itself has recorded inputs
+            for (Instruction* iOp : inputMap[var]) {
+              v.emplace(iOp);
+            }
+
+            // Seed the worklist with the users of var
+            for (Value* use : var->users()) {
+              // Don't push the annotation itself
+              if (use == ci) {
+                continue;
+              }
+              customUsers.push(use);
+            }
+            while (!customUsers.empty()) {
+              Value* use = customUsers.front();
+              customUsers.pop();
+              // Any instruction that uses the value contributes its inputs
+              if (Instruction* instUse = dyn_cast<Instruction>(use)) {
+                for (Instruction* iOp : inputMap[instUse]) {
+                  v.emplace(iOp);
+                }
+              }
+              // Follow the value through bitcasts and zero-extensions
+              if (isa<BitCastInst>(use) || isa<ZExtInst>(use)) {
+                for (Value* use2 : use->users()) {
+                  if (StoreInst* si = dyn_cast<StoreInst>(use2)) {
+                    for (Instruction* iOp : inputMap[si]) {
+                      v.emplace(iOp);
+                    }
+                  }
+                  customUsers.push(use2);
+                }
+              }
+
+              // Likewise through address computations
+              if (isa<GetElementPtrInst>(use)) {
+                for (Value* use2 : use->users()) {
+                  if (StoreInst* si = dyn_cast<StoreInst>(use2)) {
+                    for (Instruction* iOp : inputMap[si]) {
+                      v.emplace(iOp);
+                    }
+                  }
+                  customUsers.push(use2);
+                }
+              }
+            }
+            // Last case: some entries have a first link with ci, not var
+            if (v.empty()) {
+              for (Instruction* iOp : inputMap[ci]) {
+                if (inputMap[ci].size() == 1) {
+                  for (Instruction* origLink : inputMap[iOp]) {
+                    v.emplace(origLink);
+                  }
+                } else {
+                  v.emplace(iOp);
+                }
+              }
+            }
+            // For later deletion purposes
+            inputMap.erase(ci);
+
+            if (!v.empty()) {
+              inst_vec temp;
+              for (Instruction* item : v) {
+                temp.push_back(item);
+              }
+              // Add the collected list to the map
+              if (consVars->find(setID) != consVars->end()) {
+                consVars->at(setID).insert(consVars->at(setID).end(), temp.begin(), temp.end());
+              } else {
+                consVars->emplace(setID, temp);
+              }
+            }
+          } else if (fun->getName() == "Fresh") {
+#if DEBUG
+            errs() << "[Loop Inst] Calls Fresh\n";
+#endif
+            std::set<Instruction*> v;
+            toDelete->push_back(ci);
+
+#if DEBUG
+            errs() << "[Loop Inst] Print inputMap entries:\n";
+            printInstInsts(inputMap);
+#endif
+
+            //* The Fresh call is deliberately kept in inputMap: erasing it
+            //* here gives a wrong result.
+
+            auto* arg = ci->getOperand(0);
+#if DEBUG
+            errs() << "[Loop Inst] Fresh arg: " << *arg << "\n";
+#endif
+            if (auto* inst = dyn_cast<Instruction>(arg)) {
+#if DEBUG
+              errs() << "[Loop Inst] arg = Instruction, add to v\n";
+#endif
+              v.emplace(inst);
+
+              //* Actually collect all uses (e.g., log(x)): the other loads of
+              //* the pointer arg was loaded from are added as well.
+              if (auto* li = dyn_cast<LoadInst>(inst)) {
+#if DEBUG
+                errs() << "[Loop Inst] Further arg = LoadInst\n";
+#endif
+                auto* ptr = li->getPointerOperand();
+#if DEBUG
+                errs() << "[Loop Inst] Ptr operand: " << *ptr << "\n";
+#endif
+                for (auto* ptrUse : ptr->users()) {
+#if DEBUG
+                  errs() << "[Loop ptr users] ptrUse: " << *ptrUse << "\n";
+#endif
+                  if (ptrUse != inst) {
+                    if (auto* liUse = dyn_cast<LoadInst>(ptrUse)) {
+#if DEBUG
+                      errs() << "[Loop ptr users] Diff LoadInst ptrUse, add to v\n";
+#endif
+                      v.emplace(liUse);
+                    }
+                  }
+                }
+              }
+            }
+
+#if DEBUG
+            errs() << "[Loop Inst] Go over arg users\n";
+#endif
+            // Stores that use the argument directly or through one GEP
+            for (auto* use : arg->users()) {
+              if (auto* si = dyn_cast<StoreInst>(use)) {
+#if DEBUG
+                errs() << "[Loop Users] use = StoreInst, add to v: " << *si << "\n";
+#endif
+                v.emplace(si);
+              } else if (isa<GetElementPtrInst>(use)) {
+                for (auto* use2 : use->users()) {
+                  if (auto* si = dyn_cast<StoreInst>(use2)) {
+                    v.emplace(si);
+                  }
+                }
+              }
+            }
+
+            if (!v.empty()) {
+#if DEBUG
+              errs() << "[Loop Inst] Add v's insts to a set in freshVars:\n";
+#endif
+              inst_vec tmp;
+              for (auto* inst : v) {
+#if DEBUG
+                errs() << "[Loop v] " << *inst << "\n";
+#endif
+                tmp.push_back(inst);
+              }
+              freshVars->push_back(tmp);
+            }
+          }
+        }
+      }
+    }
+  }
+
+#if DEBUG
+  errs() << "*** getAnnotations ***\n";
+#endif
+}
+
+// Erases the annotation calls listed in `toDelete`, the instructions passed to
+// them as arguments (uses become undef), and the annotation functions themselves.
+void InferAtomsPass::removeAnnotations(inst_vec* toDelete) {
+  std::vector<Function*> toDeleteF;
+  // Erased after the walk, so a shared argument is erased exactly once.
+  std::set<Instruction*> argInsts;
+  for (auto& F : *this->M) {
+    if (F.hasName() && isAnnot(F.getName())) {
+      toDeleteF.push_back(&F);
+      continue;
+    }
+    for (auto& B : F) {
+      // Step the iterator before erasing so the next instruction is not skipped.
+      for (auto I = B.begin(); I != B.end();) {
+        auto* ci = dyn_cast<CallInst>(&*I++);
+        if (!ci || std::find(toDelete->begin(), toDelete->end(), ci) == toDelete->end()) continue;
+#if DEBUG
+        errs() << "Remove call: " << *ci << "\n";
+#endif
+        // Record the operands while the call is still alive.
+        for (auto& arg : ci->args())
+          if (auto* argInst = dyn_cast<Instruction>(arg)) argInsts.insert(argInst);
+        argInsts.erase(ci);
+        if (!ci->use_empty()) ci->replaceAllUsesWith(UndefValue::get(ci->getType()));
+        ci->eraseFromParent();
+      }
+    }
+  }
+  for (auto* argInst : argInsts) {
+#if DEBUG
+    errs() << "Remove call arg: " << *argInst << "\n";
+#endif
+    // Detach users first; an erased instruction must not be touched again.
+    if (!argInst->use_empty()) argInst->replaceAllUsesWith(UndefValue::get(argInst->getType()));
+    argInst->eraseFromParent();
+  }
+
+  for (auto* F : toDeleteF) {
+#if DEBUG
+    errs() << "Remove function " << F->getName() << "\n";
+#endif
+    F->replaceAllUsesWith(UndefValue::get(F->getType()));
+    F->eraseFromParent();
+  }
+}
+
+// Given the starting points collected per Consistent set id, returns for each
+// id the "unique points" of its call chains. The chain of a starting point is
+// the point itself plus everything reachable from it through inputMap. A chain
+// member is kept when no input other than itself lies in its own function and
+// no other chain of the same set contains it; allocas are dropped.
+//
+// startingPoints - set id -> instructions produced by getAnnotations().
+// inputMap       - per-instruction inputs; taken by value, so the default
+//                  entries operator[] may add never reach the caller.
+std::map<int, inst_vec> InferAtomsPass::collectCons(std::map<int, inst_vec> startingPoints, inst_insts_map inputMap) {
+  std::map<int, inst_vec> toReturn;
+  // Bind by reference: the per-id vectors do not need to be copied
+  for (auto& [id, starts] : startingPoints) {
+    std::map<Instruction*, std::set<Instruction*>> callChains;
+    // Each item should be the starting point from a different annotation
+    for (Instruction* item : starts) {
+#if DEBUG
+      errs() << "Starting point: " << *item << "\n";
+#endif
+      // std::map references stay valid while other keys are inserted
+      auto& chain = callChains[item];
+      // Add self to call chain
+      chain.insert(item);
+
+      for (Instruction* iOp : inputMap[item]) {
+        chain.insert(iOp);
+        std::queue<Instruction*> toExplore;
+        toExplore.push(iOp);
+        while (!toExplore.empty()) {
+          Instruction* curr = toExplore.front();
+          toExplore.pop();
+          for (Instruction* intermed : inputMap[curr]) {
+            // insert() reports whether the element is new, which replaces a
+            // linear std::find over the set
+            if (chain.insert(intermed).second) {
+              toExplore.push(intermed);
+            }
+          }
+        }
+      }  // Finished the call chain for one annotation in the set
+    }  // Constructed call chains for ALL annotations in the set
+
+    // Now check the call chains
+    std::set<Instruction*> unique;
+    for (auto& [start, chain] : callChains) {
+      for (Instruction* possibility : chain) {
+        // If a link is in the same function, then try another possibility
+        bool sf = false;
+        for (Instruction* link : inputMap[possibility]) {
+          if ((link != possibility) && link->getFunction() == possibility->getFunction()) {
+            sf = true;
+            break;
+          }
+        }
+        if (sf) {
+          continue;
+        }
+
+        bool isUnique = true;
+        for (auto& [otherStart, otherChain] : callChains) {
+          // Keys are unique, so comparing them is enough to skip self
+          if (otherStart == start) {
+            continue;
+          }
+          // Otherwise check if this chain also contains the possibility
+          if (otherChain.count(possibility) != 0) {
+            isUnique = false;
+            break;
+          }
+        }
+        if (isUnique) {
+          unique.insert(possibility);
+        }
+      }
+    }
+
+    inst_vec v;
+    for (Instruction* item2 : unique) {
+      if (!isa<AllocaInst>(item2)) {
+        v.push_back(item2);
+      }
+    }
+    toReturn[id] = v;
+  }  // End starting point check
+
+  return toReturn;
+}
+
+// Collects the source inputs and uses of Fresh-annotated vars: per list in
+// freshVars, the direct uses of each var with their inputs, everything
+// reachable from each var through inputMap, and each var that is a store or a
+// call. Allocas are dropped.
+inst_vec_vec InferAtomsPass::collectFresh(inst_vec_vec freshVars, inst_insts_map inputMap) {
+#if DEBUG
+  errs() << "=== collectFresh ===\n";
+  errs() << "Go over fresh var sets\n";
+#endif
+  inst_vec_vec toReturn;
+
+  for (auto& varSet : freshVars) {
+#if DEBUG
+    errs() << "[Loop freshVars] Go over varSet:\n";
+    printInsts(varSet);
+#endif
+    std::set<Instruction*> unique, callChain;
+    for (auto* var : varSet) {
+#if DEBUG
+      errs() << "[Loop varSet] Cur var: " << *var << "\n";
+#endif
+      // Uses (forwards) are direct only (might need a little chaining for direct in rs to be direct in IR)
+      inst_vec uses = traverseDirectUses(var);
+
+#if DEBUG
+      errs() << "[Loop varSet] Go over uses of var\n";
+#endif
+      for (auto* use : uses) {
+#if DEBUG
+        errs() << "[Loop uses] Cur use: " << *use << "\n";
+        errs() << "[Loop uses] Add use to unique\n";
+#endif
+        unique.insert(use);
+        for (auto* input : inputMap[use]) {
+#if DEBUG
+          errs() << "[Loop inputMap[use]] Add src input of use to unique: " << *input << "\n";
+#endif
+          unique.insert(input);
+        }
+      }
+
+#if DEBUG
+      errs() << "[Loop varSet] Go over src inputs of var\n";
+#endif
+      for (auto* input : inputMap[var]) {
+#if DEBUG
+        errs() << "[Loop inputMap[var]] Cur src input: " << *input << "\n";
+#endif
+        unique.insert(input);
+        callChain.insert(input);
+        std::queue<Instruction*> toExplore;
+        toExplore.push(input);
+        while (!toExplore.empty()) {
+          Instruction* curr = toExplore.front();
+          toExplore.pop();
+          for (Instruction* intermed : inputMap[curr]) {
+            // insert() tells whether the element is new; no linear find needed
+            if (callChain.insert(intermed).second) {
+              toExplore.push(intermed);
+            }
+          }
+        }
+      }
+
+      // Add the var itself
+      if (isa<StoreInst>(var) || isa<CallInst>(var)) {
+#if DEBUG
+        errs() << "[Loop varSet] Cur var = StoreInst/CallInst, add to unique\n";
+#endif
+        unique.insert(var);
+      }
+    }
+    // Everything on the call chain is part of the result as well
+    for (auto* vv : callChain) {
+      unique.insert(vv);
+    }
+    inst_vec v;
+#if DEBUG
+    errs() << "[Loop freshVars] Go over unique\n";
+#endif
+    for (auto* inst : unique) {
+      if (!isa<AllocaInst>(inst)) {
+#if DEBUG
+        errs() << "[Loop unique] Cur inst != AllocaInst, add to v: " << *inst << "\n";
+#endif
+        v.push_back(inst);
+      }
+    }
+#if DEBUG
+    errs() << "[Loop FreshVars] Add v to toReturn\n";
+#endif
+    toReturn.push_back(v);
+  }
+
+#if DEBUG
+  errs() << "*** collectFresh ***\n";
+#endif
+  return toReturn;
+}
diff --git a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp
new file mode 100644
index 0000000..6258661
--- /dev/null
+++ b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp
@@ -0,0 +1,565 @@
+#include "include/InferFreshCons.h"
+
+#include "llvm/Analysis/PostDominators.h"
+
+// Insert a call to the region-start marker (toInsertType == 0) or the region-end marker
+// (any other value) immediately before insertBefore. Returns the new call inst, or
+// nullptr when there is no insertion point.
+Instruction* InferFreshCons::insertRegionInst(int toInsertType, Instruction* insertBefore) {
+#if DEBUG
+  errs() << "=== insertRegionInst ===\n";
+#endif
+  // IRBuilder cannot be positioned on a null inst: report it and leave the IR untouched
+  if (insertBefore == nullptr) {
+    errs() << "[Error] Null insertion point for region inst\n";
+    return nullptr;
+  }
+
+  const bool isStart = toInsertType == 0;
+#if DEBUG
+  errs() << (isStart ? "Insert start before: " : "Insert end before: ") << *insertBefore << "\n";
+#endif
+  IRBuilder<> builder(insertBefore);
+  Instruction* call = isStart ? builder.CreateCall(this->atomStart) : builder.CreateCall(this->atomEnd);
+
+#if DEBUG
+  errs() << "*** insertRegionInst ***\n";
+#endif
+  return call;
+}
+
+// Heuristic test for a for-loop block: B does not have exactly one predecessor and some
+// predecessor's name (minus its first two characters) compares numerically greater
+// than B's label.
+// NOTE(review): this relies on the block naming scheme -- confirm it holds for the IR fed in.
+bool InferFreshCons::loopCheck(BasicBlock* B) {
+  auto BName = getSimpleNodeLabel(B);
+  // Exactly one predecessor: never treated as a loop block
+  if (B->hasNPredecessors(1)) return false;
+
+  for (BasicBlock* pred : predecessors(B)) {
+    StringRef predLabel = pred->getName().drop_front(2);
+    // A later-numbered predecessor is presumably a back edge into B
+    if (predLabel.compare_numeric(BName) > 0) return true;
+  }
+  return false;
+}
+
+// Find the first block after a for loop whose loop block is bb
+BasicBlock* InferFreshCons::getLoopEnd(BasicBlock* bb) {
+  // Hop 1: successor 0 of bb's terminator
+  BasicBlock* firstHop = bb->getTerminator()->getSuccessor(0);
+  // Hop 2: successor 1 of that block's terminator, which this pass takes as the loop exit
+  // (original note: for switch inst, succ 0 is the fall through)
+  // NOTE(review): neither terminator's successor count is checked here -- confirm the
+  // CFG shapes that reach this function always provide these edges
+  BasicBlock* loopExit = firstHop->getTerminator()->getSuccessor(1);
+  return loopExit;
+}
+
+// Top level Consistent inference: add one region per entry of consSets
+void InferFreshCons::inferConsistent(std::map<int, inst_vec> consSets) {
+  // std::map iteration visits the sets in ascending id order
+  for (const auto& entry : consSets) {
+#if DEBUG
+    errs() << "[InferConsistent] starting set " << entry.first << "\n";
+#endif
+    addRegion(entry.second, 0);  // 0 = Consistent region type
+  }
+}
+
+// Top level Fresh inference: add one region per set. Mirrors inferConsistent, except
+// that the sets arrive in an outer vec instead of an id-keyed outer map.
+void InferFreshCons::inferFresh(inst_vec_vec freshSets) {
+#if DEBUG
+  errs() << "=== inferFresh ===\n";
+#endif
+  for (const auto& freshSet : freshSets) addRegion(freshSet, 1);  // 1 = Fresh region type
+#if DEBUG
+  errs() << "*** inferFresh ***\n";
+#endif
+}
+
+// Infer one atomic region covering every inst in set and insert its start/end marker calls.
+// Region type: 0 for Consistent, 1 for Fresh (the value is not consulted in this body).
+void InferFreshCons::addRegion(inst_vec set, int regionType) {
+#if DEBUG
+  errs() << "=== addRegion ===\n";
+#endif
+  // Nothing to cover; this also keeps the blockMap.begin() uses below off an empty map
+  if (set.empty()) return;
+
+  // A map from set item to bb
+  std::map<Instruction*, BasicBlock*> blocks;
+  // A queue of regions that still need to be processed
+  std::queue<std::map<Instruction*, BasicBlock*>> regionsNeeded;
+
+#if DEBUG
+  errs() << "Build map from inst to bb\n";
+#endif
+  for (auto* item : set) blocks[item] = item->getParent();
+
+#if DEBUG
+  errs() << "Add map to regionsNeeded\n";
+#endif
+  regionsNeeded.push(blocks);
+
+  auto* root = m->getFunction("app");
+
+  // Iterate until no more possible regions, then pick the best one
+  inst_inst_vec regionsFound;
+  while (!regionsNeeded.empty()) {
+    // Need to raise all blocks in the map until they are the same
+    auto blockMap = regionsNeeded.front();
+    regionsNeeded.pop();
+    // Record which functions have been travelled through
+    std::set<Function*> nested;
+
+#if DEBUG
+    errs() << "[Loop regionsNeeded] Check if blocks are in diff functions\n";
+#endif
+    while (!sameFunction(blockMap)) {
+      // To think on: does this change?
+      auto* goal = findCandidate(blockMap, root);
+#if DEBUG
+      errs() << "[Loop !sameFunction] Go over each item in set\n";
+#endif
+      for (auto* item : set) {
+        // not all blocks need to be moved up
+        Function* currFunc = blockMap[item]->getParent();
+        nested.insert(currFunc);
+        if (currFunc != goal) {
+          // if more than one call:
+          // callChain info is already in the starting set
+          // so only explore a caller if it's in conSet
+          bool first = true;
+          for (User* use : currFunc->users()) {
+            // Skip users that are not themselves members of the set
+            if (find(set.begin(), set.end(), use) == set.end()) {
+              continue;
+            }
+            // Test the cast result before anything (debug output included) dereferences it
+            Instruction* inst = dyn_cast<Instruction>(use);
+            if (inst == NULL) {
+              break;
+            }
+#if DEBUGINFER
+            errs() << "DEBUGINFER: examining use: " << *inst << "\n";
+#endif
+            // update the original map
+            if (first) {
+              blockMap[item] = inst->getParent();
+              first = false;
+            } else {
+              // copy the blockmap, update, add to queue
+              auto copy = blockMap;
+              copy[item] = inst->getParent();
+              regionsNeeded.push(copy);
+            }
+          }  // end forall uses
+        }    // end currFunc check
+      }      // end forall items
+    }        // end same function check
+
+// Now, all bbs in the map are in the same function, so we can run
+// dom or post-dom analysis on that function
+#if DEBUG
+    errs() << "[Loop regionsNeeded] Start dom tree analysis\n";
+#endif
+    auto* home = blockMap.begin()->second->getParent();
+    if (home == nullptr) {
+#if DEBUG
+      errs() << "[Loop regionsNeeded] No function found\n";
+#endif
+      continue;
+    }
+#if DEBUG
+    errs() << "[Loop regionsNeeded] Found home fun: " << home->getName() << "\n";
+#endif
+    auto& domTree = FAM->getResult<DominatorTreeAnalysis>(*home);
+    // Find the closest point that dominates
+    auto* startDom = blockMap.begin()->second;
+    for (auto& [_, B] : blockMap) {
+      startDom = domTree.findNearestCommonDominator(B, startDom);
+    }
+    // A candidate without a start block cannot yield a region: report it and move on
+    if (startDom == nullptr) {
+      errs() << "[Error] Null startDom\n";
+      continue;
+    }
+#if DEBUG
+    errs() << "[Loop regionsNeeded] startDom: " << *startDom << "\n";
+#endif
+// TODO: if an inst in the set is in the bb, we can truncate?
+#if DEBUG
+    errs() << "Start post dom tree analysis\n";
+#endif
+    // Flip directions for the region end
+    auto& postDomTree = FAM->getResult<PostDominatorTreeAnalysis>(*home);
+    // Find the closest point that dominates
+    auto* endDom = blockMap.begin()->second;
+    for (auto& [_, B] : blockMap) {
+      // The post-dom query can yield null (no common block); do not feed that back in
+      if (endDom == nullptr) {
+#if DEBUGINFER
+        errs() << "endDom is null\n";
+#endif
+        break;
+      }
+#if DEBUGINFER
+      errs() << "Finding post dom of: " << getSimpleNodeLabel(B) << " and " << getSimpleNodeLabel(endDom) << "\n";
+#endif
+      endDom = postDomTree.findNearestCommonDominator(B, endDom);
+    }
+    if (endDom == nullptr) {
+      errs() << "[Error] Null endDom\n";
+      continue;
+    }
+#if DEBUG
+    errs() << "[Loop regionsNeeded] endDom: " << *endDom << "\n";
+#endif
+    // Need to make the start and end dominate each other as well.
+    startDom = domTree.findNearestCommonDominator(startDom, endDom);
+    if (startDom != nullptr) endDom = postDomTree.findNearestCommonDominator(startDom, endDom);
+    if (startDom == nullptr || endDom == nullptr) {
+      errs() << (startDom == nullptr ? "[Error] Null startDom after scope merge\n" : "[Error] Null endDom after scope merge\n");
+      continue;
+    }
+#if DEBUG
+    errs() << "[Loop regionsNeeded] After matching scope\n";
+    errs() << "[Loop regionsNeeded] startDom: " << *startDom << "\n";
+    errs() << "[Loop regionsNeeded] endDom: " << *endDom << "\n";
+#endif
+    // Extra check to disallow loop conditional block as the end
+    if (loopCheck(endDom)) {
+#if DEBUG
+      errs() << "[Loop regionsNeeded] Loop check passed\n";
+#endif
+      endDom = getLoopEnd(endDom);
+    }
+#if DEBUG
+    errs() << "[Loop regionsNeeded] Insert insts\n";
+#endif
+    // TODO: fallback if endDom is null? Need hyper-blocks, I think
+    // possibly can do a truncation check, to lessen the size a little, but could that interfere with compiler optimizations?
+    auto* regionStart = truncate(startDom, true, set, nested);
+    auto* regionEnd = truncate(endDom, false, set, nested);
+    if (regionStart == nullptr || regionEnd == nullptr) {
+      errs() << (regionStart == nullptr ? "[Error] Null startDom after truncation\n" : "[Error] Null endDom after truncation\n");
+      continue;
+    }
+#if DEBUG
+    errs() << "[Loop regionsNeeded] Add to regionsFound: (" << *regionStart << ", " << *regionEnd << ")\n";
+#endif
+    regionsFound.emplace_back(regionStart, regionEnd);
+  }  // end while regions needed
+
+  // Every candidate may have been rejected above; findShortest would then return an empty pair
+  if (regionsFound.empty()) {
+    errs() << "[Error] No valid region found\n";
+    return;
+  }
+  // Now see which region is smallest -- instruction count? they must dominate
+  // each other, so there's no possibility of not running into the start from
+  // the end
+  auto [regionStart, regionEnd] = findShortest(regionsFound);
+  insertRegionInst(0, regionStart);
+  insertRegionInst(1, regionEnd);
+
+#if DEBUG
+  errs() << "*** addRegion ***\n";
+#endif
+}
+
+// Truncate a bb if the instruction is in the bb.
+// Returns the inst a region marker should be inserted before: found by scanning B from
+// the front when forwards is true (region start), from the back otherwise (region end).
+Instruction* InferFreshCons::truncate(BasicBlock* B, bool forwards, inst_vec set, std::set<Function*> nested) {
+#if DEBUG
+  errs() << "=== truncate ===\n";
+  errs() << "Set:\n";
+  for (auto& inst : set)
+    errs() << *inst << "\n";
+#endif
+
+  // True when I is a call to one of the functions recorded in nested
+  auto callsNested = [&nested](Instruction* I) {
+    auto* ci = dyn_cast<CallInst>(I);
+    return ci != nullptr && nested.find(ci->getCalledFunction()) != nested.end();
+  };
+  // True when I is a member of set
+  auto inSet = [&set](Instruction* I) { return find(set.begin(), set.end(), I) != set.end(); };
+
+  // Truncate the front
+  if (forwards) {
+#if DEBUG
+    errs() << "Truncate startDom\n";
+    errs() << "Go over each inst\n";
+#endif
+    for (auto& I : *B) {
+      // Stop at first inst in bb that is in the set.
+      if (inSet(&I)) {
+#if DEBUG
+        errs() << "[Loop B] Found first inst also in set: " << I << "\n";
+#endif
+        return &I;
+      }
+      // Need to stop at relevant CallInsts as well
+      if (callsNested(&I)) return &I;
+    }
+
+#if DEBUG
+    errs() << "Found no inst, return last inst\n";
+#endif
+    // Otherwise just return the last inst
+    return &B->back();
+  }
+
+#if DEBUG
+  errs() << "Truncate endDom\n";
+  errs() << "Go over each inst in reverse\n";
+#endif
+  // Reverse directions if not forwards
+  Instruction* prev = NULL;
+  for (auto I = B->rbegin(), rend = B->rend(); I != rend; I++) {
+    auto* inst = &*I;
+    if (inSet(inst)) {
+#if DEBUG
+      errs() << "[Loop B] Found last inst also in set: " << *I << "\n";
+#endif
+      // Need to return the previous inst (next in forwards),
+      // as it should be inserted before the returned inst
+      if (prev == NULL) {
+        // Only happens if use is a ret inst, which is a scope use to make the branching
+        // work, not an actual one, so this is safe
+        return inst;
+      }
+
+#if DEBUG
+      errs() << "[Loop B] Return prev inst: " << *prev << "\n";
+#endif
+      return prev;
+    }
+    if (callsNested(inst)) {
+      // Same fallback as above: never hand a null insertion point back to the caller
+      return prev != NULL ? prev : inst;
+    }
+    prev = inst;
+  }
+
+#if DEBUG
+  errs() << "*** truncate ***\n";
+  errs() << "Found no inst, return first inst\n";
+#endif
+  // Otherwise just return first inst of the block
+  return &B->front();
+}
+
+// Choose the function that should host the region for blockMap: the shared parent when
+// every bb already has the same one, otherwise the result deepCaller computes from root.
+Function* InferFreshCons::findCandidate(std::map<Instruction*, BasicBlock*> blockMap, Function* root) {
+#if DEBUG
+  errs() << "== findCandidate ===\n";
+#endif
+  // Distinct parent functions, kept in first-seen order
+  std::vector<Function*> parents;
+  for (auto& [_, B] : blockMap) {
+    Function* owner = B->getParent();
+    if (find(parents.begin(), parents.end(), owner) != parents.end()) continue;
+#if DEBUG
+    errs() << "Add: " << owner->getName() << "\n";
+#endif
+    parents.push_back(owner);
+  }
+
+  // Easy case: everything is already in the same function
+  if (parents.size() == 1) return parents.front();
+
+  /* Algo Goal: get the deepest function that still calls (or is) all funcs in parents.
+   * Multiple call sites are left to addRegion -- eventually each caller gets its own
+   * region.
+   */
+  Function* deepest = nullptr;
+#if DEBUG
+  errs() << "starting from " << root->getName() << "\n";
+#endif
+  deepCaller(root, parents, &deepest);
+  // A null result is only reported here; the caller receives it as-is
+  if (!deepest) errs() << "ERROR: deepCaller failed\n";
+
+#if DEBUG
+  errs() << "*** findCandidate ***\n";
+#endif
+  return deepest;
+}
+
+/* Recursive: from a root, returns list of called funcs (direct callees, then those gathered
+ * from the recursive calls). *goal is overwritten as candidates are found along the way. */
+std::vector<Function*> InferFreshCons::deepCaller(Function* root, std::vector<Function*>& funList, Function** goal) {
+  std::vector<Function*> calledFuncs;
+  bool mustIncludeSelf = false;
+
+  for (inst_iterator inst = inst_begin(root), E = inst_end(root); inst != E; ++inst) {
+    if (CallInst* ci = dyn_cast<CallInst>(&(*inst))) {
+      // getCalledFunction() is null for indirect calls; there is nothing to descend into
+      if (Function* callee = ci->getCalledFunction()) calledFuncs.push_back(callee);
+    }
+  }
+  std::vector<Function*> explorationList;
+  for (auto* item : funList) {
+    // skip over root or called funcs
+    if (item == root) mustIncludeSelf = true;
+    if (item == root || find(calledFuncs.begin(), calledFuncs.end(), item) != calledFuncs.end()) continue;
+    explorationList.push_back(item);
+#if DEBUGINFER
+    errs() << "need to find " << item->getName() << "\n";
+#endif
+  }
+  // this function is a root of a call tree that calls everything in the func List
+  if (explorationList.empty()) {
+#if DEBUGINFER
+    errs() << "empty list\n";
+#endif
+    *goal = root;
+    return calledFuncs;
+  }
+  // otherwise recurse
+  Function* candidate = nullptr;
+  // Index over the direct callees only: calledFuncs grows in this loop, which would invalidate range-for iterators
+  for (size_t idx = 0, directCount = calledFuncs.size(); idx < directCount; ++idx) {
+    Function* called = calledFuncs[idx];
+    // A direct self-call would recurse forever. NOTE(review): longer call cycles are still unguarded
+    if (called == root) continue;
+    std::vector<Function*> partial = deepCaller(called, explorationList, &candidate);
+    // if candidate is set, it means called is a root for everything in the explorationList
+    if (candidate != nullptr) {
+      *goal = candidate;
+#if DEBUGINFER
+      errs() << "New candidate: " << (*goal)->getName() << "\n";
+#endif
+    }
+    // remove from explorationList, but add to calledFuncs
+    for (Function* item : partial) {
+      func_vec::iterator place = find(explorationList.begin(), explorationList.end(), item);
+      if (place != explorationList.end()) explorationList.erase(place);
+      calledFuncs.push_back(item);
+    }
+  }
+  // current point is a root
+  if (explorationList.empty()) {
+    // not the deepest
+    if (candidate != nullptr && !mustIncludeSelf) {
+      *goal = candidate;
+    } else {
+      // is the deepest
+      *goal = root;
+    }
+  }
+  return calledFuncs;
+}
+
+// Among the candidate (start, end) pairs, return the one with the smallest worst-case
+// instruction count from start to end; a default-constructed pair if there are none.
+inst_inst_pair InferFreshCons::findShortest(inst_inst_vec regionsFound) {
+#if DEBUG
+  errs() << "=== findShortest ===\n";
+  errs() << "Go over regionsFound\n";
+#endif
+  inst_inst_pair winner;
+  int winnerLength = INT32_MAX;
+
+  for (auto& [start, end] : regionsFound) {
+    auto* startParent = start->getParent();
+#if DEBUG
+    errs() << "[Loop regionsFound] startParent: " << *startParent << "\n";
+    errs() << "Go over startParent insts\n";
+#endif
+    // Count the insts of the start block up to and including start itself
+    int prefixLength = 0;
+    for (auto it = startParent->begin(); it != startParent->end(); ++it) {
+      ++prefixLength;
+      if (&*it == start) break;
+    }
+
+    // Max length from the top of the start block to the end inst, with the prefix
+    // counted above taken off again
+    std::vector<BasicBlock*> visited;
+    const int regionLength = getSubLength(startParent, end, visited) - prefixLength;
+#if DEBUG
+    errs() << "[Loop regionsFound] Region length " << regionLength << "\n";
+#endif
+
+    // Only a strictly shorter region replaces the current winner
+    if (regionLength >= winnerLength) continue;
+#if DEBUG
+    errs() << "[Loop regionsFound] Shortest region: (" << *start << ", " << *end
+           << ") at length " << regionLength << "\n";
+#endif
+    winnerLength = regionLength;
+    winner = std::make_pair(start, end);
+  }
+
+#if DEBUG
+  errs() << "*** findShortest ***\n";
+#endif
+  return winner;
+}
+
+// Maximum, over successor paths starting at the top of B, of the number of insts counted;
+// a path stops counting at end. A call to a function with a body adds its instruction count.
+int InferFreshCons::getSubLength(BasicBlock* B, Instruction* end, std::vector<BasicBlock*> visited) {
+#if DEBUG
+  errs() << "=== getSubLength ===\n";
+#endif
+  int count = 0, max_ret = 0;
+  // visited is a by-value copy, so each recursive branch only sees the bbs on its own path
+  visited.push_back(B);
+#if DEBUG
+  errs() << "Go over bb insts\n";
+#endif
+  for (auto& I : *B) {
+    count++;
+    if (&I == end) {
+#if DEBUG
+      errs() << "[Loop I] Cur inst = end, stop\n";
+#endif
+      return count;
+    }
+
+    if (auto* ci = dyn_cast<CallInst>(&I)) {
+      auto* cf = ci->getCalledFunction();
+      // Null check must come first: an indirect call has no called function to query
+      if (cf != nullptr && !cf->empty()) {
+#if DEBUG
+        errs() << "[Loop I] Cur inst = CallInst, calling: " << cf->getName() << "\n";
+#endif
+        count += cf->getInstructionCount();
+      }
+    }
+
+    if (I.isTerminator()) {
+#if DEBUG
+      errs() << "[Loop I] Cur inst = terminator\n";
+#endif
+      for (unsigned i = 0, numSucc = I.getNumSuccessors(); i < numSucc; i++) {
+        auto* next = I.getSuccessor(i);
+        // already counted -- do something more fancy for loops?
+        if (find(visited.begin(), visited.end(), next) != visited.end()) continue;
+        int intermed = getSubLength(next, end, visited);
+        if (intermed > max_ret) max_ret = intermed;
+      }
+    }
+  }
+
+#if DEBUG
+  errs() << "*** getSubLength ***\n";
+#endif
+  return count + max_ret;
+}
+
+// Whether all bbs in blockMap share one parent function (an empty map trivially does)
+bool InferFreshCons::sameFunction(std::map<Instruction*, BasicBlock*> blockMap) {
+  Function* BComp = blockMap.empty() ? nullptr : blockMap.begin()->second->getParent();
+  for (auto& [_, B] : blockMap) if (B->getParent() != BComp) return false;
+  return true;
+}
diff --git a/ocelot/AtomicRegionInference/src/TaintTracker.cpp b/ocelot/AtomicRegionInference/src/TaintTracker.cpp
index 0033a78..ee22ad8 100644
--- a/ocelot/AtomicRegionInference/src/TaintTracker.cpp
+++ b/ocelot/AtomicRegionInference/src/TaintTracker.cpp
@@ -1,847 +1,905 @@
 #include "include/TaintTracker.h"
 
+// Main dataflow function to construct map of store (TODO: not just stores) insts to vars (inputs?) they depend on
+inst_insts_map buildInputs(Module* M) {
+#if DEBUG
+  errs() << "=== buildInputs ===\n";
+#endif
+
+  inst_vec inputInsts = findInputInsts(M);
+  inst_insts_map taintedInsts;
+  inst_vec promotedInputs;
+
+  for (auto inputInst : inputInsts) {
+#if DEBUG
+    errs() << "[Loop inputInst] orig input: " << *inputInst << "\n";
+#endif
+
+    // Add self to map
+    taintedInsts[inputInst].insert(inputInst);
+    std::queue<Value*> toExplore;
+#if DEBUG
+    errs() << "[Loop inputInst] Add orig input to toExplore\n";
+#endif
+    toExplore.push(inputInst);
+
+#if DEBUG
+    errs() << "[Loop inputInst] Explore flows from orig input\n";
+#endif
+
+    // Iterate until no more inter-proc flows found
+    while (!toExplore.empty()) {
+#if DEBUG
+      errs() << "=== Loop toExplore ===\n";
+#endif
+      auto* curVal = toExplore.front();
+      toExplore.pop();
 
-/*Main DataFlow function to construct map of store insts to vars they depend on*/
-inst_insts_map buildInputs(Module* m) 
-{
-  inst_vec inputs = findInputInsts(m);
-  inst_insts_map taintedDecl;
-  inst_vec promoted_inputs;
-
-  for (Instruction* iOp : inputs) {
-    #if DEBUG
-    errs() << "Starting input: " << *iOp <<"\n";
-    #endif
-    //don't forget to add self to map
-    taintedDecl[iOp].insert(iOp);
-    queue<Value*> toExplore;
-    toExplore.push(iOp);
-
-    //iterate until no more interproc flows found
-    while(!toExplore.empty()) {
-      
-      Value* currVal = toExplore.front();
-      if (currVal == NULL) {
-        continue;
-      }
+      if (curVal == NULL) continue;
+
+#if DEBUG
+      errs() << "[Loop toExplore] cur inst: " << *curVal << "\n";
+#endif
 
       val_vec interProcFlows;
-      toExplore.pop();
-      if (currVal == iOp) {
-        interProcFlows = traverseLocal(currVal, iOp, &taintedDecl, nullptr);
-        for (Value* vipf : interProcFlows) {
-          if(Instruction* iipf = dyn_cast<Instruction>(vipf)) {
-            if (CallInst* anno_check = dyn_cast<CallInst>(iipf)){
-                    //we delete these later... creates problems
-                    if (anno_check->getName().contains("Fresh") || 
-                    anno_check->getName().contains("Consistent") ) {
-                      continue;
-                    }
-                  }
-            taintedDecl[iipf].insert(iOp);
+      if (curVal == inputInst) {
+#if DEBUG
+        errs() << "[Loop toExplore] cur inst = orig input\n";
+        errs() << "[Loop toExplore] Call traverseLocal with cur inst (tainted), orig input (srcInput), caller (none)\n";
+#endif
+        interProcFlows = traverseLocal(curVal, inputInst, &taintedInsts, nullptr);
+#if DEBUG
+        errs() << "[Loop toExplore] [cur inst = orig input] Inspect interProcFlows:\n";
+#endif
+        for (auto* vipf : interProcFlows) {
+          if (auto* iipf = dyn_cast<Instruction>(vipf)) {
+            if (auto* anno_check = dyn_cast<CallInst>(iipf)) {
+              // We delete these later... creates problems
+              if (isAnnot(anno_check->getName())) continue;
+            }
+
+#if DEBUG
+            errs() << "Adding orig input (" << *inputInst << ") to set at " << *iipf << "\n";
+#endif
+            taintedInsts[iipf].insert(inputInst);
           }
         }
-      } else if (isa<CallInst>(currVal)) {
-        //note it will not be iop, even though iop is a call
-        //this case handles both returns and pbref
-        
-        promoted_inputs.push_back(dyn_cast<CallInst>(currVal));
-        Value* next = toExplore.front();
+      } else if (isa<CallInst>(curVal)) {
+#if DEBUG
+        errs() << "[Loop toExplore] cur inst = CallInst\n";
+#endif
+        // Note it will not be iop, even though iop is a call
+        // This case handles both returns and pbref
+
+        promotedInputs.push_back(dyn_cast<CallInst>(curVal));
+        auto* next = toExplore.front();
         toExplore.pop();
-        //if the next is a return, this was a return flow
-        //otherwise, if it's an arg, this was pbref
+        // If the next is a return, this was a return flow
+        // Otherwise, if it's an arg, this was pbref
+        //? pbref - pass by reference?
         if (isa<ReturnInst>(next)) {
-         interProcFlows =  traverseLocal(currVal, dyn_cast<CallInst>(currVal), &taintedDecl, nullptr);
+#if DEBUG
+          errs() << "[Loop toExplore] cur inst next = Return inst (return flow)\n";
+#endif
+          interProcFlows = traverseLocal(curVal, dyn_cast<CallInst>(curVal), &taintedInsts, nullptr);
           for (Value* vipf : interProcFlows) {
-            if(Instruction* iipf = dyn_cast<Instruction>(vipf)) {
-
-              //don't add self 
-              if (currVal == vipf) {
+            if (Instruction* iipf = dyn_cast<Instruction>(vipf)) {
+              // don't add self
+              if (curVal == vipf) {
                 continue;
               }
-              if (CallInst* anno_check = dyn_cast<CallInst>(iipf)){
-                    //we delete these later... creates problems
-                    if (anno_check->getName().contains("Fresh") || 
-                    anno_check->getName().contains("Consistent") ) {
-                      continue;
-                    }
-                  }
-              taintedDecl[iipf].insert(dyn_cast<CallInst>(currVal));
+              if (CallInst* anno_check = dyn_cast<CallInst>(iipf)) {
+                // we delete these later... creates problems
+                if (anno_check->getName().contains("Fresh") ||
+                    anno_check->getName().contains("Consistent")) {
+                  continue;
+                }
+              }
+              taintedInsts[iipf].insert(dyn_cast<CallInst>(curVal));
             }
-          }          
+          }
         } else if (isa<Argument>(next)) {
-          //grab the para corresponding to the argument
+#if DEBUG
+          errs() << "[Loop toExplore] cur inst next = Argument (pbref)\n";
+#endif
+          // Grab the para corresponding to the argument
           int index = -1;
           int i = 0;
-          CallInst* ci = dyn_cast<CallInst>(currVal);
-          
+          CallInst* ci = dyn_cast<CallInst>(curVal);
 
-          if (ci->getCalledFunction() == NULL) {
-            continue;
+          if (ci->getCalledFunction() == NULL) continue;
+          if (ci->getCalledFunction()->empty()) continue;
+
+#if DEBUG
+          errs() << "exploring function " << ci->getCalledFunction()->getName() << "\n";
+#endif
+
+          for (auto& arg : ci->getCalledFunction()->args()) {
+            // errs() <<"arg is "<<arg<<"\n";
+            if (dyn_cast<Value>(&arg) != next) {
+              i++;
+            } else {
+              index = i;
+            }
           }
-          if (ci->getCalledFunction()->empty()) {
+          if (index == -1) {
+#if DEBUG
+            errs() << "couldn't find pass by ref " << *next << "\n";
+#endif
             continue;
           }
 
-          #if DEBUG
-            errs() << "exploring function " <<  ci->getCalledFunction()->getName() << "\n";
-          #endif
-         
-          for (auto& arg : ci->getCalledFunction()->args()){
-        	//errs() <<"arg is "<<arg<<"\n";
-          	if(dyn_cast<Value>(&arg)!=next) {
-          	  i++;
-          	} else {
-	            index = i;
-          	}
-	 
-          }
-          if(index == -1){
-            #if DEBUG
-          	errs() << "couldn't find pass by ref " << *next << "\n";
-            #endif
-          	continue;
+          Value* tArg = ci->getArgOperand(index);
+          // errs() << "arg_op: "<< *arg_op<<"\n";
+          // check if reference is part of an array
+          if (GEPOperator* gep = dyn_cast<GEPOperator>(tArg)) {
+            tArg = gep->getPointerOperand();
           }
-        
-	        Value* tArg = ci->getArgOperand(index);
-	        //errs() << "arg_op: "<< *arg_op<<"\n";
-	        //check if reference is part of an array
-	        if (GEPOperator* gep = dyn_cast<GEPOperator>(tArg)) {
-	          tArg = gep->getPointerOperand();
-	        } 
-          //if bitcast inst,
-          else if (BitCastInst* bci = dyn_cast<BitCastInst>(tArg)){
+          // if bitcast inst,
+          else if (BitCastInst* bci = dyn_cast<BitCastInst>(tArg)) {
             tArg = bci->getOperand(0);
           }
-          //need to actually find the first use *after* the callInst
-          Instruction* fstUse = ptrAfterCall(tArg,ci);
-          if (fstUse!=nullptr && fstUse!=tArg) {
-            #if DEBUG
+          // need to actually find the first use *after* the callInst
+          Instruction* fstUse = ptrAfterCall(tArg, ci);
+          if (fstUse != nullptr && fstUse != tArg) {
+#if DEBUG
             errs() << "First use after call: " << *fstUse << "\n";
-            #endif
-            //if the first use is itself a callinst, then treat as a tainted para case, 
+#endif
+            // if the first use is itself a callinst, then treat as a tainted para case,
             val_vec visited_fstuse;
             visited_fstuse.push_back(ci);
-      
-            while (CallInst* ci_fstuse = dyn_cast<CallInst>(fstUse) ) {
-              //already visited, as in loop
-              if (find(visited_fstuse.begin(),visited_fstuse.end(), ci_fstuse)
-              !=visited_fstuse.end()) {
-                //no non-call uses
+
+            while (CallInst* ci_fstuse = dyn_cast<CallInst>(fstUse)) {
+              // already visited, as in loop
+              if (find(visited_fstuse.begin(), visited_fstuse.end(), ci_fstuse) != visited_fstuse.end()) {
+                // no non-call uses
                 fstUse = nullptr;
                 break;
               }
-              if (CallInst* anno_check = dyn_cast<CallInst>(ci_fstuse)){
-                    //we delete these later... creates problems
-                    if (anno_check->getName().contains("Fresh") || 
-                    anno_check->getName().contains("Consistent") ) {
-                      continue;
-                    }
-                  }
+              if (CallInst* anno_check = dyn_cast<CallInst>(ci_fstuse)) {
+                // we delete these later... creates problems
+                if (anno_check->getName().contains("Fresh") ||
+                    anno_check->getName().contains("Consistent")) {
+                  continue;
+                }
+              }
               visited_fstuse.push_back(ci_fstuse);
 
-              unsigned int arg_num = ci_fstuse->getNumArgOperands();
-          
+              unsigned int arg_num = ci_fstuse->arg_size();
+
+#if DEBUG
+              errs() << "[Loop customUsers] Find index of tainted arg:\n";
+#endif
               // Find the index of the tainted argument
-              for (unsigned int i = 0; i < arg_num; i++){
-                #if DEBUG
-                  errs() << "DEBUG: comparing "<< *tArg <<" and " << *(ci_fstuse->getArgOperand(i))<<"\n";
-                #endif
-                if(ci_fstuse->getArgOperand(i)==tArg) {
-                  #if DEBUG
-                   // errs() << "DEBUG: pushing arg of "<< calledFunc->getName() <<"\n";
-                  #endif
+              for (unsigned int i = 0; i < arg_num; i++) {
+                // TODO
+#if DEBUG
+                errs() << "comparing " << *tArg << " and " << *(ci_fstuse->getArgOperand(i)) << "\n";
+#endif
+                if (ci_fstuse->getArgOperand(i) == tArg) {
+#if DEBUG
+                  // errs() << "pushing arg of "<< calledFunc->getName() <<"\n";
+#endif
                   interProcFlows.push_back((ci_fstuse->getCalledFunction()->arg_begin() + i));
-                  //MUST also push back the call inst.
+                  // MUST also push back the call inst.
                   interProcFlows.push_back(ci_fstuse);
-                  //and the srcOp
+                  // and the srcOp
                   interProcFlows.push_back(ci);
-                  
+
                   break;
                 }
               }
-              //find next local use 
-              //promoted_inputs.push_back(ci);
-              taintedDecl[ci_fstuse].insert(ci);
-              fstUse  = ptrAfterCall(tArg,ci_fstuse);
+              // find next local use
+              // promoted_inputs.push_back(ci);
+              taintedInsts[ci_fstuse].insert(ci);
+              fstUse = ptrAfterCall(tArg, ci_fstuse);
 
               if (fstUse == nullptr) {
                 break;
               }
-            } 
-            //re nullptr check
-            if (fstUse!=nullptr) {  
-              interProcFlows =  traverseLocal(fstUse, dyn_cast<CallInst>(currVal), &taintedDecl, nullptr);
+            }
+            // re nullptr check
+            if (fstUse != nullptr) {
+              interProcFlows = traverseLocal(fstUse, dyn_cast<CallInst>(curVal), &taintedInsts, nullptr);
               for (Value* vipf : interProcFlows) {
-                if(Instruction* iipf = dyn_cast<Instruction>(vipf)) {
-                  if (CallInst* anno_check = dyn_cast<CallInst>(iipf)){
-                    //we delete these later... creates problems
-                    if (anno_check->getName().contains("Fresh") || 
-                    anno_check->getName().contains("Consistent") ) {
+                if (Instruction* iipf = dyn_cast<Instruction>(vipf)) {
+                  if (CallInst* anno_check = dyn_cast<CallInst>(iipf)) {
+                    // we delete these later... creates problems
+                    if (anno_check->getName().contains("Fresh") ||
+                        anno_check->getName().contains("Consistent")) {
                       continue;
                     }
                   }
-                  taintedDecl[iipf].insert(dyn_cast<CallInst>(currVal));
+                  taintedInsts[iipf].insert(dyn_cast<CallInst>(curVal));
                 }
               }
             }
-          }  
+          }
         }
-      } else if (isa<Argument>(currVal)) {
-        #if DEBUG
-          	errs() << "exploring tainted arg " << *currVal << "\n";
-          #endif
-        Instruction* caller = dyn_cast<CallInst>(toExplore.front());
-        
-        //promoted_inputs.push_back(caller);
+      } else if (isa<Argument>(curVal)) {
+#if DEBUG
+        errs() << "[Loop toExplore] cur inst = Argument (tainted arg)\n";
+#endif
+
+        auto* caller = dyn_cast<CallInst>(toExplore.front());
         toExplore.pop();
-        Instruction* innerSrcOp = dyn_cast<Instruction>(toExplore.front());
+#if DEBUG
+        errs() << "[Loop toExplore] Caller: " << *caller << "\n";
+#endif
+        // promoted_inputs.push_back(caller);
+
+        auto* innerInputInst = dyn_cast<Instruction>(toExplore.front());
         toExplore.pop();
-        interProcFlows = traverseLocal(currVal, innerSrcOp, &taintedDecl, caller);
-             
-              for (Value* vipf : interProcFlows) {
-                if(Instruction* iipf = dyn_cast<Instruction>(vipf)) {
-                  if (CallInst* anno_check = dyn_cast<CallInst>(iipf)){
-                    //we delete these later... creates problems
-                    if (anno_check->getName().contains("Fresh") || 
-                    anno_check->getName().contains("Consistent") ) {
-                      continue;
-                    }
-                  }
-                  taintedDecl[iipf].insert(innerSrcOp);
-                }
-              }
-      }//end elsif chain
-      #if DEBUG
-        errs() << "Finished iteration\n";
-      #endif
-      for (Value* item : interProcFlows) {
-        if(item != NULL) {
-          //errs() <<"pushing item " << *item <<"\n";
+#if DEBUG
+        errs() << "[Loop toExplore] orig input: " << *innerInputInst << "\n";
+        errs() << "[Loop toExplore] Call traverseLocal with cur inst (tainted), orig input (srcInput), caller\n";
+#endif
+
+        interProcFlows = traverseLocal(curVal, innerInputInst, &taintedInsts, caller);
+
+#if DEBUG
+        errs() << "[Loop toExplore] Inspect interProcFlows:\n";
+#endif
+        for (auto* vipf : interProcFlows) {
+          if (auto* iipf = dyn_cast<Instruction>(vipf)) {
+            if (auto* anno_check = dyn_cast<CallInst>(iipf)) {
+              // We delete these later... creates problems
+              if (isAnnot(anno_check->getName())) continue;
+            }
+            taintedInsts[iipf].insert(innerInputInst);
+#if DEBUG
+            errs() << "Adding innerInputInst (" << *innerInputInst << ") to set at " << *iipf << "\n";
+#endif
+          }
+        }
+      }  // end elsif chain
+
+      for (auto* item : interProcFlows) {
+        if (item != NULL) {
+#if DEBUG
+          errs() << "Add to toExplore: " << *item << "\n";
+#endif
           toExplore.push(item);
         } else {
           errs() << "ERROR: encountered null interproc item\n";
         }
       }
-    }//end while queue not empty
-  }//end for all iOp
-  
-  return taintedDecl;
+
+#if DEBUG
+      errs() << "*** Loop toExplore ***\n";
+#endif
+    }  // end while queue not empty
+  }    // end for all inputInsts
+
+#if DEBUG
+  errs() << "*** buildInputs ***\n";
+#endif
+  return taintedInsts;
 }
 
-val_vec traverseLocal(Value* tainted, Instruction* srcOp, inst_insts_map* iInfo, Instruction* caller)
-{
+val_vec traverseLocal(Value* tainted, Instruction* srcInput, inst_insts_map* taintedInsts, Instruction* caller) {
+#if DEBUG
+  errs() << "=== traverseLocal ===\n";
+#endif
+
   val_vec interProcSinks;
-  queue<Value*> localDeps;
+  std::queue<Value*> localDeps;
 
+#if DEBUG
+  errs() << "Add cur inst to localDeps\n";
+#endif
   localDeps.push(tainted);
-  while(!localDeps.empty()) {
-    Value* currVal = localDeps.front();
+  while (!localDeps.empty()) {
+#if DEBUG
+    errs() << "=== Loop localDeps ===\n";
+#endif
+    auto* curVal = localDeps.front();
     localDeps.pop();
-     val_vec customUsers;
-    if (StoreInst* si = dyn_cast<StoreInst>(currVal)) {    
-      //add the pointer to deps, as stores have no uses
-      //Add info on the store to the map
-      if(iInfo->find(si)!=iInfo->end()) {
-        if (find(iInfo->at(si).begin(), iInfo->at(si).end(), srcOp)!=iInfo->at(si).end()) {
-          continue;
-        } else {
-          iInfo->at(si).insert(srcOp);
-        }
+#if DEBUG
+    errs() << "[Loop localDeps] cur inst: " << *curVal << "\n";
+#endif
+    val_vec customUsers;
+    if (auto* si = dyn_cast<StoreInst>(curVal)) {
+#if DEBUG
+      errs() << "[Loop localDeps] cur inst = StoreInst\n";
+#endif
+      // Add the pointer to deps, as stores have no uses
+      // Add info on the store to the map
+      if (auto known = taintedInsts->find(si); known != taintedInsts->end()) {
+        // insert() reports whether srcInput was newly added. If it was already
+        // recorded for this store, skip it (no set copy, no linear scan).
+        if (!known->second.insert(srcInput).second) continue;
       } else {
-        set<Instruction*> seti;
-        seti.insert(srcOp);
-        iInfo->emplace(si, seti);
+        std::set<Instruction*> seti;
+        seti.insert(srcInput);
+        taintedInsts->emplace(si, seti);
       }
-      #if DEBUG
-        errs() << " adding to map " << *srcOp << " for " << *si << "\n";
-      #endif
-      //See if it is (or aliases?) one of the function arguments
-      for (Argument& arg : si->getFunction()->args()) {
-        Value* to_comp = si->getPointerOperand()->stripPointerCasts();
-        #if DEBUG
-        errs() << " PBRef comp: " << *to_comp << " and " << arg << "\n";
-        #endif
-        if (to_comp== &arg) {
-          //if taint came from inside any callsite is potentially tainted
+#if DEBUG
+      errs() << "[Loop localDeps] Adding orig input (" << *srcInput << ") to set at cur inst (" << *si << ")\n";
+#endif
+      // See if it is (or aliases?) one of the function arguments (PBRef comp)
+      for (auto& arg : si->getFunction()->args()) {
+        auto* storePtr = si->getPointerOperand()->stripPointerCasts();
+#if DEBUG
+        errs() << "[Loop localDeps] Is ptr being stored to (" << *storePtr << ") = fun arg (" << arg << ")\n";
+#endif
+        if (storePtr == &arg) {
+          // if taint came from inside any callsite is potentially tainted
           if (caller == nullptr) {
-            for(Value* calls : si->getFunction()->users()) {
+            for (auto calls : si->getFunction()->users()) {
               interProcSinks.push_back(calls);
               interProcSinks.push_back(dyn_cast<Value>(&arg));
-              if (Instruction* key = dyn_cast<Instruction>(calls)) {
-               //check to make sure not already visited
-             //   iInfo->at(key).insert(srcOp);
-                
+              if (auto key = dyn_cast<Instruction>(calls)) {
+                // check to make sure not already visited
+                //   taintedInsts->at(key).insert(srcOp);
               }
             }
           } else {
-          //otherwise, just the caller's
+            // otherwise, just the caller's
             interProcSinks.push_back(caller);
             interProcSinks.push_back(dyn_cast<Value>(&arg));
-            if (Instruction* key = dyn_cast<Instruction>(caller)) {
-
-
-              //check to make sure not already visited
-      //        iInfo->at(key).insert(srcOp);
-              
+            if (auto key = dyn_cast<Instruction>(caller)) {
+              // check to make sure not already visited
+              //        taintedInsts->at(key).insert(srcOp);
             }
           }
         }
       }
-      //construct "users" of the store
-      #if DEBUG
-        errs() << "DEBUG: Store users\n";
-      #endif
-      //add in loads that are reachable from the tainted store.
-      Value* ptr = si->getPointerOperand();
-      //if bci, get the operand, as that's the useful ptr
-      if (BitCastInst* bciptr = dyn_cast<BitCastInst>(ptr) ){
-        ptr = bciptr->getOperand(0);
-      }
-      for(Value* use : ptr->users()){
-        if (Instruction* useOfStore = dyn_cast<Instruction>(use)) {
-          #if DEBUG
-            errs() << "DEBUG: checking use " << *useOfStore << "\n";
-          #endif
+      // Construct "users" of the store
+#if DEBUG
+      errs() << "[Loop localDeps] Add users (loads) of store to customUsers:\n";
+#endif
+      // Add in loads that are reachable from the tainted store.
+      auto* ptr = si->getPointerOperand();
+      // If bci, get the operand, as that's the useful ptr
+      if (auto bciptr = dyn_cast<BitCastInst>(ptr)) ptr = bciptr->getOperand(0);
+      for (auto* use : ptr->users()) {
+        if (auto* useOfStore = dyn_cast<Instruction>(use)) {
           if (storePrecedesUse(useOfStore, si)) {
+#if DEBUG
+            errs() << "[Loop Store Users] store precedes this use, add:" << *useOfStore << "\n";
+#endif
             customUsers.push_back(useOfStore);
           }
         }
       }
-      //update currVal to be the pointer
-      currVal = si->getPointerOperand();
+      // Update curVal to be the pointer
+      curVal = si->getPointerOperand();
 
-      //if it's a gepi, see if there are others that occur afterwards 
+      // If it's a gepi, see if there are others that occur afterwards
       if (isa<GetElementPtrInst>(si->getPointerOperand())) {
         inst_vec matching = couldMatchGEPI(dyn_cast<GetElementPtrInst>(si->getPointerOperand()));
-        for (Instruction* item : matching) {
+        for (auto item : matching) {
           localDeps.push(item);
         }
-        //check pbref, need to compare op of the gepi, not gepi itself
-         for (Argument& arg : si->getFunction()->args()) {
-          #if DEBUG
-          errs() << " PBRef comp: " << *dyn_cast<Instruction>(currVal)->getOperand(0) << " and " << arg << "\n";
-          #endif
-          if (dyn_cast<Instruction>(currVal)->getOperand(0) == &arg) {
-            //if taint came from inside any callsite is potentially tainted
+        // check pbref, need to compare op of the gepi, not gepi itself
+        for (auto& arg : si->getFunction()->args()) {
+#if DEBUG
+          errs() << " PBRef comp: " << *dyn_cast<Instruction>(curVal)->getOperand(0) << " and " << arg << "\n";
+#endif
+          if (dyn_cast<Instruction>(curVal)->getOperand(0) == &arg) {
+            // if taint came from inside any callsite is potentially tainted
             if (caller == nullptr) {
-              for(Value* calls : si->getFunction()->users()) {
+              for (Value* calls : si->getFunction()->users()) {
                 interProcSinks.push_back(calls);
                 interProcSinks.push_back(dyn_cast<Value>(&arg));
                 if (Instruction* key = dyn_cast<Instruction>(calls)) {
-
-          //         iInfo->at(key).insert(srcOp);
+                  //         taintedInsts->at(key).insert(srcOp);
                 }
               }
             } else {
-            //otherwise, just the caller's
+              // otherwise, just the caller's
               interProcSinks.push_back(caller);
               interProcSinks.push_back(dyn_cast<Value>(&arg));
               if (Instruction* key = dyn_cast<Instruction>(caller)) {
-
-            //  iInfo->at(key).insert(srcOp);
+                //  taintedInsts->at(key).insert(srcOp);
               }
             }
           }
-         }
+        }
       }
-        
+
     } else {
-      //if not a store, do normal users of currval
-      customUsers.insert(customUsers.end(), currVal->user_begin(), currVal->user_end());
+#if DEBUG
+      errs() << "[Loop localDeps] cur inst != StoreInst\n";
+      errs() << "[Loop localDeps] Add users of cur inst to customUsers:\n";
+      for (auto* use : curVal->users()) errs() << *use << "\n";
+#endif
+      // If not a store, do normal users of curVal
+      customUsers.insert(customUsers.end(), curVal->user_begin(), curVal->user_end());
     }
 
-    
-   
-      
-    for (Value* use : customUsers) { 
-
-      //check that the use of a tainted pointer is really tainted
-      
-      //this is checking if the use is a tainted store 
-      
-      if (ReturnInst* ri = dyn_cast<ReturnInst>(use)) {
-        #if DEBUG
-          errs() << "DEBUG: in return case\n";
-        #endif
+#if DEBUG
+    errs() << "[Loop localDeps] Go over uses\n";
+#endif
+    //* Here we may cross over to another procedure
+    for (auto* use : customUsers) {
+      // Check that the use of a tainted pointer is really tainted
+
+      // This is checking if the use is a tainted store
+
+      if (auto ri = dyn_cast<ReturnInst>(use)) {
+#if DEBUG
+        errs() << "[Loop customUsers] use = ReturnInst\n";
+#endif
         if (caller == nullptr) {
-          for(Value* calls : ri->getFunction()->users()) {
-            if(CallInst* ci = dyn_cast<CallInst>(calls)) {
+#if DEBUG
+          errs() << "[Loop customUsers] No caller\n";
+#endif
+          for (auto calls : ri->getFunction()->users()) {
+            if (auto ci = dyn_cast<CallInst>(calls)) {
               interProcSinks.push_back(calls);
-              //extra for bookkeeping
+              // extra for bookkeeping
               interProcSinks.push_back(use);
             }
           }
         } else {
-        //otherwise, just the caller's
+#if DEBUG
+          errs() << "[Loop customUsers] Some caller\n";
+#endif
+          // otherwise, just the caller's
           interProcSinks.push_back(caller);
-          //extra for bookkeeping
+          // extra for bookkeeping
           interProcSinks.push_back(use);
         }
-
-      } else if (isa<CallInst>(use)) {
-        #if DEBUG
-          errs() << "DEBUG: in call case\n";
-        #endif
-        //Add the right argument to the list
-        CallInst* ci = dyn_cast<CallInst>(use);
-        Function* calledFunc = ci ->getCalledFunction();
-        if (calledFunc == NULL || calledFunc->empty()) {
-          //special case for llvm.memcpy
-          //See if it is (or aliases?) one of the function arguments
-          if (calledFunc!=NULL && calledFunc->hasName() &&
-            calledFunc->getName().contains("llvm.memcpy")) {
-            //errs() << "DEBUG: memcpy " << *ci << "\n";  
+      } else if (auto* ci = dyn_cast<CallInst>(use)) {
+#if DEBUG
+        errs() << "[Loop customUsers] use = CallInst\n";
+#endif
+        // Add the right argument to the list
+        auto* calledFun = ci->getCalledFunction();
+        if (calledFun == NULL || calledFun->empty()) {
+          // special case for llvm.memcpy
+          // See if it is (or aliases?) one of the function arguments
+          if (calledFun != NULL && calledFun->hasName() &&
+              calledFun->getName().contains("llvm.memcpy")) {
+            // errs() << "memcpy " << *ci << "\n";
             Value* src = ci->getOperand(1)->stripPointerCasts();
             Value* dest = ci->getOperand(0);
-           // errs() << "DEBUG: with dest " << *dest << "\n";  
+            // errs() << "with dest " << *dest << "\n";
             if (BitCastInst* bci = dyn_cast<BitCastInst>(dest)) {
               dest = bci->getOperand(0);
-            } 
+            }
             if (GetElementPtrInst* gepi = dyn_cast<GetElementPtrInst>(dest)) {
               dest = gepi->getOperand(0);
-          //    errs() << "DEBUG: and gepi dest " << *dest << "\n"; 
+              //    errs() << "and gepi dest " << *dest << "\n";
             }
             bool found = false;
             for (Argument& arg : ci->getFunction()->args()) {
-              //Value* to_comp = 
-              #if DEBUG
+// Value* to_comp =
+#if DEBUG
               errs() << " PBRef comp: " << *dest << " and " << arg << "\n";
-              #endif
-              if (dest== &arg) {
+#endif
+              if (dest == &arg) {
                 found = true;
-                //if taint came from inside any callsite is potentially tainted
+                // if taint came from inside any callsite is potentially tainted
                 if (caller == nullptr) {
-                  for(Value* calls : ci->getFunction()->users()) {
+                  for (Value* calls : ci->getFunction()->users()) {
                     interProcSinks.push_back(calls);
                     interProcSinks.push_back(dyn_cast<Value>(&arg));
                     if (Instruction* key = dyn_cast<Instruction>(calls)) {
-
-               //        iInfo->at(key).insert(srcOp);
+                      //        taintedInsts->at(key).insert(srcOp);
                     }
                   }
                 } else {
-                //otherwise, just the caller's
+                  // otherwise, just the caller's
                   interProcSinks.push_back(caller);
                   interProcSinks.push_back(dyn_cast<Value>(&arg));
                   if (Instruction* key = dyn_cast<Instruction>(caller)) {
-              //      iInfo->at(key).insert(srcOp);
+                    //      taintedInsts->at(key).insert(srcOp);
                   }
                 }
               }
             }
-            //it wasn't pbref, just "store", so find fst ptr after call
-            //and also put in iInfo
+            // it wasn't pbref, just "store", so find fst ptr after call
+            // and also put in taintedInsts
             if (!found) {
-              Value* destFst = ptrAfterCall(dest,ci);
-
-              
-              //in case of loop 
-              if (destFst !=ci->getOperand(0)) {
-               // errs () << "found a memcpy store " << *destFst <<"\n";
-                if(iInfo->find(ci)!=iInfo->end()) {
-                  if (find(iInfo->at(ci).begin(), iInfo->at(ci).end(), srcOp)!=iInfo->at(ci).end()) {
+              Value* destFst = ptrAfterCall(dest, ci);
+              // ptrAfterCall returns nullptr when dest has no use after the call;
+              // a null must never be queued. The second test is the original loop guard.
+              if (destFst != nullptr && destFst != ci->getOperand(0)) {
+                // errs () << "found a memcpy store " << *destFst <<"\n";
+                if (taintedInsts->find(ci) != taintedInsts->end()) {
+                  if (taintedInsts->at(ci).count(srcInput) != 0) {
                     continue;
                   } else {
-                    iInfo->at(ci).insert(srcOp);
+                    taintedInsts->at(ci).insert(srcInput);
                   }
                 } else {
-                  set<Instruction*> seti;
-                  seti.insert(srcOp);
-                  iInfo->emplace(ci, seti);
+                  std::set<Instruction*> seti;
+                  seti.insert(srcInput);
+                  taintedInsts->emplace(ci, seti);
                 }
                 localDeps.push(destFst);
               }
-            }  
-          } //end memcpy check
-
-          //conservative tainting decision
-          if (calledFunc->empty()) {
-            
-              //if it's empty but declared in our mod (one of the passed in C ones)
-              //and it returns a value, then consider the taint passed to the 
-              //return
-              if (!calledFunc->getName().contains("llvm") && 
-              !calledFunc->getName().contains("core")) {
-                #if DEBUG
-                errs() << "DEBUG: pushing presumed c lib func " << calledFunc->getName() << "\n";
-                #endif
-                localDeps.push(ci);
-              }
-            
+            }
+          }  // end memcpy check
+
+          // Conservative tainting decision. calledFun is null for indirect calls
+          // (this branch is entered on `calledFun == NULL || ...`), so test it first.
+          if (calledFun != nullptr && calledFun->empty()) {
+            // Body-less function whose name contains neither "llvm" nor "core":
+            // presume (as for a C library routine) the taint flows into its return.
+            if (!calledFun->getName().contains("llvm") &&
+                !calledFun->getName().contains("core")) {
+#if DEBUG
+              errs() << "pushing presumed c lib func " << calledFun->getName() << "\n";
+#endif
+              localDeps.push(ci);
+            }
           }
           continue;
-            
         }
-	      unsigned int arg_num = ci->getNumArgOperands();
-	      
-	      // Find the index of the tainted argument
-	      for (unsigned int i = 0; i < arg_num; i++){
-          #if DEBUG
-            errs() << "DEBUG: comparing "<< *currVal <<" and " << *(ci->getArgOperand(i))<<"\n";
-          #endif
-	        if(ci->getArgOperand(i)==currVal) {
-            #if DEBUG
-              errs() << "DEBUG: pushing arg of "<< calledFunc->getName() <<"\n";
-            #endif
-	          interProcSinks.push_back((calledFunc->arg_begin() + i));
-            //MUST also push back the call inst.
+        // Only formal parameters have an Argument; ignore extra variadic operands.
+        unsigned int arg_num = std::min<unsigned int>(ci->arg_size(), calledFun->arg_size());
+#if DEBUG
+        errs() << "[Loop customUsers] Find tainted arg of " << calledFun->getName() << "\n";
+#endif
+        // Find the index of the tainted argument
+        for (unsigned int i = 0; i < arg_num; i++) {
+          auto* arg = ci->getArgOperand(i);
+          if (arg == curVal) {
+            auto funArg = calledFun->arg_begin() + i;
+#if DEBUG
+            errs() << "Found tainted arg: " << *arg << ", add fun arg (" << *funArg << "), the use (" << *ci << "), and orig input (" << *srcInput << ") to interProcFlows\n";
+#endif
+            interProcSinks.push_back(funArg);
+            // MUST also push back the call inst.
             interProcSinks.push_back(ci);
-            //MUST also push back the current srcOp
-            interProcSinks.push_back(srcOp);
-            if (Instruction* key = dyn_cast<Instruction>(ci)) {
-              //  iInfo->at(key).insert(srcOp);
+            // MUST also push back the current srcInput
+            interProcSinks.push_back(srcInput);
+            if (auto* key = dyn_cast<Instruction>(ci)) {
+              //  taintedInsts->at(key).insert(srcOp);
             }
-	          break;
-	        }
-	      }
-
-      } else if (Instruction* iUse = dyn_cast<Instruction>(use)) {
+            break;
+          }
+        }
+      } else if (auto* iUse = dyn_cast<Instruction>(use)) {
+#if DEBUG
+        errs() << "[Loop customUsers] use != ReturnInst & use != CallInst\n";
+#endif
         if (iUse->isTerminator()) {
           if (iUse->getNumSuccessors() > 1) {
-            //Add control deps off of a branch.
-            #if DEBUG
-              errs() << "DEBUG: adding condeps case\n";
-            #endif
+// Add control deps off of a branch.
+#if DEBUG
+            errs() << "adding condeps case\n";
+#endif
             val_vec controlDeps = getControlDeps(iUse);
-            //for all condep, add any reached loads, and add the store to the map 
-            for (Value* item : controlDeps) {
-              if (StoreInst* siCon = dyn_cast<StoreInst>(item)) {
+            // for all condep, add any reached loads, and add the store to the map
+            for (auto* item : controlDeps) {
+              if (auto* siCon = dyn_cast<StoreInst>(item)) {
                 localDeps.push(siCon);
               }
-            }//end for vals in condep
+            }
           }
-        }//end terminator check
-        #if DEBUG
-        //errs() << "DEBUG: pushing "<< *iUse<<"\n";
-        #endif
+        }
+
+#if DEBUG
+        errs() << "[Loop customUsers] Add use to localDeps\n";
+#endif
+        //* Here we may push inst from another procedure, crossing boundaries
         localDeps.push(iUse);
       }
     }
+#if DEBUG
+    errs() << "*** Loop localDeps ***\n";
+#endif
   }
 
+#if DEBUG
+  errs() << "*** traverseLocal ***\n";
+#endif
   return interProcSinks;
 }
 
-
-
-inst_vec findInputInsts(Module* M) 
-{
-  inst_vec sources;
-  func_vec io_name;
-  //Find io name annotations
-  for(GlobalVariable& gv : M->globals()) {
-    if(gv.getName().contains("IO_NAME"))  {
-      
-      if( Function* fp = dyn_cast<Function>(gv.getInitializer()->getOperand(0)->stripPointerCasts())) {
-      #if DEBUG
-        errs() << "Found io inst "<< fp->getName() <<"\n";
-      #endif  
-      	io_name.push_back(fp);
+// Collect every call to a function named by an IO_NAME* global annotation.
+inst_vec findInputInsts(Module* M) {
+#if DEBUG
+  errs() << "findInputInsts\n";
+#endif
+  inst_vec inputInsts;
+  // Find IO_NAME annotations
+  for (auto& gv : M->globals()) {
+    if (gv.getName().starts_with("IO_NAME")) {
+      if (auto* fp = gv.hasInitializer() ? dyn_cast<Function>(gv.getInitializer()) : nullptr) {
+#if DEBUG
+        errs() << "Found IO fun: " << fp->getName() << "\n";
+#endif
+        // Now, search for calls to those functions
+        for (auto& F : *M) {
+          for (auto& B : F) {
+            for (auto& I : B) {
+              if (auto* ci = dyn_cast<CallInst>(&I)) {
+                if (fp == ci->getCalledFunction()) {
+#if DEBUG
+                  errs() << "Found IO call: " << I << "\n";
+#endif
+                  inputInsts.push_back(&I);
+                  // No break: a basic block may contain several IO calls.
+                }
+              }
+            }
+          }
+        }
       } else {
-      	errs() << "ERROR: could not unwrap function pointer from annotation\n";
+        // TODO: Say something else
+        errs() << "[ERROR] Could not unwrap function pointer from annotation\n";
       }
     }
-  } 
-
-  //now, search for calls to those functions
-  for (Function& func : * M) {
-    for (BasicBlock& bb : func) {
-      for(Instruction& inst : bb) {
-        if(CallInst* ci = dyn_cast<CallInst>(&inst)) {
-        	if(find(io_name.begin(), io_name.end(),ci->getCalledFunction())!=io_name.end()) {
-	          sources.push_back(&inst);
-	        }
-        }
-      }
-
-    } 
   }
-  return sources;
-}
 
+  return inputInsts;
+}
 
-/*See if a particular store is exposed to a use -- possibly replace couldLoadTainted*/
+// True if toMatch is the nearest preceding store to its pointer operand on some CFG path to use (may replace couldLoadTainted)
 bool storePrecedesUse(Instruction* use, StoreInst* toMatch) {
-  queue<BasicBlock*> to_visit; 
-  vector<BasicBlock*> visited;
+  std::queue<BasicBlock*> to_visit;
+  std::vector<BasicBlock*> visited;
   BasicBlock* current;
-  vector<Value*> possible;
+  std::vector<Value*> possible;
   int found = 0;
   int skip = 1;
-  
+
   to_visit.push(use->getParent());
 
-  while(!to_visit.empty()) {
+  while (!to_visit.empty()) {
     current = to_visit.front();
     to_visit.pop();
-     
-    for(BasicBlock::reverse_iterator i = current->rbegin(), e = current->rend(); i!=e;++i) {
+
+    for (BasicBlock::reverse_iterator i = current->rbegin(), e = current->rend(); i != e; ++i) {
       Instruction* inst = &*i;
-      //don't look at li block before li
-      if((current == use->getParent())&&(skip)) {
-	      //errs() << "skipping" << *inst <<"\n";
-      	if(use==inst){
-	        skip = 0;
-      	}
-      	continue;
+      // In the use's own block, skip the use and every instruction after it (reverse scan)
+      if ((current == use->getParent()) && (skip)) {
+        // errs() << "skipping" << *inst <<"\n";
+        if (use == inst) {
+          skip = 0;
+        }
+        continue;
+      }
+      // if(BI!=nullptr) {
+      // errs() << "looking at" << *BI <<"\n";
+      if (StoreInst* si = dyn_cast<StoreInst>(inst)) {
+        // errs() << "found a store" << *si <<"\n";
+        if (si->getPointerOperand() == toMatch->getPointerOperand()) {
+          possible.push_back(si);
+          found = 1;
+          break;
+        }
       }
-      //if(BI!=nullptr) {
-      //errs() << "looking at" << *BI <<"\n";
-    	if (StoreInst* si = dyn_cast<StoreInst>(inst)) {
-    	  //errs() << "found a store" << *si <<"\n";
-    	  if (si->getPointerOperand() == toMatch->getPointerOperand()) {
-	        possible.push_back(si);
-	        found = 1;
-	        break;
-	      }
-    	}
     }
-    //we found a store in this node
-    if(found) {
+    // we found a store in this node
+    if (found) {
       found = 0;
       continue;
     }
     /*add pred. blocks to our queue*/
     for (auto PI = pred_begin(current); PI != pred_end(current); ++PI) {
-      //if it's new
-      if(!(find(visited.begin(), visited.end(), *PI) != visited.end())){
-	      visited.push_back(*PI);
-	      to_visit.push(*PI);
+      // if it's new
+      if (!(find(visited.begin(), visited.end(), *PI) != visited.end())) {
+        visited.push_back(*PI);
+        to_visit.push(*PI);
       }
     }
   }
   /*Was one of the preceding writes the store in question?*/
-  for(Value* poss : possible) {
-    if(poss == toMatch) {
-	    return true;
+  for (Value* poss : possible) {
+    if (poss == toMatch) {
+      return true;
     }
-    
   }
-  //this use does not consume the tainted store
+  // this use does not consume the tainted store
   return false;
 }
 
-
 /*See if the same EP is used in multiple GEPI, check if exposed*/
 inst_vec couldMatchGEPI(GetElementPtrInst* tGEPI) {
-  queue<BasicBlock*> to_visit; 
-  vector<BasicBlock*> visited;
+  std::queue<BasicBlock*> to_visit;
+  std::vector<BasicBlock*> visited;
   BasicBlock* current;
-  vector<Value*> possible;
+  std::vector<Value*> possible;
   inst_vec matching;
   int found = 0;
   int skip = 1;
-  
+
   to_visit.push(tGEPI->getParent());
 
-  while(!to_visit.empty()) {
+  while (!to_visit.empty()) {
     current = to_visit.front();
     to_visit.pop();
-     
-     //forwards exploration
-    for(Instruction& i : *current) {
+
+    // forwards exploration
+    for (Instruction& i : *current) {
       Instruction* inst = &i;
-      //don't look at gepi block before gepi
-      if((current == tGEPI->getParent())&&(skip)) {
-	      //errs() << "skipping" << *inst <<"\n";
-      	if(tGEPI==inst){
-	        skip = 0;
-      	}
-      	continue;
+      // don't look at gepi block before gepi
+      if ((current == tGEPI->getParent()) && (skip)) {
+        // errs() << "skipping" << *inst <<"\n";
+        if (tGEPI == inst) {
+          skip = 0;
+        }
+        continue;
       }
-      //if(BI!=nullptr) {
-      //errs() << "looking at" << *BI <<"\n";
-    	if (GetElementPtrInst* another = dyn_cast<GetElementPtrInst>(inst)) {
-    	  //errs() << "found a store" << *si <<"\n";
-        //check if the ops match
-    	  if (another->getPointerOperand() == tGEPI->getPointerOperand()) {
-          //check if used in load or store
-	        for (Value* pUse : another->users()) {
+      // if(BI!=nullptr) {
+      // errs() << "looking at" << *BI <<"\n";
+      if (GetElementPtrInst* another = dyn_cast<GetElementPtrInst>(inst)) {
+        // errs() << "found a store" << *si <<"\n";
+        // check if the ops match
+        if (another->getPointerOperand() == tGEPI->getPointerOperand()) {
+          // check if used in load or store
+          for (Value* pUse : another->users()) {
             if (isa<StoreInst>(pUse)) {
               found = 1;
               break;
             }
           }
-	        //no store
+          // no store
           if (!found) {
-            #if DEBUG
-            errs() << "matching GEPS: " << *another<<" and " << *tGEPI <<"\n";
-            #endif
+#if DEBUG
+            errs() << "matching GEPS: " << *another << " and " << *tGEPI << "\n";
+#endif
             matching.push_back(another);
           }
-	      }
-    	}
+        }
+      }
     }
-    //we found a store in this node
-    if(found) {
+    // we found a store in this node
+    if (found) {
       found = 0;
       continue;
     }
     /*add succ. blocks to our queue*/
     for (auto SI = succ_begin(current); SI != succ_end(current); ++SI) {
-      //if it's new
-      if(!(find(visited.begin(), visited.end(), *SI) != visited.end())){
-	      visited.push_back(*SI);
-	      to_visit.push(*SI);
+      // if it's new
+      if (!(find(visited.begin(), visited.end(), *SI) != visited.end())) {
+        visited.push_back(*SI);
+        to_visit.push(*SI);
       }
     }
   }
-  
+
   return matching;
 }
 
 /*Find first use of a pointer after a callInst, for pass-by-ref*/
 Instruction* ptrAfterCall(Value* ptr, CallInst* ci) {
-  queue<BasicBlock*> to_visit; 
-  vector<BasicBlock*> visited;
+  std::queue<BasicBlock*> to_visit;
+  std::vector<BasicBlock*> visited;
   BasicBlock* current;
-  
+
   int found = 0;
   int skip = 1;
-  
+
   to_visit.push(ci->getParent());
 
-  while(!to_visit.empty()) {
+  while (!to_visit.empty()) {
     current = to_visit.front();
     to_visit.pop();
-     
-     //forwards exploration
-    for(Instruction& i : *current) {
+
+    // forwards exploration
+    for (Instruction& i : *current) {
       Instruction* inst = &i;
-      //don't look at gepi block before gepi
-      if((current == ci->getParent())&&(skip)) {
-	      //errs() << "skipping" << *inst <<"\n";
-      	if(ci==inst){
-	        skip = 0;
-      	}
-      	continue;
+      // don't look at gepi block before gepi
+      if ((current == ci->getParent()) && (skip)) {
+        // errs() << "skipping" << *inst <<"\n";
+        if (ci == inst) {
+          skip = 0;
+        }
+        continue;
       }
-      //if the inst is a use of the pointer
-    	if (find(ptr->user_begin(),ptr->user_end(), inst)!=ptr->user_end()) {
+      // if the inst is a use of the pointer
+      if (std::find(ptr->user_begin(), ptr->user_end(), inst) != ptr->user_end()) {
         return inst;
       }
-      
     }
     /*add succ. blocks to our queue*/
     for (auto SI = succ_begin(current); SI != succ_end(current); ++SI) {
-      //if it's new
-      if(!(find(visited.begin(), visited.end(), *SI) != visited.end())){
-	      visited.push_back(*SI);
-	      to_visit.push(*SI);
+      // if it's new
+      if (!(find(visited.begin(), visited.end(), *SI) != visited.end())) {
+        visited.push_back(*SI);
+        to_visit.push(*SI);
       }
     }
   }
   return nullptr;
 }
 
-
-/*This is a function to return all the control dependent stores off of a control inst 
-Input -- ti, the (formerly) terminator inst 
+/*This is a function to return all the control dependent stores off of a control inst
+Input -- ti, the (formerly) terminator inst
 Output -- list of deps */
-val_vec getControlDeps(Instruction* ti)
-{
+val_vec getControlDeps(Instruction* ti) {
   val_vec deps;
   int succ_i = 0;
   while (succ_i < ti->getNumSuccessors()) {
     BasicBlock* bb = ti->getSuccessor(succ_i);
     succ_i++;
-    for(Instruction& inst : *bb) {
-      //if we encounter a store, add to deps
-      if(isa<StoreInst>(&inst)) {
-	      deps.push_back(&inst);
-      } //if we encounter a multi succ branch, recursive call, if we encouter a join, continue to next succ
-      else if(inst.isTerminator()) {
-
-    	  if(ti->getNumSuccessors() > 1) {
-	        vector<Value*> intermed = getControlDeps(&inst);
-	        for(Value* item : intermed) {
-	          deps.push_back(item);
-	        } 
-	      } else {
-	        break;
-	      }
+    for (Instruction& inst : *bb) {
+      // if we encounter a store, add to deps
+      if (isa<StoreInst>(&inst)) {
+        deps.push_back(&inst);
+      }  // if we encounter a multi succ branch, recursive call, if we encouter a join, continue to next succ
+      else if (inst.isTerminator()) {
+        if (ti->getNumSuccessors() > 1) {
+          std::vector<Value*> intermed = getControlDeps(&inst);
+          for (Value* item : intermed) {
+            deps.push_back(item);
+          }
+        } else {
+          break;
+        }
       }
     }
   }
   return deps;
 }
 
-
-/*Get direct uses (at src level, not IR) of a fresh var*/
-inst_vec traverseDirectUses(Instruction* root)
-{
+// Get direct uses (at src level, not IR) of a fresh var
+inst_vec traverseDirectUses(Instruction* root) {
   inst_vec uses;
-  queue<Instruction*> localDeps;
+  std::queue<Instruction*> localDeps;
   localDeps.push(root);
-  
-  //Edge case: check if return is an internally allocated stack var
+
+  // Edge case: check if return is an internally allocated stack var
   Value* retPtr;
   Instruction* last = &(root->getFunction()->back().back());
   if (ReturnInst* ri = dyn_cast<ReturnInst>(last)) {
     for (Use& op : ri->operands()) {
-      if(LoadInst* li  = dyn_cast<LoadInst>(op.get())) {
+      if (LoadInst* li = dyn_cast<LoadInst>(op.get())) {
         retPtr = li->getPointerOperand();
       }
     }
-
   }
 
-  while(!localDeps.empty()) {
+  while (!localDeps.empty()) {
     Instruction* currVal = localDeps.front();
     uses.push_back(currVal);
     localDeps.pop();
     for (Value* use : currVal->users()) {
-      //if it's a gepi, see if there are others that occur afterwards 
-      //      errs() << *use <<" is a direct use of " << *currVal<<"\n";
+      // if it's a gepi, see if there are others that occur afterwards
+      //       errs() << *use <<" is a direct use of " << *currVal<<"\n";
       if (isa<GetElementPtrInst>(use)) {
         inst_vec matching = couldMatchGEPI(dyn_cast<GetElementPtrInst>(use));
         for (Instruction* item : matching) {
-	  //  errs() << "pushing to local deps " << *item <<"\n";
+          //  errs() << "pushing to local deps " << *item <<"\n";
           localDeps.push(item);
         }
-      }
-      else if (ReturnInst* ri = dyn_cast<ReturnInst>(use)) {
-        for(Value* calls : ri->getFunction()->users()) {
-          if(isa<CallInst>(calls)) {
+      } else if (ReturnInst* ri = dyn_cast<ReturnInst>(use)) {
+        for (Value* calls : ri->getFunction()->users()) {
+          if (isa<CallInst>(calls)) {
             uses.push_back(dyn_cast<Instruction>(calls));
-            
           }
         }
       } else if (StoreInst* si = dyn_cast<StoreInst>(use)) {
-        //if stores into ret pointer, treat as above
+        // if stores into ret pointer, treat as above
         if (si->getPointerOperand() == retPtr) {
-          for(Value* calls : si->getFunction()->users()) {
-            if(isa<CallInst>(calls)) {
-             uses.push_back(dyn_cast<Instruction>(calls));
-            
+          for (Value* calls : si->getFunction()->users()) {
+            if (isa<CallInst>(calls)) {
+              uses.push_back(dyn_cast<Instruction>(calls));
             }
-         }
+          }
         }
       } else if (BranchInst* bi = dyn_cast<BranchInst>(use)) {
-        //if a use is a branch inst the atomic region needs to 
-        //dominate the successors
+        // if a use is a branch inst the atomic region needs to
+        // dominate the successors
         for (BasicBlock* bbInterior : bi->successors()) {
-          //skip panic blocks, otherwise there will be no post dom
+          // skip panic blocks, otherwise there will be no post dom
           if (bbInterior->getName().equals("panic")) {
             continue;
           }
           uses.push_back(&(bbInterior->front()));
         }
       } else if (CallInst* ci = dyn_cast<CallInst>(use)) {
-        if(ci->hasName() && ci->getName().startswith("_")) {
-          //fall through  
+        if (ci->hasName() && ci->getName().startswith("_")) {
+          // fall through
         } else {
           uses.push_back(ci);
           continue;
         }
       }
       if (Instruction* iUse = dyn_cast<Instruction>(use)) {
-        //see if load is to another var or just internal ssa
+        // see if load is to another var or just internal ssa
         if (LoadInst* li = dyn_cast<LoadInst>(iUse)) {
-          if(li->hasName()) {
-            //Hacky --verify that this is always true
-            if(!li->getName().startswith("_")) {
+          if (li->hasName()) {
+            // Hacky --verify that this is always true
+            if (!li->getName().startswith("_")) {
               continue;
             }
           }
@@ -853,5 +911,3 @@ inst_vec traverseDirectUses(Instruction* root)
 
   return uses;
 }
-
-
diff --git a/ocelot/AtomicRegionInference/src/include/ConsistentInference.h b/ocelot/AtomicRegionInference/src/include/ConsistentInference.h
deleted file mode 100644
index 1f7a429..0000000
--- a/ocelot/AtomicRegionInference/src/include/ConsistentInference.h
+++ /dev/null
@@ -1,38 +0,0 @@
-#ifndef __CONSISTENTINFERENCE__
-#define __CONSISTENTINFERENCE__
-
-#include "HelperTypes.h"
-using namespace llvm;
-using namespace std;
-
-class ConsistentInference {
-public:
-  ConsistentInference(Pass* _pass, Module* _m, Function* _as, Function* _ae) { 
-    pass = _pass;
-    m = _m; 
-    atomStart = _as;
-    atomEnd = _ae;
-  }
-  void inferConsistent(map<int, inst_vec> allSets);
-  void inferFresh(inst_vec_vec allSets); 
-  void addRegion(inst_vec conSet, int regType);
-  Function* commonPredecessor(map<Instruction*, BasicBlock*> blocks, Function* root);
-  Instruction* insertRegionInst(int regInst, Instruction* insertBefore);
-  bool sameFunction(map<Instruction*, BasicBlock*> blockMap);
-  Instruction* truncate(BasicBlock* bb, bool forwards, inst_vec conSet, set<Function*> nested);
-  vector<Function*> deepCaller(Function* root, vector<Function*> funcList, Function** goal);
-  inst_inst_pair findSmallest(vector<inst_inst_pair>regionsFound);
-  BasicBlock* getLoopEnd(BasicBlock* bb);
-  bool loopCheck(BasicBlock* bb);
-  int getSubLength(BasicBlock* bb, Instruction* end, vector<BasicBlock*> visited);
-
-
-
-private:
-  Pass* pass;
-  Module* m;
-  Function* atomStart;
-  Function* atomEnd;
-};
-
-#endif
diff --git a/ocelot/AtomicRegionInference/src/include/HelperTypes.h b/ocelot/AtomicRegionInference/src/include/HelperTypes.h
index 843c498..9565b1f 100644
--- a/ocelot/AtomicRegionInference/src/include/HelperTypes.h
+++ b/ocelot/AtomicRegionInference/src/include/HelperTypes.h
@@ -1,52 +1,41 @@
-#ifndef __HELPERTYPES__
-#define __HELPERTYPES__
-
-#include "llvm/Pass.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/IR/LegacyPassManager.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/Transforms/IPO/PassManagerBuilder.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/ADT/ilist.h"
-#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/SymbolTableListTraits.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/BasicAliasAnalysis.h"
-#include "llvm/Analysis/CallGraph.h"
-#include <map>
-#include <queue>
-#include <algorithm>
-#include <set>
-//#include <pair>
-
-#define DEBUG 0
-
-using namespace llvm; 
-
-typedef std::vector<Value*> val_vec;
-typedef std::vector<BasicBlock*> bb_vec;
-typedef std::vector<Instruction*> inst_vec;
-typedef std::map<Value*, inst_vec> val_insts_map;
-typedef std::vector<GlobalVariable*> gv_vec;
-typedef std::vector<std::pair<Value*, Instruction*>> val_inst_vec;
-typedef std::vector<std::pair<Instruction*, Instruction*>> inst_inst_vec;
-typedef std::map<Instruction*, val_vec> inst_vals_map;
-typedef std::map<Instruction*, std::set<Instruction*>> inst_insts_map;
-typedef std::vector<Function*> func_vec;
-typedef std::vector<inst_vec> inst_vec_vec;
-typedef std::pair<Instruction*, Instruction*> inst_inst_pair;
-
-extern gv_vec gv_list;
-
-/*bool isArray(Value* v);
-bool isTask(Function* F);
-bool isMemcpy(Instruction* I);
-uint64_t getSize(Value* val);
-int is_atomic_boundary(Instruction* ci);
-#define OVERHEAD 0
-*/
-#endif
+#ifndef __HELPERTYPES__
+#define __HELPERTYPES__
+
+#include <algorithm>
+#include <map>
+#include <queue>
+#include <set>
+
+#include "llvm/ADT/ilist.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/SymbolTableListTraits.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+#define DEBUG 1
+
+using namespace llvm;
+
+typedef std::vector<Value*> val_vec;
+typedef std::vector<BasicBlock*> bb_vec;
+typedef std::vector<Instruction*> inst_vec;
+typedef std::map<Value*, inst_vec> val_insts_map;
+typedef std::vector<GlobalVariable*> gv_vec;
+typedef std::vector<std::pair<Value*, Instruction*>> val_inst_vec;
+typedef std::pair<Instruction*, Instruction*> inst_inst_pair;
+typedef std::vector<inst_inst_pair> inst_inst_vec;
+typedef std::map<Instruction*, val_vec> inst_vals_map;
+typedef std::map<Instruction*, std::set<Instruction*>> inst_insts_map;
+typedef std::vector<Function*> func_vec;
+typedef std::vector<inst_vec> inst_vec_vec;
+
+extern gv_vec gv_list;
+
+#endif
diff --git a/ocelot/AtomicRegionInference/src/include/Helpers.h b/ocelot/AtomicRegionInference/src/include/Helpers.h
new file mode 100644
index 0000000..8e940f0
--- /dev/null
+++ b/ocelot/AtomicRegionInference/src/include/Helpers.h
@@ -0,0 +1,15 @@
+#ifndef __HELPERS__
+#define __HELPERS__
+
+#include <string>
+
+#include "HelperTypes.h"
+
+using namespace llvm;
+
+std::string getSimpleNodeLabel(const Value* Node);
+bool isAnnot(const StringRef annotName);
+void printInstInsts(const inst_insts_map& iim, bool onlyCalls = false);
+void printInsts(const inst_vec& iv);
+
+#endif
\ No newline at end of file
diff --git a/ocelot/AtomicRegionInference/src/include/InferAtomicPass.h b/ocelot/AtomicRegionInference/src/include/InferAtomicPass.h
deleted file mode 100644
index bd0036a..0000000
--- a/ocelot/AtomicRegionInference/src/include/InferAtomicPass.h
+++ /dev/null
@@ -1,68 +0,0 @@
-#ifndef __INFERPASS__
-#define __INFERPASS__
-
-#include "HelperTypes.h"
-#include "ConsistentInference.h"
-#include "llvm/ADT/APInt.h"
-#include "llvm/IR/Verifier.h"
-#include "llvm/ExecutionEngine/ExecutionEngine.h"
-#include "llvm/ExecutionEngine/GenericValue.h"
-#include "llvm/ExecutionEngine/MCJIT.h"
-#include "llvm/IR/Argument.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/TargetSelect.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
-#include <cstdlib>
-#include <memory>
-#include <string>
-#include <vector>
-
-using namespace llvm;
-
-class InferAtomicModulePass : public ModulePass {
-	public:
-		static char ID;
-		InferAtomicModulePass() : ModulePass(ID) {}
-
-		virtual bool runOnModule(Module &M);
-		int getMaxCost(Function* f);
-		void mergeRegions(Function* f);
-		void getAnnotations(map<int, inst_vec>* conSets, inst_vec_vec* freshVars, inst_insts_map inputs, inst_vec* toDelete); 
-		inst_vec_vec collectFresh(inst_vec_vec startingPoints, inst_insts_map info);
-		map<int, inst_vec> collectCon(map<int, inst_vec> startingPointa, inst_insts_map inputMap);
-		void removeAnnotations(inst_vec* toDelete); 
-
-
-		virtual void getAnalysisUsage(AnalysisUsage& AU) const {
-			AU.setPreservesAll();
-			//AU.addRequired<AAResultsWrapperPass>();
-			//AU.addRequired<CallGraphWrapperPass>();
-			AU.addRequired<PostDominatorTreeWrapperPass>();
-			AU.addRequired<DominatorTreeWrapperPass>();
-		}
-		Module* getModule() {
-			return m;
-		}
-		Module* setModule(Module* _m) {
-			return m = _m;
-		}
-	private:
-		Module* m;
-		int capacitorSize;
-		Function* atomStart;
-		Function* atomEnd;
-		
-		
-};
-
-#endif
diff --git a/ocelot/AtomicRegionInference/src/include/InferAtoms.h b/ocelot/AtomicRegionInference/src/include/InferAtoms.h
new file mode 100644
index 0000000..19701e0
--- /dev/null
+++ b/ocelot/AtomicRegionInference/src/include/InferAtoms.h
@@ -0,0 +1,54 @@
+#ifndef __INFERATOMS__
+#define __INFERATOMS__
+
+#include <algorithm>
+#include <cstdlib>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "Helpers.h"
+#include "InferFreshCons.h"
+#include "TaintTracker.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Pass.h"
+#include "llvm/Passes/PassBuilder.h"
+#include "llvm/Passes/PassPlugin.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+struct InferAtomsPass : public PassInfoMixin<InferAtomsPass> {
+ public:
+  InferAtomsPass() {}
+  PreservedAnalyses run(Module& M, ModuleAnalysisManager& AM);
+
+  void getAnnotations(std::map<int, inst_vec>* consVars, inst_vec_vec* freshVars, inst_insts_map inputMap, inst_vec* toDelete);
+  inst_vec_vec collectFresh(inst_vec_vec startingPoints, inst_insts_map info);
+  std::map<int, inst_vec> collectCons(std::map<int, inst_vec> startingPointa, inst_insts_map inputMap);
+  void removeAnnotations(inst_vec* toDelete);
+  void setModule(Module* _M) { M = _M; }
+
+ private:
+  Module* M;
+  Function* atomStart;
+  Function* atomEnd;
+};
+
+extern "C" LLVM_ATTRIBUTE_WEAK ::llvm::PassPluginLibraryInfo
+llvmGetPassPluginInfo() {
+  return {
+      .APIVersion = LLVM_PLUGIN_API_VERSION,
+      .PluginName = "Atomic Region Inference Pass",
+      .PluginVersion = "v0.1",
+      .RegisterPassBuilderCallbacks = [](PassBuilder& PB) {
+        PB.registerPipelineStartEPCallback(
+            [](ModulePassManager& MPM, OptimizationLevel Level) {
+              MPM.addPass(InferAtomsPass());
+            });
+      }};
+}
+
+#endif
diff --git a/ocelot/AtomicRegionInference/src/include/InferFreshCons.h b/ocelot/AtomicRegionInference/src/include/InferFreshCons.h
new file mode 100644
index 0000000..e9defee
--- /dev/null
+++ b/ocelot/AtomicRegionInference/src/include/InferFreshCons.h
@@ -0,0 +1,36 @@
+#ifndef __INFERFRESHCONS__
+#define __INFERFRESHCONS__
+
+#include "Helpers.h"
+
+using namespace llvm;
+
+struct InferFreshCons {
+ public:
+  InferFreshCons(FunctionAnalysisManager* _FAM, Module* _m, Function* _as, Function* _ae) {
+    FAM = _FAM;
+    m = _m;
+    atomStart = _as;
+    atomEnd = _ae;
+  }
+  void inferConsistent(std::map<int, inst_vec> allSets);
+  void inferFresh(inst_vec_vec allSets);
+  void addRegion(inst_vec conSet, int regType);
+  Function* findCandidate(std::map<Instruction*, BasicBlock*> blocks, Function* root);
+  Instruction* insertRegionInst(int regInst, Instruction* insertBefore);
+  bool sameFunction(std::map<Instruction*, BasicBlock*> blockMap);
+  Instruction* truncate(BasicBlock* bb, bool forwards, inst_vec conSet, std::set<Function*> nested);
+  std::vector<Function*> deepCaller(Function* root, std::vector<Function*>& funcList, Function** goal);
+  inst_inst_pair findShortest(inst_inst_vec regionsFound);
+  BasicBlock* getLoopEnd(BasicBlock* bb);
+  bool loopCheck(BasicBlock* bb);
+  int getSubLength(BasicBlock* bb, Instruction* end, std::vector<BasicBlock*> visited);
+
+ private:
+  FunctionAnalysisManager* FAM;
+  Module* m;
+  Function* atomStart;
+  Function* atomEnd;
+};
+
+#endif
diff --git a/ocelot/AtomicRegionInference/src/include/TaintTracker.h b/ocelot/AtomicRegionInference/src/include/TaintTracker.h
index ffd90ef..1d7eaf7 100644
--- a/ocelot/AtomicRegionInference/src/include/TaintTracker.h
+++ b/ocelot/AtomicRegionInference/src/include/TaintTracker.h
@@ -1,40 +1,17 @@
-#include "llvm/Pass.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/AliasSetTracker.h"
-#include "llvm/Analysis/CFLSteensAliasAnalysis.h"
-#include "llvm/Analysis/MemoryLocation.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/IR/LegacyPassManager.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/CFG.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/Operator.h"
-#include "llvm/IR/Metadata.h"
-#include "llvm/IR/DebugInfoMetadata.h"
-#include <stack>
-#include <algorithm>
-#include <vector>
-#include <map>
-#include <queue>
-#include <set>
-#include <fstream>
-#include "HelperTypes.h"
+#ifndef __TAINTTRACKER__
+#define __TAINTTRACKER__
 
-using namespace llvm;
-using namespace std;
+#include "Helpers.h"
 
+using namespace llvm;
 
 inst_insts_map buildInputs(Module* m);
 val_vec traverseLocal(Value* tainted, Instruction* srcOp, inst_insts_map* buildMap, Instruction* caller);
-
-inst_vec findInputInsts(Module* M);  
-Instruction* ptrAfterCall(Value* ptr, CallInst* ci); 
+inst_vec findInputInsts(Module* M);
+Instruction* ptrAfterCall(Value* ptr, CallInst* ci);
 bool storePrecedesUse(Instruction* use, StoreInst* toMatch);
 inst_vec couldMatchGEPI(GetElementPtrInst* tGEPI);
 val_vec getControlDeps(Instruction* ti);
 inst_vec traverseDirectUses(Instruction* root);
+
+#endif

From c7fd8d0757eb1dbc08e17ffe55e356116a7cacb6 Mon Sep 17 00:00:00 2001
From: Robert Zhang <jiaxuanzhang066@gmail.com>
Date: Sat, 16 Dec 2023 23:22:31 -0500
Subject: [PATCH 02/18] [InferAtomsPass] Makefile to simplify testing

Useful, extensible shortcuts for running tests.
---
 .gitignore                            |  3 ++-
 benchmarks/ctests/example01.c         |  3 ---
 ocelot/AtomicRegionInference/Makefile | 20 ++++++++++++++++++++
 3 files changed, 22 insertions(+), 4 deletions(-)
 create mode 100644 ocelot/AtomicRegionInference/Makefile

diff --git a/.gitignore b/.gitignore
index 5326aab..225f44b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 .vscode
-ocelot/AtomicRegionInference/build
\ No newline at end of file
+ocelot/AtomicRegionInference/build
+benchmarks/ctests/*.ll
\ No newline at end of file
diff --git a/benchmarks/ctests/example01.c b/benchmarks/ctests/example01.c
index 0e61a67..4b5b66f 100644
--- a/benchmarks/ctests/example01.c
+++ b/benchmarks/ctests/example01.c
@@ -1,8 +1,5 @@
 #include <stdio.h>
 
-// int x;
-// int y;
-
 void Fresh(int x) { printf("Fresh\n"); }
 void Consistent(int x, int id) { printf("Consistent\n"); }
 
diff --git a/ocelot/AtomicRegionInference/Makefile b/ocelot/AtomicRegionInference/Makefile
new file mode 100644
index 0000000..41a8cf5
--- /dev/null
+++ b/ocelot/AtomicRegionInference/Makefile
@@ -0,0 +1,20 @@
+.PHONY: clean_tests clean eg1 eg2
+
+eg1:
+	TEST=example01 make test
+eg2:
+	TEST=example02 make test
+
+test:
+	$(MAKE) -C build all
+	clang -S -emit-llvm\
+		-fpass-plugin=build/src/InferAtomsPass.dylib\
+		-fno-discard-value-names\
+		../../benchmarks/ctests/$(TEST).c\
+		-o ../../benchmarks/ctests/$(TEST).ll
+
+clean_tests:
+	find ../../benchmarks/ctests -name "*.ll" -exec rm -rf {} \;
+
+clean:
+	rm -rf build

From 58855d2a5cc9c3f6fbb64a5b6cf28e85ba312d1e Mon Sep 17 00:00:00 2001
From: Robert Zhang <jiaxuanzhang066@gmail.com>
Date: Sun, 17 Dec 2023 23:08:35 -0500
Subject: [PATCH 03/18] [WIP][InferAtomsPass] Instruction scheduling

Step 1 of optimizing atomic regions for (smaller) size.

In essence, it's now necessary to have a complete picture of which
instructions are tainted (whereas before we really only needed to know
the boundaries of a region).

Test plan: run `make eg3` for an example where the freshness atomic
region size is reduced thanks to the optimization.
---
 .gitignore                                    |   4 +-
 benchmarks/ctests/example03.c                 |  19 ++
 ocelot/AtomicRegionInference/Makefile         |   6 +
 ocelot/AtomicRegionInference/README.md        |   3 +
 .../AtomicRegionInference/src/InferAtoms.cpp  |  11 +-
 .../src/InferFreshCons.cpp                    | 200 ++++++++++++------
 .../src/TaintTracker.cpp                      | 140 ++++++++++--
 .../src/include/HelperTypes.h                 |   5 +-
 .../src/include/Helpers.h                     |   3 +
 .../src/include/InferFreshCons.h              |  11 +-
 .../src/include/TaintTracker.h                |   1 +
 11 files changed, 312 insertions(+), 91 deletions(-)
 create mode 100644 benchmarks/ctests/example03.c

diff --git a/.gitignore b/.gitignore
index 225f44b..17712eb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,5 @@
 .vscode
 ocelot/AtomicRegionInference/build
-benchmarks/ctests/*.ll
\ No newline at end of file
+benchmarks/ctests/*.ll
+
+.DS_Store
\ No newline at end of file
diff --git a/benchmarks/ctests/example03.c b/benchmarks/ctests/example03.c
new file mode 100644
index 0000000..98b9d0d
--- /dev/null
+++ b/benchmarks/ctests/example03.c
@@ -0,0 +1,19 @@
+void Fresh(int x) {}
+void Consistent(int x, int id) {}
+
+void atomic_start() {}
+void atomic_end() {}
+
+int input() { return 0; }
+int (*IO_NAME)() = input;
+
+void log(int x) {}
+
+void app() {
+  int x = input();
+  int y = 1;
+  int z = y + 1;
+  log(z);
+  log(x);
+  Fresh(x);
+}
\ No newline at end of file
diff --git a/ocelot/AtomicRegionInference/Makefile b/ocelot/AtomicRegionInference/Makefile
index 41a8cf5..9ab940c 100644
--- a/ocelot/AtomicRegionInference/Makefile
+++ b/ocelot/AtomicRegionInference/Makefile
@@ -4,9 +4,15 @@ eg1:
 	TEST=example01 make test
 eg2:
 	TEST=example02 make test
+eg3:
+	TEST=example03 make test
 
 test:
 	$(MAKE) -C build all
+	clang -S -emit-llvm\
+		-fno-discard-value-names\
+		../../benchmarks/ctests/$(TEST).c\
+		-o ../../benchmarks/ctests/$(TEST).orig.ll
 	clang -S -emit-llvm\
 		-fpass-plugin=build/src/InferAtomsPass.dylib\
 		-fno-discard-value-names\
diff --git a/ocelot/AtomicRegionInference/README.md b/ocelot/AtomicRegionInference/README.md
index 38c61e6..5895b8c 100644
--- a/ocelot/AtomicRegionInference/README.md
+++ b/ocelot/AtomicRegionInference/README.md
@@ -16,3 +16,6 @@ You may bootstrap Clang to use the pass to compile a C file like so:
 ```sh
 clang -S -emit-llvm -fpass-plugin=src/InferAtomsPass.dylib -fno-discard-value-names ../../../benchmarks/ctests/example01.c
 ```
+
+Or, when testing, use the shortcuts provided in the Makefile (e.g., `make eg1`),
+which produce two LLVM IRs with and without the pass enabled.
diff --git a/ocelot/AtomicRegionInference/src/InferAtoms.cpp b/ocelot/AtomicRegionInference/src/InferAtoms.cpp
index 3843383..b0219cd 100644
--- a/ocelot/AtomicRegionInference/src/InferAtoms.cpp
+++ b/ocelot/AtomicRegionInference/src/InferAtoms.cpp
@@ -219,6 +219,7 @@ void InferAtomsPass::getAnnotations(std::map<int, inst_vec>* consVars, inst_vec_
 #if DEBUG
             errs() << "[Loop Inst] Fresh arg: " << *arg << "\n";
 #endif
+
             if (auto* inst = dyn_cast<Instruction>(arg)) {
 #if DEBUG
               errs() << "[Loop Inst] arg = Instruction, add to v\n";
@@ -240,7 +241,7 @@ void InferAtomsPass::getAnnotations(std::map<int, inst_vec>* consVars, inst_vec_
 #endif
                   if (ptrUse != inst) {
                     if (auto* liUse = dyn_cast<LoadInst>(ptrUse)) {
-                      errs() << "[Loop ptr users] Diff LoadInst ptrUse, add to v\n";
+                      errs() << "[Loop ptr users] ptrUse diff from Fresh arg, add to v\n";
                       v.emplace(liUse);
                     }
                   }
@@ -443,23 +444,23 @@ inst_vec_vec InferAtomsPass::collectFresh(inst_vec_vec freshVars, inst_insts_map
     errs() << "[Loop freshVars] Go over varSet:\n";
     printInsts(varSet);
 #endif
-    std::set<Instruction*> unique, callChain;
+    inst_set unique, callChain;
     for (auto* var : varSet) {
 #if DEBUG
       errs() << "[Loop varSet] Cur var: " << *var << "\n";
 #endif
       // Uses (forwards) are direct only (might need a little chaining for direct in rs to be direct in IR)
-      inst_vec uses = traverseDirectUses(var);
+      inst_vec uses = traverseUses(var);
 
 #if DEBUG
       errs() << "[Loop varSet] Go over uses of var\n";
 #endif
       for (auto* use : uses) {
 #if DEBUG
-        errs() << "[Loop uses] Cur use: " << *use << "\n";
-        errs() << "[Loop uses] Add use to unique\n";
+        errs() << "[Loop uses] Add use: " << *use << "\n";
 #endif
         unique.insert(use);
+
         for (auto* input : inputMap[use]) {
 #if DEBUG
           errs() << "[Loop inputMap[use]] Add src input of use to unique: " << *input << "\n";
diff --git a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp
index 6258661..0e1f93b 100644
--- a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp
+++ b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp
@@ -2,24 +2,23 @@
 
 #include "llvm/Analysis/PostDominators.h"
 
-Instruction* InferFreshCons::insertRegionInst(int toInsertType, Instruction* insertBefore) {
+Instruction* InferFreshCons::insertRegionInst(InsertKind insertKind, Instruction* insertBefore) {
 #if DEBUG
   errs() << "=== insertRegionInst ===\n";
 #endif
   Instruction* call;
   IRBuilder<> builder(insertBefore);
-  // Insert a region start inst
-  if (toInsertType == 0) {
+
+  if (insertKind == Start) {
 #if DEBUG
     errs() << "Insert start before: " << *insertBefore << "\n";
 #endif
     call = builder.CreateCall(this->atomStart);
   } else {
-    // Insert a region end inst
 #if DEBUG
     errs() << "Insert end before: " << *insertBefore << "\n";
 #endif
-    call = builder.CreateCall(atomEnd);
+    call = builder.CreateCall(this->atomEnd);
   }
 
 #if DEBUG
@@ -31,10 +30,11 @@ Instruction* InferFreshCons::insertRegionInst(int toInsertType, Instruction* ins
 // If a direct pred is also a successor, then it's a for loop block
 bool InferFreshCons::loopCheck(BasicBlock* B) {
   auto BName = getSimpleNodeLabel(B);
+
   if (!B->hasNPredecessors(1)) {
     for (auto it = pred_begin(B), et = pred_end(B); it != et; ++it) {
-      BasicBlock* predecessor = *it;
-      StringRef pname = predecessor->getName().drop_front(2);
+      auto* predecessor = *it;
+      auto pname = predecessor->getName().drop_front(2);
       // errs() << "comparing " << pname<< " and " <<bbname <<"\n";
       if (pname.compare_numeric(BName) > 0) {
         //   errs() << "comparison is true\n";
@@ -42,13 +42,14 @@ bool InferFreshCons::loopCheck(BasicBlock* B) {
       }
     }
   }
+
   return false;
 }
 
 // Find the first block after a for loop
 BasicBlock* InferFreshCons::getLoopEnd(BasicBlock* bb) {
-  Instruction* ti = bb->getTerminator();
-  BasicBlock* end = ti->getSuccessor(0);
+  auto* ti = bb->getTerminator();
+  auto* end = ti->getSuccessor(0);
   ti = end->getTerminator();
   // errs() << "end is " << end->getName() << "\n";
   // for switch inst, succ 0 is the fall through
@@ -64,7 +65,7 @@ void InferFreshCons::inferConsistent(std::map<int, inst_vec> consSets) {
 #if DEBUG
     errs() << "[InferConsistent] starting set " << id << "\n";
 #endif
-    addRegion(set, 0);
+    addRegion(set, Consistent);
   }
 }
 
@@ -74,31 +75,31 @@ void InferFreshCons::inferFresh(inst_vec_vec freshSets) {
   errs() << "=== inferFresh ===\n";
 #endif
   // TODO: start with pseudo code structure from design doc
-  for (auto set : freshSets) addRegion(set, 1);
+  for (auto freshSet : freshSets) addRegion(freshSet, Fresh);
 #if DEBUG
   errs() << "*** inferFresh ***\n";
 #endif
 }
 
-// Region type: 0 for Consistent, 1 for Fresh
-void InferFreshCons::addRegion(inst_vec set, int regionType) {
+void InferFreshCons::addRegion(inst_vec targetInsts, RegionKind regionKind) {
 #if DEBUG
   errs() << "=== addRegion ===\n";
 #endif
   // A map from set item to bb
-  std::map<Instruction*, BasicBlock*> blocks;
+  std::map<Instruction*, BasicBlock*> targetBlocks;
   // A queue of regions that still need to be processed
   std::queue<std::map<Instruction*, BasicBlock*>> regionsNeeded;
 
 #if DEBUG
   errs() << "Build map from inst to bb\n";
 #endif
-  for (auto* item : set) blocks[item] = item->getParent();
+  for (auto* targetInst : targetInsts)
+    targetBlocks[targetInst] = targetInst->getParent();
 
 #if DEBUG
   errs() << "Add map to regionsNeeded\n";
 #endif
-  regionsNeeded.push(blocks);
+  regionsNeeded.push(targetBlocks);
 
   auto* root = m->getFunction("app");
 
@@ -106,56 +107,53 @@ void InferFreshCons::addRegion(inst_vec set, int regionType) {
   inst_inst_vec regionsFound;
   while (!regionsNeeded.empty()) {
     // Need to raise all blocks in the map until they are the same
-    auto blockMap = regionsNeeded.front();
+    auto blocks = regionsNeeded.front();
     regionsNeeded.pop();
-    // Record which functions have been travelled through
-    std::set<Function*> nested;
+    // Record which functions have been traveled through
+    std::set<Function*> seenFuns;
 
 #if DEBUG
-    errs() << "[Loop regionsNeeded] Check if blocks are in diff functions\n";
+    errs() << "[Loop regionsNeeded] While blocks are in diff functions\n";
 #endif
-    while (!sameFunction(blockMap)) {
+    while (!sameFunction(blocks)) {
       // To think on: does this change?
-      auto* goal = findCandidate(blockMap, root);
+      auto* goal = findCandidate(blocks, root);
 #if DEBUG
-      errs() << "[Loop !sameFunction] Go over each item in set\n";
+      errs() << "[Loop !sameFunction] Go over each targetInst\n";
 #endif
-      for (auto* item : set) {
+      for (auto* targetInst : targetInsts) {
         // not all blocks need to be moved up
-        Function* currFunc = blockMap[item]->getParent();
-        nested.insert(currFunc);
-        if (currFunc != goal) {
+        auto* curFun = blocks[targetInst]->getParent();
+        seenFuns.insert(curFun);
+        if (curFun != goal) {
           // if more than one call:
           // callChain info is already in the starting set
           // so only explore a caller if it's in conSet
           bool first = true;
-          for (User* use : currFunc->users()) {
-            // if (regionType == 1) {
-            if (!(find(set.begin(), set.end(), use) != set.end())) {
+          for (auto* use : curFun->users()) {
+            // if (regionKind == 1) {
+            if (!(find(targetInsts.begin(), targetInsts.end(), use) != targetInsts.end()))
               continue;
-            }
             // errs() << "Use: "<< *use << " is in call chain\n";
             //}
-            Instruction* inst = dyn_cast<Instruction>(use);
+            auto* inst = dyn_cast<Instruction>(use);
 #if DEBUGINFER
             errs() << "DEBUGINFER: examining use: " << *inst << "\n";
 #endif
             if (inst == NULL) {
-              // errs () <<"ERROR: use " << *use << "not an instruction\n";
+              // errs () << "ERROR: use " << *use << "not an instruction\n";
               break;
             }
             // update the original map
             if (first) {
-              blockMap[item] = inst->getParent();
+              blocks[targetInst] = inst->getParent();
               first = false;
             } else {
               // copy the blockmap, update, add to queue
-              Instruction* inst = dyn_cast<Instruction>(use);
+              auto* inst = dyn_cast<Instruction>(use);
               std::map<Instruction*, BasicBlock*> copy;
-              for (auto map : blockMap) {
-                copy[map.first] = map.second;
-              }
-              copy[item] = inst->getParent();
+              for (auto map : blocks) copy[map.first] = map.second;
+              copy[targetInst] = inst->getParent();
               regionsNeeded.push(copy);
             }
           }  // end forall uses
@@ -168,34 +166,114 @@ void InferFreshCons::addRegion(inst_vec set, int regionType) {
 #if DEBUG
     errs() << "[Loop regionsNeeded] Start dom tree analysis\n";
 #endif
-    auto* home = blockMap.begin()->second->getParent();
-    if (home == nullptr) {
+
+    auto* homeFun = blocks.begin()->second->getParent();
+    if (homeFun == nullptr) {
 #if DEBUG
-      errs() << "[Loop regionsNeeded] No function found\n";
+      errs() << "[regionsNeeded] No function found\n";
 #endif
       continue;
     }
 #if DEBUG
-    errs() << "[Loop regionsNeeded] Found home fun: " << home->getName() << "\n";
+    errs() << "[regionsNeeded] Found home fun: " << homeFun->getName() << "\n";
+#endif
+
+#if OPT
+#if DEBUG
+    errs() << "[regionsNeeded] Go over all block insts\n";
+#endif
+    // auto* B = blocks.begin()->second;
+    std::set<BasicBlock*> seenBlocks;
+    for (auto& [_, B] : blocks) {
+      if (seenBlocks.find(B) == seenBlocks.end()) {
+        seenBlocks.emplace(B);
+
+        std::vector<Instruction*> toDelay;
+        std::vector<Instruction*> toDelete;
+
+        for (auto& I : *B) {
+#if DEBUG
+          errs() << I << "\n";
+#endif
+          if (!isa<AllocaInst>(I) && find(targetInsts.begin(), targetInsts.end(), &I) == targetInsts.end()) {
+#if DEBUG
+            errs() << "Should be delayed\n";
+#endif
+            Instruction *prev, *clone;
+            if (isa<BinaryOperator>(I)) {
+              if (I.getOpcode() == Instruction::Add)
+                clone = BinaryOperator::Create(Instruction::Add, prev, I.getOperand(1));
+              else
+                clone = I.clone();
+            } else if (isa<LoadInst>(I)) {
+              clone = I.clone();
+            } else if (auto* ci = dyn_cast<CallInst>(&I)) {
+              clone = CallInst::Create(ci->getCalledFunction(), prev);
+            } else if (auto* si = dyn_cast<StoreInst>(&I)) {
+              if (prev != nullptr && find(targetInsts.begin(), targetInsts.end(), prev) == targetInsts.end()) {
+                clone = I.clone();
+                clone->setOperand(0, prev);
+                errs() << "yo\n";
+              } else
+                clone = I.clone();
+            } else
+              clone = I.clone();
+            prev = clone;
+
+            toDelete.push_back(&I);
+            toDelay.push_back(clone);
+          }
+        }
+
+        IRBuilder builder(B);
+        for (auto* d : toDelay) {
+          // #if DEBUG
+          //           errs() << "Delayed: " << *d << "\n";
+          // #endif
+          builder.Insert(d);
+        }
+
+        auto I = B->begin();
+        for (; I != B->end();) {
+#if DEBUG
+          errs() << *I << "\n";
+#endif
+          if (find(toDelete.begin(), toDelete.end(), &*I) != toDelete.end()) {
+#if DEBUG
+            errs() << "Delete\n";
+#endif
+            I = I->eraseFromParent();
+          } else
+            I++;
+        }
+
+#if DEBUG
+        errs() << "After: " << *B << "\n";
 #endif
-    auto& domTree = FAM->getResult<DominatorTreeAnalysis>(*home);
+      }
+    }
+#endif
+
+    auto& domTree = FAM->getResult<DominatorTreeAnalysis>(*homeFun);
     // Find the closest point that dominates
-    auto* startDom = blockMap.begin()->second;
-    for (auto& [_, B] : blockMap) {
+    auto* startDom = blocks.begin()->second;
+    for (auto& [_, B] : blocks)
       startDom = domTree.findNearestCommonDominator(B, startDom);
-    }
 #if DEBUG
     errs() << "[Loop regionsNeeded] startDom: " << *startDom << "\n";
 #endif
-// TODO: if an inst in the set is in the bb, we can truncate?
+
+    // TODO: if an inst in the set is in the bb, we can truncate?
+
 #if DEBUG
     errs() << "Start post dom tree analysis\n";
 #endif
+
     // Flip directions for the region end
-    auto& postDomTree = FAM->getResult<PostDominatorTreeAnalysis>(*home);
+    auto& postDomTree = FAM->getResult<PostDominatorTreeAnalysis>(*homeFun);
     // Find the closest point that dominates
-    auto* endDom = blockMap.begin()->second;
-    for (auto map : blockMap) {
+    auto* endDom = blocks.begin()->second;
+    for (auto& [_, block] : blocks) {
 #if DEBUGINFER
       if (endDom != nullptr) {
         errs() << "Finding post dom of: " << getSimpleNodeLabel(map.second) << " and " << getSimpleNodeLabel(endDom) << "\n";
@@ -203,8 +281,9 @@ void InferFreshCons::addRegion(inst_vec set, int regionType) {
         errs() << "endDom is null\n";
       }
 #endif
-      endDom = postDomTree.findNearestCommonDominator(map.second, endDom);
+      endDom = postDomTree.findNearestCommonDominator(block, endDom);
     }
+
 #if DEBUG
     errs() << "[Loop regionsNeeded] endDom: " << *endDom << "\n";
 #endif
@@ -214,9 +293,11 @@ void InferFreshCons::addRegion(inst_vec set, int regionType) {
     } else if (endDom == nullptr) {
       errs() << "[Error] Null endDom\n";
     }
+
     // Need to make the start and end dominate each other as well.
     startDom = domTree.findNearestCommonDominator(startDom, endDom);
     endDom = postDomTree.findNearestCommonDominator(startDom, endDom);
+
 #if DEBUG
     errs() << "[Loop regionsNeeded] After matching scope\n";
     errs() << "[Loop regionsNeeded] startDom: " << *startDom << "\n";
@@ -241,8 +322,8 @@ void InferFreshCons::addRegion(inst_vec set, int regionType) {
 #endif
     // TODO: fallback if endDom is null? Need hyper-blocks, I think
     // possibly can do a truncation check, to lessen the size a little, but could that interfere with compiler optimizations?
-    auto* regionStart = truncate(startDom, true, set, nested);
-    auto* regionEnd = truncate(endDom, false, set, nested);
+    auto* regionStart = truncate(startDom, true, targetInsts, seenFuns);
+    auto* regionEnd = truncate(endDom, false, targetInsts, seenFuns);
     if (regionStart == nullptr) {
       errs() << "[Error] Null startDom after truncation\n";
     } else if (regionEnd == nullptr) {
@@ -262,8 +343,8 @@ void InferFreshCons::addRegion(inst_vec set, int regionType) {
   // each other, so there's no possibility of not running into the start from
   // the end
   auto [regionStart, regionEnd] = findShortest(regionsFound);
-  insertRegionInst(0, regionStart);
-  insertRegionInst(1, regionEnd);
+  insertRegionInst(Start, regionStart);
+  insertRegionInst(End, regionEnd);
   //}//end while regions needed
 
 #if DEBUG
@@ -355,7 +436,6 @@ Instruction* InferFreshCons::truncate(BasicBlock* B, bool forwards, inst_vec set
   return &B->front();
 }
 
-// findCandidate
 Function* InferFreshCons::findCandidate(std::map<Instruction*, BasicBlock*> blockMap, Function* root) {
 #if DEBUG
   errs() << "== findCandidate ===\n";
@@ -374,8 +454,8 @@ Function* InferFreshCons::findCandidate(std::map<Instruction*, BasicBlock*> bloc
   // Easy case: everything is already in the same function
   if (funList.size() == 1) return funList.at(0);
 
-  /* Algo Goal: get the deepest function that still calls (or is) all funcs in funcList.
-   * Consider: multiple calls? Should be dealt with in the add region function -- eventually each caller
+  /* Algo goal: get the deepest function that still calls (or is) all funcs in funcList.
+   * Consider: multiple calls? Should be dealt with in the addRegion -- eventually each caller
    * gets its own region
    */
   Function* goal = nullptr;
@@ -393,7 +473,7 @@ Function* InferFreshCons::findCandidate(std::map<Instruction*, BasicBlock*> bloc
   return goal;
 }
 
-/*Recursive: from a root, returns list of called funcs. */
+// From a root, returns list of called functions.
 std::vector<Function*> InferFreshCons::deepCaller(Function* root, std::vector<Function*>& funList, Function** goal) {
   std::vector<Function*> calledFuncs;
   bool mustIncludeSelf = false;
diff --git a/ocelot/AtomicRegionInference/src/TaintTracker.cpp b/ocelot/AtomicRegionInference/src/TaintTracker.cpp
index ee22ad8..f80f7ce 100644
--- a/ocelot/AtomicRegionInference/src/TaintTracker.cpp
+++ b/ocelot/AtomicRegionInference/src/TaintTracker.cpp
@@ -833,31 +833,50 @@ val_vec getControlDeps(Instruction* ti) {
 
 // Get direct uses (at src level, not IR) of a fresh var
 inst_vec traverseDirectUses(Instruction* root) {
+#if DEBUG
+  errs() << "=== traverseDirectUses ===\n";
+#endif
   inst_vec uses;
   std::queue<Instruction*> localDeps;
+#if DEBUG
+  errs() << "Add root to localDeps: " << *root << "\n";
+#endif
   localDeps.push(root);
 
   // Edge case: check if return is an internally allocated stack var
   Value* retPtr;
-  Instruction* last = &(root->getFunction()->back().back());
-  if (ReturnInst* ri = dyn_cast<ReturnInst>(last)) {
-    for (Use& op : ri->operands()) {
-      if (LoadInst* li = dyn_cast<LoadInst>(op.get())) {
+  auto* last = &(root->getFunction()->back().back());
+  if (auto* ri = dyn_cast<ReturnInst>(last)) {
+    for (auto& op : ri->operands()) {
+      if (auto* li = dyn_cast<LoadInst>(op.get())) {
         retPtr = li->getPointerOperand();
+#if DEBUG
+        errs() << "retPtr: " << *retPtr << "\n";
+#endif
       }
     }
   }
 
   while (!localDeps.empty()) {
-    Instruction* currVal = localDeps.front();
-    uses.push_back(currVal);
+    auto* curVal = localDeps.front();
+#if DEBUG
+    errs() << "[Loop localDeps] Add curVal to uses: " << *curVal << "\n";
+#endif
+    uses.push_back(curVal);
     localDeps.pop();
-    for (Value* use : currVal->users()) {
-      // if it's a gepi, see if there are others that occur afterwards
+
+#if DEBUG
+    errs() << "[Loop localDeps] Go over curVal users\n";
+#endif
+    for (auto* use : curVal->users()) {
+#if DEBUG
+      errs() << "[Loop users] use: " << *use << "\n";
+#endif
+      // If it's a gepi, see if there are others that occur afterwards
       //       errs() << *use <<" is a direct use of " << *currVal<<"\n";
       if (isa<GetElementPtrInst>(use)) {
-        inst_vec matching = couldMatchGEPI(dyn_cast<GetElementPtrInst>(use));
-        for (Instruction* item : matching) {
+        auto matching = couldMatchGEPI(dyn_cast<GetElementPtrInst>(use));
+        for (auto* item : matching) {
           //  errs() << "pushing to local deps " << *item <<"\n";
           localDeps.push(item);
         }
@@ -868,8 +887,14 @@ inst_vec traverseDirectUses(Instruction* root) {
           }
         }
       } else if (StoreInst* si = dyn_cast<StoreInst>(use)) {
-        // if stores into ret pointer, treat as above
+#if DEBUG
+        errs() << "[Loop users] use = StoreInst\n";
+#endif
+        // If stores into ret pointer, treat as above
         if (si->getPointerOperand() == retPtr) {
+#if DEBUG
+          errs() << "[Loop users] ptr operand = retPtr\n";
+#endif
           for (Value* calls : si->getFunction()->users()) {
             if (isa<CallInst>(calls)) {
               uses.push_back(dyn_cast<Instruction>(calls));
@@ -877,37 +902,112 @@ inst_vec traverseDirectUses(Instruction* root) {
           }
         }
       } else if (BranchInst* bi = dyn_cast<BranchInst>(use)) {
-        // if a use is a branch inst the atomic region needs to
+        // If a use is a branch inst the atomic region needs to
         // dominate the successors
         for (BasicBlock* bbInterior : bi->successors()) {
-          // skip panic blocks, otherwise there will be no post dom
+          // Skip panic blocks, otherwise there will be no post dom
           if (bbInterior->getName().equals("panic")) {
             continue;
           }
           uses.push_back(&(bbInterior->front()));
         }
       } else if (CallInst* ci = dyn_cast<CallInst>(use)) {
+#if DEBUG
+        errs() << "[Loop users] use = CallInst\n";
+#endif
         if (ci->hasName() && ci->getName().startswith("_")) {
-          // fall through
+          // Fall through
         } else {
+#if DEBUG
+          errs() << "[Loop users] Add CallInst to uses\n";
+#endif
           uses.push_back(ci);
           continue;
         }
       }
-      if (Instruction* iUse = dyn_cast<Instruction>(use)) {
-        // see if load is to another var or just internal ssa
-        if (LoadInst* li = dyn_cast<LoadInst>(iUse)) {
+
+      if (auto* iUse = dyn_cast<Instruction>(use)) {
+        // See if load is to another var or just internal ssa
+        if (auto* li = dyn_cast<LoadInst>(iUse)) {
           if (li->hasName()) {
-            // Hacky --verify that this is always true
-            if (!li->getName().startswith("_")) {
+            // Hacky -- verify that this is always true
+            if (!li->getName().startswith("_"))
               continue;
-            }
           }
         }
+
+#if DEBUG
+        errs() << "[Loop users] Add use to localDeps\n";
+#endif
         localDeps.push(iUse);
       }
     }
   }
 
+#if DEBUG
+  errs() << "*** traverseDirectUses ***\n";
+#endif
   return uses;
 }
+// Direct uses of root, plus loads/stores (and calls fed by those loads) that share a pointer with them.
+inst_vec traverseUses(Instruction* root) {
+#if DEBUG
+  errs() << "=== traverseUses ===\n";
+#endif
+  auto directUses = traverseDirectUses(root);
+  inst_set uses(directUses.begin(), directUses.end());
+  // Iterate the vector snapshot; instructions added to the set below are not expanded again.
+  for (auto* directUse : directUses) {
+#if DEBUG
+    errs() << "[directUses] directUse: " << *directUse << "\n";
+#endif
+
+    if (auto* si = dyn_cast<StoreInst>(directUse)) {
+#if DEBUG
+      errs() << "[directUses] directUse = StoreInst\n";
+#endif
+      // Store case: collect loads of the stored-to pointer and the calls that consume those loads.
+      auto* ptr = si->getPointerOperand();
+#if DEBUG
+      errs() << "[directUses] ptr operand: " << *ptr << "\n";
+#endif
+
+      for (auto* ptrUse : ptr->users()) {
+        if (auto* li = dyn_cast<LoadInst>(ptrUse)) {
+#if DEBUG
+          errs() << "[ptrUsers] Add ptrUse (LoadInst) to uses: " << *ptrUse << "\n";
+#endif
+          uses.emplace(li);
+
+          for (auto* liUse : li->users()) {
+            if (auto* ci = dyn_cast<CallInst>(liUse)) {
+#if DEBUG
+              errs() << "[liUsers] Add liUse (CallInst) to uses: " << *liUse << "\n";
+#endif
+              uses.emplace(ci);
+            }
+          }
+        }
+      }
+    } else if (auto* li = dyn_cast<LoadInst>(directUse)) {
+#if DEBUG
+      errs() << "[directUses] directUse = LoadInst\n";
+#endif
+      auto* ptr = li->getPointerOperand();  // Load case: collect StoreInst users of the loaded-from pointer.
+      for (auto* ptrUse : ptr->users()) {
+        if (auto* si = dyn_cast<StoreInst>(ptrUse)) {
+#if DEBUG
+          errs() << "[ptrUses] Add ptrUse (StoreInst) to uses: " << *si << "\n";
+#endif
+          uses.emplace(si);
+        }
+      }
+    }
+  }
+  // NOTE(review): inst_set is a std::set keyed on Instruction*, so the returned order follows pointer values.
+#if DEBUG
+  errs() << "*** traverseUses ***\n";
+#endif
+  inst_vec uses_vec(uses.begin(), uses.end());
+  return uses_vec;
+}
diff --git a/ocelot/AtomicRegionInference/src/include/HelperTypes.h b/ocelot/AtomicRegionInference/src/include/HelperTypes.h
index 9565b1f..29efcc0 100644
--- a/ocelot/AtomicRegionInference/src/include/HelperTypes.h
+++ b/ocelot/AtomicRegionInference/src/include/HelperTypes.h
@@ -19,20 +19,19 @@
 #include "llvm/IR/SymbolTableListTraits.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 
-#define DEBUG 1
-
 using namespace llvm;
 
 typedef std::vector<Value*> val_vec;
 typedef std::vector<BasicBlock*> bb_vec;
 typedef std::vector<Instruction*> inst_vec;
+typedef std::set<Instruction*> inst_set;
 typedef std::map<Value*, inst_vec> val_insts_map;
 typedef std::vector<GlobalVariable*> gv_vec;
 typedef std::vector<std::pair<Value*, Instruction*>> val_inst_vec;
 typedef std::pair<Instruction*, Instruction*> inst_inst_pair;
 typedef std::vector<inst_inst_pair> inst_inst_vec;
 typedef std::map<Instruction*, val_vec> inst_vals_map;
-typedef std::map<Instruction*, std::set<Instruction*>> inst_insts_map;
+typedef std::map<Instruction*, inst_set> inst_insts_map;
 typedef std::vector<Function*> func_vec;
 typedef std::vector<inst_vec> inst_vec_vec;
 
diff --git a/ocelot/AtomicRegionInference/src/include/Helpers.h b/ocelot/AtomicRegionInference/src/include/Helpers.h
index 8e940f0..bfb6901 100644
--- a/ocelot/AtomicRegionInference/src/include/Helpers.h
+++ b/ocelot/AtomicRegionInference/src/include/Helpers.h
@@ -7,6 +7,9 @@
 
 using namespace llvm;
 
+#define DEBUG 1
+#define OPT 1
+
 std::string getSimpleNodeLabel(const Value* Node);
 bool isAnnot(const StringRef annotName);
 void printInstInsts(const inst_insts_map& iim, bool onlyCalls = false);
diff --git a/ocelot/AtomicRegionInference/src/include/InferFreshCons.h b/ocelot/AtomicRegionInference/src/include/InferFreshCons.h
index e9defee..b3fcd10 100644
--- a/ocelot/AtomicRegionInference/src/include/InferFreshCons.h
+++ b/ocelot/AtomicRegionInference/src/include/InferFreshCons.h
@@ -13,11 +13,18 @@ struct InferFreshCons {
     atomStart = _as;
     atomEnd = _ae;
   }
+
+  enum RegionKind { Fresh,
+                    Consistent };
+
+  enum InsertKind { Start,
+                    End };
+
   void inferConsistent(std::map<int, inst_vec> allSets);
   void inferFresh(inst_vec_vec allSets);
-  void addRegion(inst_vec conSet, int regType);
+  void addRegion(inst_vec conSet, RegionKind regionKind);
   Function* findCandidate(std::map<Instruction*, BasicBlock*> blocks, Function* root);
-  Instruction* insertRegionInst(int regInst, Instruction* insertBefore);
+  Instruction* insertRegionInst(InsertKind insertKind, Instruction* insertBefore);
   bool sameFunction(std::map<Instruction*, BasicBlock*> blockMap);
   Instruction* truncate(BasicBlock* bb, bool forwards, inst_vec conSet, std::set<Function*> nested);
   std::vector<Function*> deepCaller(Function* root, std::vector<Function*>& funcList, Function** goal);
diff --git a/ocelot/AtomicRegionInference/src/include/TaintTracker.h b/ocelot/AtomicRegionInference/src/include/TaintTracker.h
index 1d7eaf7..ea3ce03 100644
--- a/ocelot/AtomicRegionInference/src/include/TaintTracker.h
+++ b/ocelot/AtomicRegionInference/src/include/TaintTracker.h
@@ -13,5 +13,6 @@ bool storePrecedesUse(Instruction* use, StoreInst* toMatch);
 inst_vec couldMatchGEPI(GetElementPtrInst* tGEPI);
 val_vec getControlDeps(Instruction* ti);
 inst_vec traverseDirectUses(Instruction* root);
+inst_vec traverseUses(Instruction* root);
 
 #endif

From cde1b66aad5c00785863b7e1804f278e3938417d Mon Sep 17 00:00:00 2001
From: Robert Zhang <jiaxuanzhang066@gmail.com>
Date: Mon, 18 Dec 2023 22:30:46 -0500
Subject: [PATCH 04/18] [InferAtomsPass] Unignore .ll files in ctests and add
 more comments

---
 .gitignore                                    |  1 -
 benchmarks/ctests/example01.ll                | 61 ++++++++++++
 benchmarks/ctests/example02.ll                | 79 +++++++++++++++
 benchmarks/ctests/example02.orig.ll           | 97 +++++++++++++++++++
 benchmarks/ctests/example03.ll                | 64 ++++++++++++
 benchmarks/ctests/example03.orig.ll           | 82 ++++++++++++++++
 .../src/InferFreshCons.cpp                    | 30 +++---
 7 files changed, 398 insertions(+), 16 deletions(-)
 create mode 100644 benchmarks/ctests/example01.ll
 create mode 100644 benchmarks/ctests/example02.ll
 create mode 100644 benchmarks/ctests/example02.orig.ll
 create mode 100644 benchmarks/ctests/example03.ll
 create mode 100644 benchmarks/ctests/example03.orig.ll

diff --git a/.gitignore b/.gitignore
index 17712eb..6f9ba50 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,4 @@
 .vscode
 ocelot/AtomicRegionInference/build
-benchmarks/ctests/*.ll
 
 .DS_Store
\ No newline at end of file
diff --git a/benchmarks/ctests/example01.ll b/benchmarks/ctests/example01.ll
new file mode 100644
index 0000000..c4e8656
--- /dev/null
+++ b/benchmarks/ctests/example01.ll
@@ -0,0 +1,61 @@
+; ModuleID = '../../benchmarks/ctests/example01.c'
+source_filename = "../../benchmarks/ctests/example01.c"
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-macosx12.0.0"
+
+@.str = private unnamed_addr constant [7 x i8] c"Fresh\0A\00", align 1
+@.str.1 = private unnamed_addr constant [12 x i8] c"Consistent\0A\00", align 1
+@IO_NAME1 = global ptr @tmp, align 8
+
+declare i32 @printf(ptr noundef, ...) #0
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @atomic_start() #1 {
+entry:
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @atomic_end() #1 {
+entry:
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @tmp() #1 {
+entry:
+  ret i32 0
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @log(i32 noundef %x) #1 {
+entry:
+  %x.addr = alloca i32, align 4
+  store i32 %x, ptr %x.addr, align 4
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @app() #1 {
+entry:
+  %x = alloca i32, align 4
+  call void @atomic_start()
+  %call = call i32 @tmp()
+  store i32 %call, ptr %x, align 4
+  %0 = load i32, ptr %x, align 4
+  call void @log(i32 noundef %0)
+  call void @atomic_end()
+  ret i32 0
+}
+
+attributes #0 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
+attributes #1 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+!llvm.ident = !{!4}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 8, !"PIC Level", i32 2}
+!2 = !{i32 7, !"uwtable", i32 1}
+!3 = !{i32 7, !"frame-pointer", i32 1}
+!4 = !{!"Homebrew clang version 17.0.2"}
diff --git a/benchmarks/ctests/example02.ll b/benchmarks/ctests/example02.ll
new file mode 100644
index 0000000..06281bb
--- /dev/null
+++ b/benchmarks/ctests/example02.ll
@@ -0,0 +1,79 @@
+; ModuleID = '../../benchmarks/ctests/example02.c'
+source_filename = "../../benchmarks/ctests/example02.c"
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-macosx12.0.0"
+
+@IO_NAME = global ptr @sense, align 8
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @atomic_start() #0 {
+entry:
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @atomic_end() #0 {
+entry:
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @sense() #0 {
+entry:
+  ret i32 0
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @norm(i32 noundef %t) #0 {
+entry:
+  %t.addr = alloca i32, align 4
+  store i32 %t, ptr %t.addr, align 4
+  %0 = load i32, ptr %t.addr, align 4
+  ret i32 %0
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @log(i32 noundef %x) #0 {
+entry:
+  %x.addr = alloca i32, align 4
+  store i32 %x, ptr %x.addr, align 4
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @tmp() #0 {
+entry:
+  %t = alloca i32, align 4
+  %t_norm = alloca i32, align 4
+  %call = call i32 @sense()
+  store i32 %call, ptr %t, align 4
+  %0 = load i32, ptr %t, align 4
+  %call1 = call i32 @norm(i32 noundef %0)
+  store i32 %call1, ptr %t_norm, align 4
+  %1 = load i32, ptr %t_norm, align 4
+  ret i32 %1
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @app() #0 {
+entry:
+  %x = alloca i32, align 4
+  call void @atomic_start()
+  %call = call i32 @tmp()
+  store i32 %call, ptr %x, align 4
+  %0 = load i32, ptr %x, align 4
+  call void @log(i32 noundef %0)
+  call void @atomic_end()
+  ret void
+}
+
+attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+!llvm.ident = !{!4}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 8, !"PIC Level", i32 2}
+!2 = !{i32 7, !"uwtable", i32 1}
+!3 = !{i32 7, !"frame-pointer", i32 1}
+!4 = !{!"Homebrew clang version 17.0.2"}
diff --git a/benchmarks/ctests/example02.orig.ll b/benchmarks/ctests/example02.orig.ll
new file mode 100644
index 0000000..8eccea6
--- /dev/null
+++ b/benchmarks/ctests/example02.orig.ll
@@ -0,0 +1,97 @@
+; ModuleID = '../../benchmarks/ctests/example02.c'
+source_filename = "../../benchmarks/ctests/example02.c"
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-macosx12.0.0"
+
+@IO_NAME = global ptr @sense, align 8
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @Fresh(i32 noundef %x) #0 {
+entry:
+  %x.addr = alloca i32, align 4
+  store i32 %x, ptr %x.addr, align 4
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @Consistent(i32 noundef %x, i32 noundef %id) #0 {
+entry:
+  %x.addr = alloca i32, align 4
+  %id.addr = alloca i32, align 4
+  store i32 %x, ptr %x.addr, align 4
+  store i32 %id, ptr %id.addr, align 4
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @atomic_start() #0 {
+entry:
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @atomic_end() #0 {
+entry:
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @sense() #0 {
+entry:
+  ret i32 0
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @norm(i32 noundef %t) #0 {
+entry:
+  %t.addr = alloca i32, align 4
+  store i32 %t, ptr %t.addr, align 4
+  %0 = load i32, ptr %t.addr, align 4
+  ret i32 %0
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @log(i32 noundef %x) #0 {
+entry:
+  %x.addr = alloca i32, align 4
+  store i32 %x, ptr %x.addr, align 4
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @tmp() #0 {
+entry:
+  %t = alloca i32, align 4
+  %t_norm = alloca i32, align 4
+  %call = call i32 @sense()
+  store i32 %call, ptr %t, align 4
+  %0 = load i32, ptr %t, align 4
+  %call1 = call i32 @norm(i32 noundef %0)
+  store i32 %call1, ptr %t_norm, align 4
+  %1 = load i32, ptr %t_norm, align 4
+  ret i32 %1
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @app() #0 {
+entry:
+  %x = alloca i32, align 4
+  %call = call i32 @tmp()
+  store i32 %call, ptr %x, align 4
+  %0 = load i32, ptr %x, align 4
+  call void @Fresh(i32 noundef %0)
+  %1 = load i32, ptr %x, align 4
+  call void @log(i32 noundef %1)
+  ret void
+}
+
+attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+!llvm.ident = !{!4}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 8, !"PIC Level", i32 2}
+!2 = !{i32 7, !"uwtable", i32 1}
+!3 = !{i32 7, !"frame-pointer", i32 1}
+!4 = !{!"Homebrew clang version 17.0.2"}
diff --git a/benchmarks/ctests/example03.ll b/benchmarks/ctests/example03.ll
new file mode 100644
index 0000000..f47c6b7
--- /dev/null
+++ b/benchmarks/ctests/example03.ll
@@ -0,0 +1,64 @@
+; ModuleID = '../../benchmarks/ctests/example03.c'
+source_filename = "../../benchmarks/ctests/example03.c"
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-macosx12.0.0"
+
+@IO_NAME = global ptr @input, align 8
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @atomic_start() #0 {
+entry:
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @atomic_end() #0 {
+entry:
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @input() #0 {
+entry:
+  ret i32 0
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @log(i32 noundef %x) #0 {
+entry:
+  %x.addr = alloca i32, align 4
+  store i32 %x, ptr %x.addr, align 4
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @app() #0 {
+entry:
+  %x = alloca i32, align 4
+  %y = alloca i32, align 4
+  %z = alloca i32, align 4
+  call void @atomic_start()
+  %call = call i32 @input()
+  store i32 %call, ptr %x, align 4
+  %0 = load i32, ptr %x, align 4
+  call void @log(i32 noundef %0)
+  call void @atomic_end()
+  store i32 1, ptr %y, align 4
+  %1 = load i32, ptr %y, align 4
+  %2 = add i32 %1, 1
+  store i32 %2, ptr %z, align 4
+  %3 = load i32, ptr %z, align 4
+  call void @log(i32 %3)
+  ret void
+}
+
+attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+!llvm.ident = !{!4}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 8, !"PIC Level", i32 2}
+!2 = !{i32 7, !"uwtable", i32 1}
+!3 = !{i32 7, !"frame-pointer", i32 1}
+!4 = !{!"Homebrew clang version 17.0.2"}
diff --git a/benchmarks/ctests/example03.orig.ll b/benchmarks/ctests/example03.orig.ll
new file mode 100644
index 0000000..89676a7
--- /dev/null
+++ b/benchmarks/ctests/example03.orig.ll
@@ -0,0 +1,82 @@
+; ModuleID = '../../benchmarks/ctests/example03.c'
+source_filename = "../../benchmarks/ctests/example03.c"
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-macosx12.0.0"
+
+@IO_NAME = global ptr @input, align 8
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @Fresh(i32 noundef %x) #0 {
+entry:
+  %x.addr = alloca i32, align 4
+  store i32 %x, ptr %x.addr, align 4
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @Consistent(i32 noundef %x, i32 noundef %id) #0 {
+entry:
+  %x.addr = alloca i32, align 4
+  %id.addr = alloca i32, align 4
+  store i32 %x, ptr %x.addr, align 4
+  store i32 %id, ptr %id.addr, align 4
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @atomic_start() #0 {
+entry:
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @atomic_end() #0 {
+entry:
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @input() #0 {
+entry:
+  ret i32 0
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @log(i32 noundef %x) #0 {
+entry:
+  %x.addr = alloca i32, align 4
+  store i32 %x, ptr %x.addr, align 4
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @app() #0 {
+entry:
+  %x = alloca i32, align 4
+  %y = alloca i32, align 4
+  %z = alloca i32, align 4
+  %call = call i32 @input()
+  store i32 %call, ptr %x, align 4
+  store i32 1, ptr %y, align 4
+  %0 = load i32, ptr %y, align 4
+  %add = add nsw i32 %0, 1
+  store i32 %add, ptr %z, align 4
+  %1 = load i32, ptr %z, align 4
+  call void @log(i32 noundef %1)
+  %2 = load i32, ptr %x, align 4
+  call void @log(i32 noundef %2)
+  %3 = load i32, ptr %x, align 4
+  call void @Fresh(i32 noundef %3)
+  ret void
+}
+
+attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+!llvm.ident = !{!4}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 8, !"PIC Level", i32 2}
+!2 = !{i32 7, !"uwtable", i32 1}
+!3 = !{i32 7, !"frame-pointer", i32 1}
+!4 = !{!"Homebrew clang version 17.0.2"}
diff --git a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp
index 0e1f93b..727a6d1 100644
--- a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp
+++ b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp
@@ -182,14 +182,13 @@ void InferFreshCons::addRegion(inst_vec targetInsts, RegionKind regionKind) {
 #if DEBUG
     errs() << "[regionsNeeded] Go over all block insts\n";
 #endif
-    // auto* B = blocks.begin()->second;
     std::set<BasicBlock*> seenBlocks;
     for (auto& [_, B] : blocks) {
       if (seenBlocks.find(B) == seenBlocks.end()) {
         seenBlocks.emplace(B);
 
-        std::vector<Instruction*> toDelay;
-        std::vector<Instruction*> toDelete;
+        inst_vec toDelay;
+        inst_set toDelete;
 
         for (auto& I : *B) {
 #if DEBUG
@@ -200,45 +199,46 @@ void InferFreshCons::addRegion(inst_vec targetInsts, RegionKind regionKind) {
             errs() << "Should be delayed\n";
 #endif
             Instruction *prev, *clone;
+
+            // Clone each untainted instruction to be inserted to
+            // the end of the basic block
             if (isa<BinaryOperator>(I)) {
               if (I.getOpcode() == Instruction::Add)
                 clone = BinaryOperator::Create(Instruction::Add, prev, I.getOperand(1));
               else
                 clone = I.clone();
-            } else if (isa<LoadInst>(I)) {
-              clone = I.clone();
             } else if (auto* ci = dyn_cast<CallInst>(&I)) {
-              clone = CallInst::Create(ci->getCalledFunction(), prev);
+              if (prev != nullptr)
+                clone = CallInst::Create(ci->getCalledFunction(), prev);
             } else if (auto* si = dyn_cast<StoreInst>(&I)) {
               if (prev != nullptr && find(targetInsts.begin(), targetInsts.end(), prev) == targetInsts.end()) {
                 clone = I.clone();
                 clone->setOperand(0, prev);
-                errs() << "yo\n";
               } else
                 clone = I.clone();
             } else
               clone = I.clone();
+
+            // Keep track of the previous instruction to allow LLVM
+            // to remap virtual registers (avoiding <badref>'s)
             prev = clone;
 
-            toDelete.push_back(&I);
+            toDelete.emplace(&I);
             toDelay.push_back(clone);
           }
         }
 
         IRBuilder builder(B);
-        for (auto* d : toDelay) {
-          // #if DEBUG
-          //           errs() << "Delayed: " << *d << "\n";
-          // #endif
-          builder.Insert(d);
-        }
+        // Insert each delayed instruction to the end of the block
+        for (auto* d : toDelay) builder.Insert(d);
 
         auto I = B->begin();
+        // Delete their duplicates earlier in the block
         for (; I != B->end();) {
 #if DEBUG
           errs() << *I << "\n";
 #endif
-          if (find(toDelete.begin(), toDelete.end(), &*I) != toDelete.end()) {
+          if (toDelete.find(&*I) != toDelete.end()) {
 #if DEBUG
             errs() << "Delete\n";
 #endif

From c772992cd18d8cb3ded330afa18a27ac0d807df5 Mon Sep 17 00:00:00 2001
From: Robert Zhang <jiaxuanzhang066@gmail.com>
Date: Tue, 30 Jan 2024 22:09:36 -0500
Subject: [PATCH 05/18] [InferAtomsPass] Generalize instruction scheduling

The optimization is now much more robust against general source
programs. Freshness annotations now work pretty well!

The main fix to the previous setup involves a mapping from old
instructions to cloned ones. Since cloning an instruction (e.g.,
BinaryOperator) doesn't automatically clone its operands, this mapping
is required to help replace the operands of cloned instructions with the
clones of those operands. Cloning is the only approach to such
replacements due to the LLVM IR being in SSA form.

Test plan:

Run examples01/02/03 to see the transformations. For example,

```sh
make eg3
```

Before optimization:

```llvm
define void @app() #0 {
entry:
  %x = alloca i32, align 4
  %y = alloca i32, align 4
  %z = alloca i32, align 4
  call void @atomic_start()         ; <--- START
  %call = call i32 @input()
  store i32 %call, ptr %x, align 4
  store i32 1, ptr %y, align 4
  %0 = load i32, ptr %y, align 4
  %add = add nsw i32 %0, 1
  store i32 %add, ptr %z, align 4
  %1 = load i32, ptr %z, align 4
  call void @log(i32 noundef %1)
  %2 = load i32, ptr %x, align 4
  call void @log(i32 noundef %2)
  call void @atomic_end()           ; <--- END
  ret void
}
```

After optimization:

```llvm
define void @app() #0 {
entry:
  %x = alloca i32, align 4
  %y = alloca i32, align 4
  %z = alloca i32, align 4
  call void @atomic_start()         ; <--- START
  %call = call i32 @input()
  store i32 %call, ptr %x, align 4
  %0 = load i32, ptr %x, align 4
  call void @log(i32 noundef %0)
  call void @atomic_end()           ; <--- END
  store i32 1, ptr %y, align 4
  %1 = load i32, ptr %y, align 4
  %2 = add nsw i32 %1, 1
  store i32 %2, ptr %z, align 4
  %3 = load i32, ptr %z, align 4
  call void @log(i32 noundef %3)
  ret void
}
```

You may also link, build, and run an executable via:

```sh
make run_eg3 && ../../benchmarks/ctests/example03.out
```
---
 .gitignore                                    |  1 +
 benchmarks/ctests/example01.c                 |  9 +-
 benchmarks/ctests/example01.ll                | 10 ++
 benchmarks/ctests/example01.orig.ll           | 91 +++++++++++++++++++
 benchmarks/ctests/example02.c                 | 10 +-
 benchmarks/ctests/example02.ll                | 13 +++
 benchmarks/ctests/example02.orig.ll           | 13 +++
 benchmarks/ctests/example03.c                 | 10 +-
 benchmarks/ctests/example03.ll                | 17 +++-
 benchmarks/ctests/example03.orig.ll           | 13 +++
 ocelot/AtomicRegionInference/Makefile         | 20 +++-
 ocelot/AtomicRegionInference/README.md        | 19 +++-
 .../src/InferFreshCons.cpp                    | 67 +++++++++-----
 .../src/include/HelperTypes.h                 |  1 +
 .../src/include/InferAtoms.h                  |  6 ++
 15 files changed, 267 insertions(+), 33 deletions(-)
 create mode 100644 benchmarks/ctests/example01.orig.ll

diff --git a/.gitignore b/.gitignore
index 6f9ba50..fa78942 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 .vscode
 ocelot/AtomicRegionInference/build
+benchmarks/ctests/*.out
 
 .DS_Store
\ No newline at end of file
diff --git a/benchmarks/ctests/example01.c b/benchmarks/ctests/example01.c
index 4b5b66f..3bad3e9 100644
--- a/benchmarks/ctests/example01.c
+++ b/benchmarks/ctests/example01.c
@@ -8,11 +8,18 @@ void atomic_end() {}
 
 int tmp() { return 0; }
 int (*IO_NAME1)() = tmp;
-void log(int x) {}
+
+void log(int x) {
+  printf("%d\n", x);
+}
 
 int app() {
   int x = tmp();
   Fresh(x);
   log(x);
   return 0;
+}
+
+int main() {
+  app();
 }
\ No newline at end of file
diff --git a/benchmarks/ctests/example01.ll b/benchmarks/ctests/example01.ll
index c4e8656..c38981e 100644
--- a/benchmarks/ctests/example01.ll
+++ b/benchmarks/ctests/example01.ll
@@ -6,6 +6,7 @@ target triple = "arm64-apple-macosx12.0.0"
 @.str = private unnamed_addr constant [7 x i8] c"Fresh\0A\00", align 1
 @.str.1 = private unnamed_addr constant [12 x i8] c"Consistent\0A\00", align 1
 @IO_NAME1 = global ptr @tmp, align 8
+@.str.2 = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
 
 declare i32 @printf(ptr noundef, ...) #0
 
@@ -32,6 +33,8 @@ define void @log(i32 noundef %x) #1 {
 entry:
   %x.addr = alloca i32, align 4
   store i32 %x, ptr %x.addr, align 4
+  %0 = load i32, ptr %x.addr, align 4
+  %call = call i32 (ptr, ...) @printf(ptr noundef @.str.2, i32 noundef %0)
   ret void
 }
 
@@ -48,6 +51,13 @@ entry:
   ret i32 0
 }
 
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @main() #1 {
+entry:
+  %call = call i32 @app()
+  ret i32 0
+}
+
 attributes #0 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
 attributes #1 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
 
diff --git a/benchmarks/ctests/example01.orig.ll b/benchmarks/ctests/example01.orig.ll
new file mode 100644
index 0000000..68b2445
--- /dev/null
+++ b/benchmarks/ctests/example01.orig.ll
@@ -0,0 +1,91 @@
+; ModuleID = '../../benchmarks/ctests/example01.c'
+source_filename = "../../benchmarks/ctests/example01.c"
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-macosx12.0.0"
+
+@.str = private unnamed_addr constant [7 x i8] c"Fresh\0A\00", align 1
+@.str.1 = private unnamed_addr constant [12 x i8] c"Consistent\0A\00", align 1
+@IO_NAME1 = global ptr @tmp, align 8
+@.str.2 = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @Fresh(i32 noundef %x) #0 {
+entry:
+  %x.addr = alloca i32, align 4
+  store i32 %x, ptr %x.addr, align 4
+  %call = call i32 (ptr, ...) @printf(ptr noundef @.str)
+  ret void
+}
+
+declare i32 @printf(ptr noundef, ...) #1
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @Consistent(i32 noundef %x, i32 noundef %id) #0 {
+entry:
+  %x.addr = alloca i32, align 4
+  %id.addr = alloca i32, align 4
+  store i32 %x, ptr %x.addr, align 4
+  store i32 %id, ptr %id.addr, align 4
+  %call = call i32 (ptr, ...) @printf(ptr noundef @.str.1)
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @atomic_start() #0 {
+entry:
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @atomic_end() #0 {
+entry:
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @tmp() #0 {
+entry:
+  ret i32 0
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @log(i32 noundef %x) #0 {
+entry:
+  %x.addr = alloca i32, align 4
+  store i32 %x, ptr %x.addr, align 4
+  %0 = load i32, ptr %x.addr, align 4
+  %call = call i32 (ptr, ...) @printf(ptr noundef @.str.2, i32 noundef %0)
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @app() #0 {
+entry:
+  %x = alloca i32, align 4
+  %call = call i32 @tmp()
+  store i32 %call, ptr %x, align 4
+  %0 = load i32, ptr %x, align 4
+  call void @Fresh(i32 noundef %0)
+  %1 = load i32, ptr %x, align 4
+  call void @log(i32 noundef %1)
+  ret i32 0
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @main() #0 {
+entry:
+  %call = call i32 @app()
+  ret i32 0
+}
+
+attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
+attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+!llvm.ident = !{!4}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 8, !"PIC Level", i32 2}
+!2 = !{i32 7, !"uwtable", i32 1}
+!3 = !{i32 7, !"frame-pointer", i32 1}
+!4 = !{!"Homebrew clang version 17.0.2"}
diff --git a/benchmarks/ctests/example02.c b/benchmarks/ctests/example02.c
index 1047d9e..75bced9 100644
--- a/benchmarks/ctests/example02.c
+++ b/benchmarks/ctests/example02.c
@@ -1,3 +1,5 @@
+#include <stdio.h>
+
 void Fresh(int x) {}
 void Consistent(int x, int id) {}
 
@@ -9,7 +11,9 @@ int (*IO_NAME)() = sense;
 
 int norm(int t) { return t; }
 
-void log(int x) {}
+void log(int x) {
+  printf("%d\n", x);
+}
 
 int tmp() {
   int t = sense();
@@ -21,4 +25,8 @@ void app() {
   int x = tmp();
   Fresh(x);
   log(x);
+}
+
+int main() {
+  app();
 }
\ No newline at end of file
diff --git a/benchmarks/ctests/example02.ll b/benchmarks/ctests/example02.ll
index 06281bb..5a557b7 100644
--- a/benchmarks/ctests/example02.ll
+++ b/benchmarks/ctests/example02.ll
@@ -4,6 +4,7 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 target triple = "arm64-apple-macosx12.0.0"
 
 @IO_NAME = global ptr @sense, align 8
+@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
 
 ; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
 define void @atomic_start() #0 {
@@ -37,9 +38,13 @@ define void @log(i32 noundef %x) #0 {
 entry:
   %x.addr = alloca i32, align 4
   store i32 %x, ptr %x.addr, align 4
+  %0 = load i32, ptr %x.addr, align 4
+  %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %0)
   ret void
 }
 
+declare i32 @printf(ptr noundef, ...) #1
+
 ; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
 define i32 @tmp() #0 {
 entry:
@@ -67,7 +72,15 @@ entry:
   ret void
 }
 
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @main() #0 {
+entry:
+  call void @app()
+  ret i32 0
+}
+
 attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
+attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
 
 !llvm.module.flags = !{!0, !1, !2, !3}
 !llvm.ident = !{!4}
diff --git a/benchmarks/ctests/example02.orig.ll b/benchmarks/ctests/example02.orig.ll
index 8eccea6..550dc07 100644
--- a/benchmarks/ctests/example02.orig.ll
+++ b/benchmarks/ctests/example02.orig.ll
@@ -4,6 +4,7 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 target triple = "arm64-apple-macosx12.0.0"
 
 @IO_NAME = global ptr @sense, align 8
+@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
 
 ; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
 define void @Fresh(i32 noundef %x) #0 {
@@ -55,9 +56,13 @@ define void @log(i32 noundef %x) #0 {
 entry:
   %x.addr = alloca i32, align 4
   store i32 %x, ptr %x.addr, align 4
+  %0 = load i32, ptr %x.addr, align 4
+  %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %0)
   ret void
 }
 
+declare i32 @printf(ptr noundef, ...) #1
+
 ; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
 define i32 @tmp() #0 {
 entry:
@@ -85,7 +90,15 @@ entry:
   ret void
 }
 
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @main() #0 {
+entry:
+  call void @app()
+  ret i32 0
+}
+
 attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
+attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
 
 !llvm.module.flags = !{!0, !1, !2, !3}
 !llvm.ident = !{!4}
diff --git a/benchmarks/ctests/example03.c b/benchmarks/ctests/example03.c
index 98b9d0d..06d59f8 100644
--- a/benchmarks/ctests/example03.c
+++ b/benchmarks/ctests/example03.c
@@ -1,3 +1,5 @@
+#include <stdio.h>
+
 void Fresh(int x) {}
 void Consistent(int x, int id) {}
 
@@ -7,7 +9,9 @@ void atomic_end() {}
 int input() { return 0; }
 int (*IO_NAME)() = input;
 
-void log(int x) {}
+void log(int x) {
+  printf("%d\n", x);
+}
 
 void app() {
   int x = input();
@@ -16,4 +20,8 @@ void app() {
   log(z);
   log(x);
   Fresh(x);
+}
+
+int main() {
+  app();
 }
\ No newline at end of file
diff --git a/benchmarks/ctests/example03.ll b/benchmarks/ctests/example03.ll
index f47c6b7..f642b6b 100644
--- a/benchmarks/ctests/example03.ll
+++ b/benchmarks/ctests/example03.ll
@@ -4,6 +4,7 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 target triple = "arm64-apple-macosx12.0.0"
 
 @IO_NAME = global ptr @input, align 8
+@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
 
 ; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
 define void @atomic_start() #0 {
@@ -28,9 +29,13 @@ define void @log(i32 noundef %x) #0 {
 entry:
   %x.addr = alloca i32, align 4
   store i32 %x, ptr %x.addr, align 4
+  %0 = load i32, ptr %x.addr, align 4
+  %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %0)
   ret void
 }
 
+declare i32 @printf(ptr noundef, ...) #1
+
 ; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
 define void @app() #0 {
 entry:
@@ -45,14 +50,22 @@ entry:
   call void @atomic_end()
   store i32 1, ptr %y, align 4
   %1 = load i32, ptr %y, align 4
-  %2 = add i32 %1, 1
+  %2 = add nsw i32 %1, 1
   store i32 %2, ptr %z, align 4
   %3 = load i32, ptr %z, align 4
-  call void @log(i32 %3)
+  call void @log(i32 noundef %3)
   ret void
 }
 
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @main() #0 {
+entry:
+  call void @app()
+  ret i32 0
+}
+
 attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
+attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
 
 !llvm.module.flags = !{!0, !1, !2, !3}
 !llvm.ident = !{!4}
diff --git a/benchmarks/ctests/example03.orig.ll b/benchmarks/ctests/example03.orig.ll
index 89676a7..89a0869 100644
--- a/benchmarks/ctests/example03.orig.ll
+++ b/benchmarks/ctests/example03.orig.ll
@@ -4,6 +4,7 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 target triple = "arm64-apple-macosx12.0.0"
 
 @IO_NAME = global ptr @input, align 8
+@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
 
 ; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
 define void @Fresh(i32 noundef %x) #0 {
@@ -46,9 +47,13 @@ define void @log(i32 noundef %x) #0 {
 entry:
   %x.addr = alloca i32, align 4
   store i32 %x, ptr %x.addr, align 4
+  %0 = load i32, ptr %x.addr, align 4
+  %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %0)
   ret void
 }
 
+declare i32 @printf(ptr noundef, ...) #1
+
 ; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
 define void @app() #0 {
 entry:
@@ -70,7 +75,15 @@ entry:
   ret void
 }
 
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @main() #0 {
+entry:
+  call void @app()
+  ret i32 0
+}
+
 attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
+attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
 
 !llvm.module.flags = !{!0, !1, !2, !3}
 !llvm.ident = !{!4}
diff --git a/ocelot/AtomicRegionInference/Makefile b/ocelot/AtomicRegionInference/Makefile
index 9ab940c..306f019 100644
--- a/ocelot/AtomicRegionInference/Makefile
+++ b/ocelot/AtomicRegionInference/Makefile
@@ -1,5 +1,10 @@
 .PHONY: clean_tests clean eg1 eg2
 
+all:
+	make eg1
+	make eg2
+	make eg3
+
 eg1:
 	TEST=example01 make test
 eg2:
@@ -7,8 +12,15 @@ eg2:
 eg3:
 	TEST=example03 make test
 
+run_eg1:
+	TEST=example01 make run
+run_eg2:
+	TEST=example02 make run
+run_eg3:
+	TEST=example03 make run
+
 test:
-	$(MAKE) -C build all
+	$(MAKE) -C build
 	clang -S -emit-llvm\
 		-fno-discard-value-names\
 		../../benchmarks/ctests/$(TEST).c\
@@ -19,6 +31,12 @@ test:
 		../../benchmarks/ctests/$(TEST).c\
 		-o ../../benchmarks/ctests/$(TEST).ll
 
+run:
+	$(MAKE) -C build
+	clang -fpass-plugin=build/src/InferAtomsPass.dylib\
+		../../benchmarks/ctests/$(TEST).c\
+		-o ../../benchmarks/ctests/$(TEST).out
+
 clean_tests:
 	find ../../benchmarks/ctests -name "*.ll" -exec rm -rf {} \;
 
diff --git a/ocelot/AtomicRegionInference/README.md b/ocelot/AtomicRegionInference/README.md
index 5895b8c..2f9aed8 100644
--- a/ocelot/AtomicRegionInference/README.md
+++ b/ocelot/AtomicRegionInference/README.md
@@ -11,11 +11,22 @@ cmake ..
 make
 ```
 
-You may bootstrap Clang to use the pass to compile a C file like so:
+You may bootstrap Clang to use the pass to compile a C file like so (run in the
+same directory as this README):
 
 ```sh
-clang -S -emit-llvm -fpass-plugin=src/InferAtomsPass.dylib -fno-discard-value-names ../../../benchmarks/ctests/example01.c
+clang -S -emit-llvm -fpass-plugin=build/src/InferAtomsPass.dylib -fno-discard-value-names ../../benchmarks/ctests/example03.c
 ```
 
-Or, when testing, use the shortcuts provided in the Makefile (e.g., `make eg1`),
-which produce two LLVM IRs with and without the pass enabled.
+Or, use the shortcuts provided in the Makefile (e.g., `make eg3`), which produce
+two LLVM IRs with and without the pass enabled.
+
+Actually link and produce executable by running:
+
+```sh
+clang -fpass-plugin=build/src/InferAtomsPass.dylib ../../benchmarks/ctests/example03.c -o ../../benchmarks/ctests/example03.out
+
+../../benchmarks/ctests/example03.out
+```
+
+Or, use the equivalent shortcut `make run_eg3 && ../../benchmarks/ctests/example03.out`.
diff --git a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp
index 727a6d1..2802f76 100644
--- a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp
+++ b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp
@@ -187,7 +187,11 @@ void InferFreshCons::addRegion(inst_vec targetInsts, RegionKind regionKind) {
       if (seenBlocks.find(B) == seenBlocks.end()) {
         seenBlocks.emplace(B);
 
+        // A mapping from original instructions to their clones
+        inst_inst_map clonedInsts;
+        // Instructions to be delayed till the end of the block
         inst_vec toDelay;
+        // (The original) instructions to be deleted
         inst_set toDelete;
 
         for (auto& I : *B) {
@@ -198,49 +202,66 @@ void InferFreshCons::addRegion(inst_vec targetInsts, RegionKind regionKind) {
 #if DEBUG
             errs() << "Should be delayed\n";
 #endif
-            Instruction *prev, *clone;
 
-            // Clone each untainted instruction to be inserted to
-            // the end of the basic block
+            Instruction* clone;
+
+            // Clone each untainted instruction to be appended to
+            // the end of the basic block, in the original order
             if (isa<BinaryOperator>(I)) {
-              if (I.getOpcode() == Instruction::Add)
-                clone = BinaryOperator::Create(Instruction::Add, prev, I.getOperand(1));
-              else
-                clone = I.clone();
+              clone = I.clone();
+
+              for (int i = 0; i < 2; i++) {
+                if (auto* op = dyn_cast<Instruction>(I.getOperand(i))) {
+                  // Since operands don't get cloned along the eway,
+                  // look up the clone of each operand...
+                  inst_inst_map::iterator it = clonedInsts.find(op);
+                  assert(it != clonedInsts.end());
+                  // ...and overwrite the original operand with it
+                  clone->setOperand(i, it->second);
+                }
+              }
             } else if (auto* ci = dyn_cast<CallInst>(&I)) {
-              if (prev != nullptr)
-                clone = CallInst::Create(ci->getCalledFunction(), prev);
-            } else if (auto* si = dyn_cast<StoreInst>(&I)) {
-              if (prev != nullptr && find(targetInsts.begin(), targetInsts.end(), prev) == targetInsts.end()) {
-                clone = I.clone();
-                clone->setOperand(0, prev);
-              } else
-                clone = I.clone();
-            } else
               clone = I.clone();
 
-            // Keep track of the previous instruction to allow LLVM
-            // to remap virtual registers (avoiding <badref>'s)
-            prev = clone;
+              if (auto* op = dyn_cast<Instruction>(I.getOperand(0))) {
+                inst_inst_map::iterator it = clonedInsts.find(op);
+                assert(it != clonedInsts.end());
+                clone->setOperand(0, it->second);
+              }
+            } else if (isa<StoreInst>(&I)) {
+              clone = I.clone();
+
+              if (auto* op = dyn_cast<Instruction>(I.getOperand(0))) {
+                inst_inst_map::iterator it = clonedInsts.find(op);
+                assert(it != clonedInsts.end());
+                clone->setOperand(0, it->second);
+              }
+            }
+            // e.g., LoadInst
+            else {
+              clone = I.clone();
+            }
 
+            clonedInsts.emplace(&I, clone);
             toDelete.emplace(&I);
             toDelay.push_back(clone);
           }
         }
 
         IRBuilder builder(B);
-        // Insert each delayed instruction to the end of the block
-        for (auto* d : toDelay) builder.Insert(d);
+        // Append each delayed instruction to the end of the block,
+        // in the original order
+        for (auto* I : toDelay) builder.Insert(I);
 
         auto I = B->begin();
-        // Delete their duplicates earlier in the block
+        // Delete the originals
         for (; I != B->end();) {
 #if DEBUG
           errs() << *I << "\n";
 #endif
           if (toDelete.find(&*I) != toDelete.end()) {
 #if DEBUG
-            errs() << "Delete\n";
+            errs() << "Deleted\n";
 #endif
             I = I->eraseFromParent();
           } else
diff --git a/ocelot/AtomicRegionInference/src/include/HelperTypes.h b/ocelot/AtomicRegionInference/src/include/HelperTypes.h
index 29efcc0..4a9414e 100644
--- a/ocelot/AtomicRegionInference/src/include/HelperTypes.h
+++ b/ocelot/AtomicRegionInference/src/include/HelperTypes.h
@@ -34,6 +34,7 @@ typedef std::map<Instruction*, val_vec> inst_vals_map;
 typedef std::map<Instruction*, inst_set> inst_insts_map;
 typedef std::vector<Function*> func_vec;
 typedef std::vector<inst_vec> inst_vec_vec;
+typedef std::map<Instruction*, Instruction*> inst_inst_map;
 
 extern gv_vec gv_list;
 
diff --git a/ocelot/AtomicRegionInference/src/include/InferAtoms.h b/ocelot/AtomicRegionInference/src/include/InferAtoms.h
index 19701e0..217f92b 100644
--- a/ocelot/AtomicRegionInference/src/include/InferAtoms.h
+++ b/ocelot/AtomicRegionInference/src/include/InferAtoms.h
@@ -17,6 +17,7 @@
 #include "llvm/Passes/PassBuilder.h"
 #include "llvm/Passes/PassPlugin.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/InstructionNamer.h"
 
 using namespace llvm;
 
@@ -44,6 +45,11 @@ llvmGetPassPluginInfo() {
       .PluginName = "Atomic Region Inference Pass",
       .PluginVersion = "v0.1",
       .RegisterPassBuilderCallbacks = [](PassBuilder& PB) {
+        // PB.registerPipelineParsingCallback(
+        //     [](StringRef PassName, FunctionPassManager& FPM, ...) {
+        //       FPM.addPass(InstructionNamerPass());
+        //       return true;
+        //     });
         PB.registerPipelineStartEPCallback(
             [](ModulePassManager& MPM, OptimizationLevel Level) {
               MPM.addPass(InferAtomsPass());

From 7d71c889bd800bf74f5a88953990c9ae3754cdf2 Mon Sep 17 00:00:00 2001
From: Robert Zhang <jiaxuanzhang066@gmail.com>
Date: Sat, 3 Feb 2024 18:33:52 -0500
Subject: [PATCH 06/18] [InferAtomsPass] More code cleanup & debug logging

---
 benchmarks/ctests/example.bc                  | Bin 0 -> 5984 bytes
 benchmarks/ctests/example.rs                  |  24 ++
 benchmarks/ctests/example03.ll                |  16 +-
 benchmarks/ctests/example04.c                 |  27 ++
 benchmarks/ctests/example04.orig.ll           | 102 +++++++
 ocelot/AtomicRegionInference/Makefile         |   5 +
 ocelot/AtomicRegionInference/src/Helpers.cpp  |   8 +
 .../AtomicRegionInference/src/InferAtoms.cpp  | 262 ++++++++++++------
 .../src/InferFreshCons.cpp                    |  27 +-
 .../src/TaintTracker.cpp                      | 161 ++++++-----
 .../src/include/Helpers.h                     |   1 +
 11 files changed, 457 insertions(+), 176 deletions(-)
 create mode 100644 benchmarks/ctests/example.bc
 create mode 100644 benchmarks/ctests/example.rs
 create mode 100644 benchmarks/ctests/example04.c
 create mode 100644 benchmarks/ctests/example04.orig.ll

diff --git a/benchmarks/ctests/example.bc b/benchmarks/ctests/example.bc
new file mode 100644
index 0000000000000000000000000000000000000000..4163fd996b61aa6750aed7c63c25855384623a88
GIT binary patch
literal 5984
zcmcgv4NzNGcD@fi^qxUHJ&e2tTaurExY!Ns(+~PFFx8LkV230!VOyGbS3Lb0*8B)b
z#z3;o17U=BaR<9}LNiVai8q_1$u1!tkF)i3kVG8TC0>$EG9K6UK){K!V>`q@yGe&>
zd#(gFj^mwl`*TO{y?eiR?sv{T_ndQ|E{?CwM9>s)RjoznKcU$j;}=T-U;uN=c%|)~
zjMwTiUuUWPGt$6}G&n82?v&rK%9~iKIY5N~oR-3HmSe6s8M}ge9Oh|c2>nQcP-dD2
zrNQ_+a8X?ahi#MO4k@d%93gsS52Vw1YVs#;QL!z}ek9X=jMXX3bne3$y6V2lz=Wn?
z>*{DfYdezWouuDTaE|SL+^+~-bx-c*qRj)^r$=`m=H8wJ-h&)MU^>e8lkAwidq;LP
z+zz+Kos}7@Hf}qeZm-@*KlogMb#?x^G#ZIn<Fqw9W2gWfs8$B7@QmaaI=h`cM|KYE
zbpE&%nK?zlPs6Clv9pdFN83?D25|4nMd&ejuD{HEiG4%_x_Ook4MYR=(O|u#y*(NZ
zP@P3o2f8q;x-gP;ZbTW6DN(mNZd0EfQC=R&I!&m5OVCuiNVOG3jrCD8GER0aU@iwM
zO%s^wEOXV#G$Hxt1I#t6{1PV@%T7hgjuT~r4)c)2I_)q|CM<)JisLc!xT6BiBKg8N
zb7@+7m8Hg|TXO#cn4Up>oQ#Ys=QYanBg*qgeNm&j(4{_OQ=|Ru)c$%2s1ERyI2;WO
zL_<YXXMLnqjCP_pp`Phdof*kGuVKWj^lXwC8!77MXrrQ+B`14L<|H{cWmCsTu-MG3
z>iwAN@`!St2t@Y>qQ<&tkfuV2?xd*>b2K;{HRe+7FH@XyhERhm&JU|TMra54kOCj(
zbl2IrDpu;Y^HsS$@gY{3gYPW7LOG*gS=*>&OMN6DQtiN7A8Dhbt?ki{@u;~^ViMsO
z$r8X*(@f_A(=@=eS*gxCDm*RyjHQAi)tS6E%`hQu34b$*?3B~lM{K?Qiaf$#A3ECU
zd-`P7o@c)=<eoLY_;>H<l$N&4xyIMOc<WmC)gk1~9qU1e1A}N5adCpm3RK=Y)lDD=
zKuJRh5ji%yoDvct0;OR|h;yR1rl>0;1IZxrtr#Lvn(UQ2$65u-r4bS_L?be{`jvy~
zT*PA{0xlNO{IUGC6;4#g!4UFAEJAEu<t6>^>(Dq45NH8HZhYX?-bR5CyI=@>Kxh>;
zA8z?uLV_YNykpqVx^>A(BA_Xa?|zWWBOXt>t#RxhV?@%-+f?8C!Qrm2QN4Qcxd=sz
zU=xSAV0v#2Z?svH`Y{#o^T*cd^V85c?qdK*4`!tuxFJPo%$lkrI`xnuFS}}>f7Ql^
zPLqzE>AT2F&iGeRg4~WO3wx5Y@Oo0sD#;9IYPRyV=naUOQvm8|Q8_WI^ec#pK8Lv{
z(Qg;2j&>?E)GGS~$*;1^O_9;d?CN+-1?lGJ)01M6^;43HNk>J$q++VQA2ReQh|kJs
z08%v|X={%LxM<!~HgjcwndRlpynJqfS#UCXc0zSgqdcck#RavWKZw(C+F>~^DL)k{
z?~#;EEhyURsHf^F6V83gB;sre)InZTZC|DX?S3)pf|$*X28W^@LtPJETVSruFt^5;
zE4;)#30X6uoJ!s=2+A`W<piXjApK~-VVQJThLZP_2{UAUpQLOs@m)~5*j@I4q--=H
zd#529sEq~#k#Iduj&N=?JRHqC`?&lXl3#Z+^8?J>av|1pMXC!iTz}rzD4_^km{HEp
zs(wqT{kgl$qmC^{BV|+0!li}OMWH^l5ACYDIUvU#Q2$VUMo?bXs4m7-e?_RV!5Y<q
zAgwtNF;6ARMjaL4ki~fUyrZlShKc^SGhn6B{a_D`%V2P*Yxf+>T!8}RWNz>UvG;B2
z`PsGSW>pu#fXckG(MZMd$oigyx!+-#N|c|BStb*(#`3Ak<E=CWR<(iyYomGpdYZXX
zC%-x)UEt)Gcw7~K=VY2$W{ziIcw<KT*$gx9WWMAJKE5I-<6WwAvum$tRG(;66B-rl
z;APDQKlgmuDM!UgiDfWinW{YnyAO+nO4V73YggXCe9C;z%V%ep&#dy#hCxe$kc3{Q
zU@fj82o>z%@HA{?SC7pylK|IMz)nqBCx)RCs%C8Ji!r6Y(8Dvccz@+Ua*20nBKvZw
zP-WCL9DQ1Zxo->-<^75B!Gw9*u?50u3OpvO)U2@!YB2R|OjX^b@GJN5^6NZvWr3MP
z9UMZNA?}0fO3u+I%Uc!HRdNMg{f7^~w{@5;@|J6L_P-v`yl7@e(2g-Fa_(?&RlYV~
zqjSf$R42GE6!vg!zF?@KrJ2)~nDiw)SLE>+je^%;G;=y$r!Us>#k_tK=qaEN8oCFn
z3g}g(2`l-wETU29$%IVoCpQ)q7>)mAeD;KCY|9DVTK$R9H{aSt&Q%^b^zP5!ec;e*
zQ-_)d5%CJ-X)*9*W(12M{;AH|z!3NxyUQ}2=>9}`dL-*I93L^fHL1Azj$y8j`P3<&
z<>d1?Xyu<enVX|x7u5lW6l7C-6e0n~Qf=U9!VKr&u}H;WB5f}urt-q9@&c!v7x4XZ
zmpU#er$>~NsQ?gSg#0EapLNQiCS>-dHX{AkW^nsCNG>=-BV`bEP!lF4WneOT59%hA
zDnw0{AB&V7g`|^I3`fc#0S6Me!U-Il`|!*mZ4crSi)G2vdyk^ZMPUOzXJ=LOnhpP+
zEQVk3$@-0QdXAlE9|XTj(@}R_U2VLNAVe<L+|?_AVvW#C*$T>8nZrH;eL|a>^JMRg
z41Xu#CUB{OyQ~wF5AK9Im~H*|aJ*{baQiX7(MnT9e{LZ~cgG4=33ZBzQ9?tK*0^SU
zj|3**54+!K7EiUZR&UKmt(ARWaB?Bv=37TfxI<m^GtKKsHZM*n-Y2)_fanT4UU(Rf
zaT7RLH#~DlB3<myRUo>%Yb3ouO2Vo}tMZPW<CHY^#|h2i$yRpt8;XxKi^m)2XMThZ
z7>M`CJDeGHJAR2zF=zsZ&>DE&ySB&24y0u*s|lv3mVwei_Oxd4L<jw6P3z*W4OYz~
zRs*c>w%>kyH@EzL+{>E~flUAzY!2pKVJ;vk`*1AhkXw`cjrVJCXB{bCu7@7pgPJkC
z2ZN0xd=62#ev@oH8?&L`C}}!BG8`td#qbsdWw+(OnKg-~jHI34!;&|ok~QeXRRsSW
zM?TN=&*3>%^gQ#fBrDl_lZ=(jTG1-tH}RycX+yl_$-af(Nb?tOty0GADC^elahCl3
z@^0a^W4oU0ec|B~vWK3<cHt&)OW#-|j<l4_OPa-7%k!3Hu%4CvS8RtEhp2&b9^DR1
z=nLF8@YxpEu&U)<NXdMiZ}7jvm(8tInbesDx^u`_Dr*eC?z7?S;dk+FC*o-7R8GB_
zO|Ji??A`aw#or*e=b8^C{qRleO6f@DG>!-C1B)_GYeg(6@kY9BlUz(XYemVqeAQVA
z$lx}@A@p5%zDWMcNpP<sjlHLj)z~NKgX^q)GEM&0K2~e5X4rgB&%`NiYj0on>gyt#
zXRm&O&G+{7e872oycc-;1Az?5%OvWuWr=49QbY?*@@G!Sdb+SCwh8JCN%=sck3?40
z(X{jzK*Hvve+Dm5O?(rM-Jb?Bz-()=XorDQmU<;A0ts1WhU`)x0}Qu0wjq*5-3gHx
zPTOU5igkqO97U*=Tly{oGkek|NHhDWNJi3Y#lIA#zesOKyG8o*^dZqDW<;bX$LmGe
zv(j_uha&R}Y3|{@pzbFbrUM_|q@Sw&aDVLPryE|E+$i4lm2zsVc}|CX@ZawM$gGwO
z7GXQF-~I${*vu-pu&-<3!VN!H!$pIwxHW(r93==oLy;PucrS5VevkWD9=Cr087Dk(
zzT^L9_`ek55&9hPKZWrR;KFAp#t8#L>%f>DFz$n^atX)s9>AZ3@ibgdtib;naC|^)
z27|Y(z`t6;y<fu(P^j8L?(YHLze4^s!2bfqYhaCkz5<s(;oJ!0y?{Tk0^bBUs6YdN
z!%|Y~0{e>rFNE>m0A8?+gZ;af<WGD}{(FF9-nRh9^OpV72{_Q9e*;{<gwp{09B>7U
zSx|~=K9<LL9B^0yDgYeYmE-~Z9l-H_W8CmQC2`oi_W;LZeBu9lNgPjv_I-$rUxI%i
zHr`UfxGgNW8+|4Gj>lY&+8){Fdg4)iDD2hpp^lKt)6&$`(rj@Hp1t9q;PL6qE*Ji)
zak+ha8k$|9`j#N{m)~kl^;)f^ROj&<wLWdB*6edH)9Hd?E#D|K?{S5~0`yJwg5F(f
z@C$COUk8$I%Y7t4W8prNyD)k4G`58H1%1%>V!&GnJU#<3xs8I~;5WPdf*<o3JS{<=
zz9kSc`}Z|_!tnKF+}`|Hv&U!PJwjuns|C7uSeG0Ss&8nzi<vk2-KBn=PB4_ZwI*HC
z!uv8?ZWDt|I-l8R_6sJx!Q>URI{pvZbk|Iy9z1C98oh3>(V#Q0&>08^&86O8OTYz=
z4>ZDN6js#~!cLWx1j8jIcYKTC!YXHBVKrP2)fASL+*#^94#Waru-9M^EP9>S<1Z~0
z3_24wxYXdfi)x>aFBC$B_>t5a2)1~9p-@RljW5{L&@5m#V>)Bd7l5z;R$gcHYTbIj
zx71`Y@uuX~n0)O*Q=rjTx;^L%)nnBbU8&z|F12V47PrSJYzsF9K>M<szOiKwNa>A!
zliTaI_)T7~U~s#)34xVTh9;q*8N>v=-oy(YScl(W_Lzi~L1nq)B5+~Y7i<<9@f5G%
z=S?QT==Jhi1Ml9JjBj7PD*)f^Eg;hn?ntqh@Flup{I)ptDPIhs44K1u4ZOkQ77RYU
z#p^G%l$x}>*=Vtt&0e3^<Z=77THfF;)%k^BlhIHNhBx|(n*>iws9mcCPJYQFB?3&-
z8~h%f*{d}PW@E8u&%P4g^}n5vOMKrZBfnYnk4Q-%x^L$E4|DPlQQwtX_a^1Ho_cRq
z-k$pZoR*;X{~|Aeb~)khpO`M3nJXmkmzpk|n=9n*pPV3s&l((x-!?l_$IspA>B9M0
R<yt<-TuUce^0Y(fzX0EM$?5<A

literal 0
HcmV?d00001

diff --git a/benchmarks/ctests/example.rs b/benchmarks/ctests/example.rs
new file mode 100644
index 0000000..68583bd
--- /dev/null
+++ b/benchmarks/ctests/example.rs
@@ -0,0 +1,24 @@
+fn Fresh<T>(_var: T) -> () {}
+
+fn Consistent<T>(_var: T, _id: u16) -> () {}
+
+#[no_mangle]
+pub static IO_NAME: fn() -> i32 = tmp;
+
+#[no_mangle]
+fn tmp() -> i32 {
+    0
+}
+
+fn log(i: i32) -> () {}
+
+#[no_mangle]
+fn app() -> () {
+    let x = tmp();
+    Fresh(x);
+    log(x)
+}
+
+fn main() -> () {
+    app()
+}
diff --git a/benchmarks/ctests/example03.ll b/benchmarks/ctests/example03.ll
index f642b6b..df86c33 100644
--- a/benchmarks/ctests/example03.ll
+++ b/benchmarks/ctests/example03.ll
@@ -45,15 +45,15 @@ entry:
   call void @atomic_start()
   %call = call i32 @input()
   store i32 %call, ptr %x, align 4
-  %0 = load i32, ptr %x, align 4
-  call void @log(i32 noundef %0)
-  call void @atomic_end()
   store i32 1, ptr %y, align 4
-  %1 = load i32, ptr %y, align 4
-  %2 = add nsw i32 %1, 1
-  store i32 %2, ptr %z, align 4
-  %3 = load i32, ptr %z, align 4
-  call void @log(i32 noundef %3)
+  %0 = load i32, ptr %y, align 4
+  %add = add nsw i32 %0, 1
+  store i32 %add, ptr %z, align 4
+  %1 = load i32, ptr %z, align 4
+  call void @log(i32 noundef %1)
+  %2 = load i32, ptr %x, align 4
+  call void @log(i32 noundef %2)
+  call void @atomic_end()
   ret void
 }
 
diff --git a/benchmarks/ctests/example04.c b/benchmarks/ctests/example04.c
new file mode 100644
index 0000000..5cbb707
--- /dev/null
+++ b/benchmarks/ctests/example04.c
@@ -0,0 +1,27 @@
+#include <stdio.h>
+
+void Fresh(int x) {}
+void Consistent(int x, int id) {}
+void FreshConsistent(int x, int id) {}
+
+void atomic_start() {}
+void atomic_end() {}
+
+int input() { return 0; }
+int (*IO_NAME)() = input;
+
+void log(int x) {
+  printf("%d\n", x);
+}
+
+void app() {
+  int x = input();
+  int y = input();
+  log(y);
+  Consistent(x, 1);
+  FreshConsistent(y, 1);
+}
+
+int main() {
+  app();
+}
\ No newline at end of file
diff --git a/benchmarks/ctests/example04.orig.ll b/benchmarks/ctests/example04.orig.ll
new file mode 100644
index 0000000..8491e4d
--- /dev/null
+++ b/benchmarks/ctests/example04.orig.ll
@@ -0,0 +1,102 @@
+; ModuleID = '../../benchmarks/ctests/example04.c'
+source_filename = "../../benchmarks/ctests/example04.c"
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-macosx12.0.0"
+
+@IO_NAME = global ptr @input, align 8
+@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @Fresh(i32 noundef %x) #0 {
+entry:
+  %x.addr = alloca i32, align 4
+  store i32 %x, ptr %x.addr, align 4
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @Consistent(i32 noundef %x, i32 noundef %id) #0 {
+entry:
+  %x.addr = alloca i32, align 4
+  %id.addr = alloca i32, align 4
+  store i32 %x, ptr %x.addr, align 4
+  store i32 %id, ptr %id.addr, align 4
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @FreshConsistent(i32 noundef %x, i32 noundef %id) #0 {
+entry:
+  %x.addr = alloca i32, align 4
+  %id.addr = alloca i32, align 4
+  store i32 %x, ptr %x.addr, align 4
+  store i32 %id, ptr %id.addr, align 4
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @atomic_start() #0 {
+entry:
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @atomic_end() #0 {
+entry:
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @input() #0 {
+entry:
+  ret i32 0
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @log(i32 noundef %x) #0 {
+entry:
+  %x.addr = alloca i32, align 4
+  store i32 %x, ptr %x.addr, align 4
+  %0 = load i32, ptr %x.addr, align 4
+  %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %0)
+  ret void
+}
+
+declare i32 @printf(ptr noundef, ...) #1
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @app() #0 {
+entry:
+  %x = alloca i32, align 4
+  %y = alloca i32, align 4
+  %call = call i32 @input()
+  store i32 %call, ptr %x, align 4
+  %call1 = call i32 @input()
+  store i32 %call1, ptr %y, align 4
+  %0 = load i32, ptr %y, align 4
+  call void @log(i32 noundef %0)
+  %1 = load i32, ptr %x, align 4
+  call void @Consistent(i32 noundef %1, i32 noundef 1)
+  %2 = load i32, ptr %y, align 4
+  call void @FreshConsistent(i32 noundef %2, i32 noundef 1)
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @main() #0 {
+entry:
+  call void @app()
+  ret i32 0
+}
+
+attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
+attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+!llvm.ident = !{!4}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 8, !"PIC Level", i32 2}
+!2 = !{i32 7, !"uwtable", i32 1}
+!3 = !{i32 7, !"frame-pointer", i32 1}
+!4 = !{!"Homebrew clang version 17.0.2"}
diff --git a/ocelot/AtomicRegionInference/Makefile b/ocelot/AtomicRegionInference/Makefile
index 306f019..360ec68 100644
--- a/ocelot/AtomicRegionInference/Makefile
+++ b/ocelot/AtomicRegionInference/Makefile
@@ -4,6 +4,7 @@ all:
 	make eg1
 	make eg2
 	make eg3
+	make eg4
 
 eg1:
 	TEST=example01 make test
@@ -11,6 +12,8 @@ eg2:
 	TEST=example02 make test
 eg3:
 	TEST=example03 make test
+eg4:
+	TEST=example04 make test
 
 run_eg1:
 	TEST=example01 make run
@@ -18,6 +21,8 @@ run_eg2:
 	TEST=example02 make run
 run_eg3:
 	TEST=example03 make run
+run_eg4:
+	TEST=example04 make run
 
 test:
 	$(MAKE) -C build
diff --git a/ocelot/AtomicRegionInference/src/Helpers.cpp b/ocelot/AtomicRegionInference/src/Helpers.cpp
index a0f62cf..5ca398e 100644
--- a/ocelot/AtomicRegionInference/src/Helpers.cpp
+++ b/ocelot/AtomicRegionInference/src/Helpers.cpp
@@ -34,3 +34,11 @@ void printInsts(const inst_vec& iv) {
     errs() << *inst << "\n";
   }
 }
+
+void printIntInsts(const std::map<int, inst_vec>& iim) {
+  for (auto& [id, insts] : iim) {
+    errs() << id << " ->\n";
+    printInsts(insts);
+    errs() << "\n";
+  }
+}
diff --git a/ocelot/AtomicRegionInference/src/InferAtoms.cpp b/ocelot/AtomicRegionInference/src/InferAtoms.cpp
index b0219cd..be7f108 100644
--- a/ocelot/AtomicRegionInference/src/InferAtoms.cpp
+++ b/ocelot/AtomicRegionInference/src/InferAtoms.cpp
@@ -30,6 +30,8 @@ PreservedAnalyses InferAtomsPass::run(Module& M, ModuleAnalysisManager& AM) {
   std::map<int, inst_vec> consVars;
   inst_vec_vec freshVars;
   inst_insts_map inputMap = buildInputs(this->M);
+  errs() << "inputMap:\n";
+  printInstInsts(inputMap);
   inst_vec toDelete;
   getAnnotations(&consVars, &freshVars, inputMap, &toDelete);
   // TODO: need to add unique point of call chain prefix to cons set
@@ -90,47 +92,60 @@ void InferAtomsPass::getAnnotations(std::map<int, inst_vec>* consVars, inst_vec_
       for (auto& I : B) {
         if (auto* ci = dyn_cast<CallInst>(&I)) {
 #if DEBUG
-          errs() << "[Loop Inst] cur inst = CallInst\n";
+          errs() << "[Loop Inst] Found call: " << *ci << "\n";
 #endif
           auto* fun = ci->getCalledFunction();
           // Various empty or null checks
           if (fun == NULL || fun->empty() || !fun->hasName()) continue;
           // Consistent and FreshConsistent
+          // TODO: Fix FreshConsistent
           if (isAnnot(fun->getName()) && !fun->getName().equals("Fresh")) {
-#if DEBUG
-            errs() << "[Loop Inst] Calls Consistent/FreshConsistent\n";
-#endif
             toDelete->push_back(ci);
-            // First para is var, second is id
             int setID;
             // Bit cast use of x, then value operand of store
-            Instruction* var = dyn_cast<Instruction>(ci->getOperand(0));
-
+            auto* var = dyn_cast<Instruction>(ci->getOperand(0));
             if (var == NULL) continue;
-            // errs() << "New consistent annot. with " << *var<<"\n";
-            Value* id = ci->getOperand(1);
-            if (ConstantInt* cint = dyn_cast<ConstantInt>(id)) {
+#if DEBUG
+            errs() << "Cons. annot. for: " << *var << "\n";
+#endif
+
+            auto* id = ci->getOperand(1);
+            if (auto* cint = dyn_cast<ConstantInt>(id)) {
               setID = cint->getSExtValue();
+#if DEBUG
+              errs() << "In set with label: " << setID << "\n";
+#endif
             }
+
             std::queue<Value*> customUsers;
             std::set<Instruction*> v;
             // v.emplace(ci);
             // in case var itself is iOp
-            for (Instruction* iOp : inputMap[var]) {
-              v.emplace(iOp);
+#if DEBUG
+            errs() << "Add to v inputs assoc. w/ Cons. var:\n";
+#endif
+            for (auto* input : inputMap[var]) {
+#if DEBUG
+              errs() << "Input: " << *input << "\n";
+#endif
+              v.emplace(input);
             }
 
             // customUsers.push(var);
-            for (Value* use : var->users()) {
-              // don't push the annotation
-              if (use == ci) {
-                continue;
-              }
-              // errs() << "DEBUG: pushing use of var: " << *use << "\n";
+#if DEBUG
+            errs() << "Collect uses of Cons. var:\n";
+#endif
+            for (auto* use : var->users()) {
+              // Don't push the annotation
+              if (use == ci) continue;
+#if DEBUG
+              errs() << "Use: " << *use << "\n";
+#endif
               customUsers.push(use);
             }
+
             while (!customUsers.empty()) {
-              Value* use = customUsers.front();
+              auto* use = customUsers.front();
               customUsers.pop();
               // errs() << "DEBUG: use is " << *use << " of var " << *var<<"\n";
               if (Instruction* instUse = dyn_cast<Instruction>(use)) {
@@ -168,34 +183,67 @@ void InferAtomsPass::getAnnotations(std::map<int, inst_vec>* consVars, inst_vec_
                 }
               }
             }
-            // last case
-            if (v.empty()) {
-              // some entries have a first link with ci, not var
 
-              for (Instruction* iOp : inputMap[ci]) {
+            // Last case
+            if (v.empty()) {
+#if DEBUG
+              errs() << "v empty, go over inputs assoc. w/ Cons. annot.:\n";
+#endif
+              // Some entries have a first link with ci, not var
+              for (auto* input : inputMap[ci]) {
+#if DEBUG
+                errs() << "Input: " << *input << "\n";
+#endif
                 if (inputMap[ci].size() == 1) {
-                  for (Instruction* origLink : inputMap[iOp]) {
+#if DEBUG
+                  errs() << "Set of assoc. inputs is a singleton\n";
+#endif
+                  for (auto* origLink : inputMap[input]) {
+#if DEBUG
+                    errs() << "Add to v the original input: " << *origLink << "\n";
+#endif
                     v.emplace(origLink);
                   }
                 } else {
-                  v.emplace(iOp);
+#if DEBUG
+                  errs() << "Set of assoc. input isn't a singleton, add to v the input\n";
+#endif
+                  v.emplace(input);
                 }
               }
             }
-            // for later deletion purposes
+
+            // For later deletion purposes
+#if DEBUG
+            errs() << "Remove inputs assoc. w/ Cons. annot.\n";
+#endif
             inputMap.erase(ci);
 
             if (!v.empty()) {
-              inst_vec temp;
-              for (Instruction* item : v) {
-                temp.push_back(item);
+#if DEBUG
+              errs() << "v not empty\n";
+#endif
+              inst_vec tmp;
+#if DEBUG
+              errs() << "Add each item in v to tmp:\n";
+#endif
+              for (auto* item : v) {
+#if DEBUG
+                errs() << "Item: " << *item << "\n";
+#endif
+                tmp.push_back(item);
               }
-              // add the collected list to the map
+
+              // Add the collected list to the map
               if (consVars->find(setID) != consVars->end()) {
-                consVars->at(setID).insert(consVars->at(setID).end(), temp.begin(), temp.end());
+                consVars->at(setID).insert(consVars->at(setID).end(), tmp.begin(), tmp.end());
               } else {
-                consVars->emplace(setID, temp);
+                consVars->emplace(setID, tmp);
               }
+#if DEBUG
+              errs() << "Add tmp items to consVars: \n";
+              printIntInsts(*consVars);
+#endif
             }
           } else if (fun->getName().equals("Fresh")) {
 #if DEBUG
@@ -247,10 +295,7 @@ void InferAtomsPass::getAnnotations(std::map<int, inst_vec>* consVars, inst_vec_
                   }
                 }
               }
-            } else {
-              // errs() << "error casting\n";
             }
-            // errs() << "New Fresh annot. with " << *var<<"\n";
             // v.push_back(ci);
 
 #if DEBUG
@@ -341,33 +386,71 @@ void InferAtomsPass::removeAnnotations(inst_vec* toDelete) {
   }
 }
 
-/*Given the starting point annotations of conSets, find the
-deepest unique point of the call chain*/
+// Given the starting point annotations of conSets, find the
+// deepest unique point of the call chain
 std::map<int, inst_vec> InferAtomsPass::collectCons(std::map<int, inst_vec> startingPoints, inst_insts_map inputMap) {
+#if DEBUG
+  errs() << "=== collectCons ===\n";
+#endif
   std::map<int, inst_vec> toReturn;
-  for (std::pair<int, inst_vec> iv : startingPoints) {
+
+#if DEBUG
+  errs() << "Go over all cons. sets\n";
+#endif
+  for (auto& [id, starts] : startingPoints) {
+#if DEBUG
+    errs() << "Go over cons. set " << id << "\n";
+#endif
     std::set<Instruction*> unique;
     std::map<Instruction*, std::set<Instruction*>> callChains;
-    // each item should be the starting point from a different annot
-    for (Instruction* item : iv.second) {
+
+    // Each item should be the starting point from a different annot
+    for (auto* start : starts) {
+#if DEBUG
+      errs() << "Starting point: " << *start << "\n";
+#endif
+      // Add self to call chain
 #if DEBUG
-      errs() << "Starting point: " << *item << "\n";
+      errs() << "Add starting point to call chain\n";
 #endif
-      // add self to call chain
-      callChains[item].insert(item);
+      callChains[start].insert(start);
 
-      for (Instruction* iOp : inputMap[item]) {
+#if DEBUG
+      errs() << "Go over inputs assoc. w/ starting point:\n";
+#endif
+      for (auto* input : inputMap[start]) {
         //    unique.insert(iOp);
-        callChains[item].insert(iOp);
+#if DEBUG
+        errs() << "Input: " << *input << "\n";
+        errs() << "Add input to call chain\n";
+#endif
+        callChains[start].insert(input);
+
         std::queue<Instruction*> toExplore;
-        toExplore.push(iOp);
+#if DEBUG
+        errs() << "Add input to toExplore, go over toExplore\n";
+#endif
+        toExplore.push(input);
+
         while (!toExplore.empty()) {
-          Instruction* curr = toExplore.front();
+          auto* cur = toExplore.front();
           toExplore.pop();
-          for (Instruction* intermed : inputMap[curr]) {
-            if (!(find(callChains[item].begin(), callChains[item].end(), intermed) != callChains[item].end())) {
-              callChains[item].insert(intermed);
+#if DEBUG
+          errs() << "Exploring cur: " << *cur << "\n";
+          errs() << "Go over inputs assoc. w/ cur: " << *cur << "\n";
+#endif
+
+          for (auto* intermed : inputMap[cur]) {
+#if DEBUG
+            errs() << "intermed: " << *intermed << "\n";
+#endif
+            if (find(callChains[start].begin(), callChains[start].end(), intermed) == callChains[start].end()) {
+              callChains[start].insert(intermed);
               toExplore.push(intermed);
+            } else {
+#if DEBUG
+              errs() << "intermed already in call chain\n";
+#endif
             }
           }
         }
@@ -375,57 +458,78 @@ std::map<int, inst_vec> InferAtomsPass::collectCons(std::map<int, inst_vec> star
       }  // finish constructing call chain for one annot. in the set
 
     }  // constructed call chains for ALL annot. in the set.
-    // now check the call chain
+       // now check the call chain
 
     // int index = 0;
     // map<Instruction*,bool> foundUniquePoint;
     // clean up the call chains
 
-    for (auto ccmap : callChains) {
-      for (Instruction* possibility : ccmap.second) {
-        // if the link is in the same function, then continue
-        // errs() << "examining possibility: " << *possibility << "\n";
-        bool sf = false;
-        for (Instruction* link : inputMap[possibility]) {
-          // errs() << "next link is" << *link << "\n";
-          if ((link != possibility) && link->getFunction() == possibility->getFunction()) {
-            sf = true;
-          }
-        }
-        if (sf) {
+#if DEBUG
+    errs() << "Finished building call chains, go over them\n";
+#endif
+    for (auto callChain : callChains) {
+#if DEBUG
+      errs() << "Next chain\n";
+#endif
+      auto& [id, chain] = callChain;
+      for (auto* inst : chain) {
+#if DEBUG
+        errs() << "Cur point along chain: " << *inst << "\n";
+#endif
+        bool isSameFun = false;  // Set once ANY other link lies in inst's function
+        for (auto* link : inputMap[inst])
+          isSameFun |= ((link != inst) && link->getFunction() == inst->getFunction());
+        if (isSameFun) {
+#if DEBUG
+          errs() << "Continue if the link is in the same function\n";
+#endif
           continue;
         }
+
         bool isUnique = true;
-        for (auto ccmapNest : callChains) {
-          // if self then skip
-          if (ccmapNest == ccmap) {
-            continue;
-          }
-          // otherwise check if this map also contains the possibility
-          if (find(ccmapNest.second.begin(), ccmapNest.second.end(), possibility) != ccmapNest.second.end()) {
+        for (auto otherCallChain : callChains) {
+          // Skip if self
+          if (otherCallChain == callChain) continue;
+          auto& [_, otherChain] = otherCallChain;
+          // Otherwise check if this map also contains inst
+          if (find(otherChain.begin(), otherChain.end(), inst) != otherChain.end()) {
             isUnique = false;
             break;
           }
         }
+
         if (isUnique) {
-          unique.insert(possibility);
-          //  errs() << "Found unique!" << *possibility << "\n";
-        } else {
-          // try another poss.
-          continue;
+          unique.insert(inst);
+#if DEBUG
+          errs() << "Found unique point along chain: " << *inst << "\n";
+#endif
         }
       }
     }
 
     inst_vec v;
-    for (Instruction* item2 : unique) {
-      if (!isa<AllocaInst>(item2)) {
-        v.push_back(item2);
+#if DEBUG
+    errs() << "Go over unique insts\n";
+#endif
+    for (auto* inst : unique) {
+      if (!isa<AllocaInst>(inst)) {
+#if DEBUG
+        errs() << "Unique inst != AllocaInst, add to v: " << *inst << "\n";
+#endif
+        v.push_back(inst);
       }
     }
-    toReturn[iv.first] = v;
+
+#if DEBUG
+    errs() << "Add v to toReturn at ID " << id << ": \n";
+    printInsts(v);
+#endif
+    toReturn[id] = v;
   }  // end starting point check
 
+#if DEBUG
+  errs() << "*** collectCons ***\n";
+#endif
   return toReturn;
 }
 
diff --git a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp
index 2802f76..8dc4375 100644
--- a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp
+++ b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp
@@ -60,10 +60,9 @@ BasicBlock* InferFreshCons::getLoopEnd(BasicBlock* bb) {
 
 // Top level region inference function -- could flatten later
 void InferFreshCons::inferConsistent(std::map<int, inst_vec> consSets) {
-  // TODO: start with pseudo code structure from design doc
-  for (auto [id, set] : consSets) {
+  for (auto& [id, set] : consSets) {
 #if DEBUG
-    errs() << "[InferConsistent] starting set " << id << "\n";
+    errs() << "[InferConsistent] Adding region for set " << id << "\n";
 #endif
     addRegion(set, Consistent);
   }
@@ -74,7 +73,6 @@ void InferFreshCons::inferFresh(inst_vec_vec freshSets) {
 #if DEBUG
   errs() << "=== inferFresh ===\n";
 #endif
-  // TODO: start with pseudo code structure from design doc
   for (auto freshSet : freshSets) addRegion(freshSet, Fresh);
 #if DEBUG
   errs() << "*** inferFresh ***\n";
@@ -220,7 +218,7 @@ void InferFreshCons::addRegion(inst_vec targetInsts, RegionKind regionKind) {
                   clone->setOperand(i, it->second);
                 }
               }
-            } else if (auto* ci = dyn_cast<CallInst>(&I)) {
+            } else if (isa<CallInst>(&I)) {
               clone = I.clone();
 
               if (auto* op = dyn_cast<Instruction>(I.getOperand(0))) {
@@ -338,9 +336,7 @@ void InferFreshCons::addRegion(inst_vec targetInsts, RegionKind regionKind) {
     } else if (endDom == nullptr) {
       errs() << "[Error] Null endDom after scope merge\n";
     }
-#if DEBUG
-    errs() << "[Loop regionsNeeded] Insert insts\n";
-#endif
+
     // TODO: fallback if endDom is null? Need hyper-blocks, I think
     // possibly can do a truncation check, to lessen the size a little, but could that interfere with compiler optimizations?
     auto* regionStart = truncate(startDom, true, targetInsts, seenFuns);
@@ -381,15 +377,13 @@ Instruction* InferFreshCons::truncate(BasicBlock* B, bool forwards, inst_vec set
 
 #if DEBUG
   errs() << "Set:\n";
-  for (auto& inst : set)
-    errs() << *inst << "\n";
+  printInsts(set);
 #endif
 
   // Truncate the front
   if (forwards) {
 #if DEBUG
-    errs() << "Truncate startDom\n";
-    errs() << "Go over each inst\n";
+    errs() << "Truncate startDom, go over each inst\n";
 #endif
     for (auto& I : *B) {
       // Stop at first inst in bb that is in the set.
@@ -414,11 +408,10 @@ Instruction* InferFreshCons::truncate(BasicBlock* B, bool forwards, inst_vec set
   }
 
 #if DEBUG
-  errs() << "Truncate endDom\n";
-  errs() << "Go over each inst in reverse\n";
+  errs() << "Truncate endDom, go over each inst in reverse\n";
 #endif
   // Reverse directions if not forwards
-  Instruction* prev = NULL;
+  Instruction* prev = nullptr;  // Null-checked below, so it must not start indeterminate
   for (auto I = B->rbegin(), rend = B->rend(); I != rend; I++) {
     auto* inst = &*I;
     if (find(set.begin(), set.end(), inst) != set.end()) {
@@ -427,7 +420,7 @@ Instruction* InferFreshCons::truncate(BasicBlock* B, bool forwards, inst_vec set
 #endif
       // Need to return the previous inst (next in forwards),
       // as it should be inserted before the returned inst
-      if (prev == NULL) {
+      if (prev == nullptr) {
         // Only happens if use is a ret inst, which is a scope use to make the branching
         // work, not an actual one, so this is safe
         return inst;
@@ -585,7 +578,7 @@ inst_inst_pair InferFreshCons::findShortest(inst_inst_vec regionsFound) {
     // Get the max length from the bb to the end instruction
     std::vector<BasicBlock*> v;
     int endLength = getSubLength(startParent, end, v);
-    // Substract the prefix before the start inst
+    // Subtract the prefix before the start inst
     endLength -= prefixLength;
 #if DEBUG
     errs() << "[Loop regionsFound] Region length " << endLength << "\n";
diff --git a/ocelot/AtomicRegionInference/src/TaintTracker.cpp b/ocelot/AtomicRegionInference/src/TaintTracker.cpp
index f80f7ce..1c400d9 100644
--- a/ocelot/AtomicRegionInference/src/TaintTracker.cpp
+++ b/ocelot/AtomicRegionInference/src/TaintTracker.cpp
@@ -12,7 +12,7 @@ inst_insts_map buildInputs(Module* M) {
 
   for (auto inputInst : inputInsts) {
 #if DEBUG
-    errs() << "[Loop inputInst] orig input: " << *inputInst << "\n";
+    errs() << "[Loop inputInst] inputInst: " << *inputInst << "\n";
 #endif
 
     // Add self to map
@@ -32,24 +32,24 @@ inst_insts_map buildInputs(Module* M) {
 #if DEBUG
       errs() << "=== Loop toExplore ===\n";
 #endif
-      auto* curVal = toExplore.front();
+      auto* curInst = toExplore.front();
       toExplore.pop();
 
-      if (curVal == NULL) continue;
+      if (curInst == NULL) continue;
 
 #if DEBUG
-      errs() << "[Loop toExplore] cur inst: " << *curVal << "\n";
+      errs() << "[Loop toExplore] curInst: " << *curInst << "\n";
 #endif
 
       val_vec interProcFlows;
-      if (curVal == inputInst) {
+      if (curInst == inputInst) {
 #if DEBUG
-        errs() << "[Loop toExplore] cur inst = orig input\n";
-        errs() << "[Loop toExplore] Call traverseLocal with cur inst (tainted), orig input (srcInput), caller (none)\n";
+        errs() << "[Loop toExplore] curInst = inputInst\n";
+        errs() << "[Loop toExplore] Call traverseLocal with curInst (tainted), origInput (srcInput), caller (none)\n";
 #endif
-        interProcFlows = traverseLocal(curVal, inputInst, &taintedInsts, nullptr);
+        interProcFlows = traverseLocal(curInst, inputInst, &taintedInsts, nullptr);
 #if DEBUG
-        errs() << "[Loop toExplore] [cur inst = orig input] Inspect interProcFlows:\n";
+        errs() << "[Loop toExplore][curInst = inputInst] Inspect interProcFlows:\n";
 #endif
         for (auto* vipf : interProcFlows) {
           if (auto* iipf = dyn_cast<Instruction>(vipf)) {
@@ -59,19 +59,19 @@ inst_insts_map buildInputs(Module* M) {
             }
 
 #if DEBUG
-            errs() << "Adding orig input (" << *inputInst << ") to set at " << *iipf << "\n";
+            errs() << "Add inputInst (" << *inputInst << ") to set at " << *iipf << "\n";
 #endif
             taintedInsts[iipf].insert(inputInst);
           }
         }
-      } else if (isa<CallInst>(curVal)) {
+      } else if (isa<CallInst>(curInst)) {
 #if DEBUG
         errs() << "[Loop toExplore] cur inst = CallInst\n";
 #endif
         // Note it will not be iop, even though iop is a call
         // This case handles both returns and pbref
 
-        promotedInputs.push_back(dyn_cast<CallInst>(curVal));
+        promotedInputs.push_back(dyn_cast<CallInst>(curInst));
         auto* next = toExplore.front();
         toExplore.pop();
         // If the next is a return, this was a return flow
@@ -81,11 +81,11 @@ inst_insts_map buildInputs(Module* M) {
 #if DEBUG
           errs() << "[Loop toExplore] cur inst next = Return inst (return flow)\n";
 #endif
-          interProcFlows = traverseLocal(curVal, dyn_cast<CallInst>(curVal), &taintedInsts, nullptr);
+          interProcFlows = traverseLocal(curInst, dyn_cast<CallInst>(curInst), &taintedInsts, nullptr);
           for (Value* vipf : interProcFlows) {
             if (Instruction* iipf = dyn_cast<Instruction>(vipf)) {
               // don't add self
-              if (curVal == vipf) {
+              if (curInst == vipf) {
                 continue;
               }
               if (CallInst* anno_check = dyn_cast<CallInst>(iipf)) {
@@ -95,7 +95,7 @@ inst_insts_map buildInputs(Module* M) {
                   continue;
                 }
               }
-              taintedInsts[iipf].insert(dyn_cast<CallInst>(curVal));
+              taintedInsts[iipf].insert(dyn_cast<CallInst>(curInst));
             }
           }
         } else if (isa<Argument>(next)) {
@@ -105,7 +105,7 @@ inst_insts_map buildInputs(Module* M) {
           // Grab the para corresponding to the argument
           int index = -1;
           int i = 0;
-          CallInst* ci = dyn_cast<CallInst>(curVal);
+          CallInst* ci = dyn_cast<CallInst>(curInst);
 
           if (ci->getCalledFunction() == NULL) continue;
           if (ci->getCalledFunction()->empty()) continue;
@@ -200,7 +200,7 @@ inst_insts_map buildInputs(Module* M) {
             }
             // re nullptr check
             if (fstUse != nullptr) {
-              interProcFlows = traverseLocal(fstUse, dyn_cast<CallInst>(curVal), &taintedInsts, nullptr);
+              interProcFlows = traverseLocal(fstUse, dyn_cast<CallInst>(curInst), &taintedInsts, nullptr);
               for (Value* vipf : interProcFlows) {
                 if (Instruction* iipf = dyn_cast<Instruction>(vipf)) {
                   if (CallInst* anno_check = dyn_cast<CallInst>(iipf)) {
@@ -210,15 +210,15 @@ inst_insts_map buildInputs(Module* M) {
                       continue;
                     }
                   }
-                  taintedInsts[iipf].insert(dyn_cast<CallInst>(curVal));
+                  taintedInsts[iipf].insert(dyn_cast<CallInst>(curInst));
                 }
               }
             }
           }
         }
-      } else if (isa<Argument>(curVal)) {
+      } else if (isa<Argument>(curInst)) {
 #if DEBUG
-        errs() << "[Loop toExplore] cur inst = Argument (tainted arg)\n";
+        errs() << "[Loop toExplore] curInst = Argument (tainted arg)\n";
 #endif
 
         auto* caller = dyn_cast<CallInst>(toExplore.front());
@@ -231,11 +231,11 @@ inst_insts_map buildInputs(Module* M) {
         auto* innerInputInst = dyn_cast<Instruction>(toExplore.front());
         toExplore.pop();
 #if DEBUG
-        errs() << "[Loop toExplore] orig input: " << *innerInputInst << "\n";
-        errs() << "[Loop toExplore] Call traverseLocal with cur inst (tainted), orig input (srcInput), caller\n";
+        errs() << "[Loop toExplore] inputInst: " << *innerInputInst << "\n";
+        errs() << "[Loop toExplore] Call traverseLocal with curInst (tainted), inputInst, caller\n";
 #endif
 
-        interProcFlows = traverseLocal(curVal, innerInputInst, &taintedInsts, caller);
+        interProcFlows = traverseLocal(curInst, innerInputInst, &taintedInsts, caller);
 
 #if DEBUG
         errs() << "[Loop toExplore] Inspect interProcFlows:\n";
@@ -282,26 +282,26 @@ val_vec traverseLocal(Value* tainted, Instruction* srcInput, inst_insts_map* tai
   errs() << "=== traverseLocal ===\n";
 #endif
 
-  val_vec interProcSinks;
+  val_vec interProcFlows;
   std::queue<Value*> localDeps;
 
 #if DEBUG
-  errs() << "Add cur inst to localDeps\n";
+  errs() << "Add tainted inst to localDeps\n";
 #endif
   localDeps.push(tainted);
   while (!localDeps.empty()) {
 #if DEBUG
     errs() << "=== Loop localDeps ===\n";
 #endif
-    auto* curVal = localDeps.front();
+    auto* curInst = localDeps.front();
     localDeps.pop();
 #if DEBUG
-    errs() << "[Loop localDeps] cur inst: " << *curVal << "\n";
+    errs() << "[Loop localDeps] curInst: " << *curInst << "\n";
 #endif
     val_vec customUsers;
-    if (auto* si = dyn_cast<StoreInst>(curVal)) {
+    if (auto* si = dyn_cast<StoreInst>(curInst)) {
 #if DEBUG
-      errs() << "[Loop localDeps] cur inst = StoreInst\n";
+      errs() << "[Loop localDeps] curInst = StoreInst\n";
 #endif
       // Add the pointer to deps, as stores have no uses
       // Add info on the store to the map
@@ -314,32 +314,46 @@ val_vec traverseLocal(Value* tainted, Instruction* srcInput, inst_insts_map* tai
         seti.insert(srcInput);
         taintedInsts->emplace(si, seti);
       }
+
+      // See if it is (or aliases?) one of the function arguments (PBRef comp)
+      auto* storePtr = si->getPointerOperand()->stripPointerCasts();
+      errs() << "[Loop args] storePtr: " << *storePtr << "\n";
 #if DEBUG
-      errs() << "[Loop localDeps] Adding orig input (" << *srcInput << ") to set at cur inst (" << *si << ")\n";
+      errs() << "[Loop localDeps] Go over fun args\n";
 #endif
-      // See if it is (or aliases?) one of the function arguments (PBRef comp)
       for (auto& arg : si->getFunction()->args()) {
-        auto* storePtr = si->getPointerOperand()->stripPointerCasts();
 #if DEBUG
-        errs() << "[Loop localDeps] Is ptr being stored to (" << *storePtr << ") = fun arg (" << arg << ")\n";
+        errs() << "[Loop args] arg: " << arg << "\n";
+        // errs() << "[Loop localDeps] Is ptr being stored to (" << *storePtr << ") = fun arg (" << arg << ")\n";
 #endif
         if (storePtr == &arg) {
-          // if taint came from inside any callsite is potentially tainted
+          // storePtr: _x_ = input();
+          // arg: Consistent(_x_, 1);
+#if DEBUG
+          errs() << "[Loop args] storePtr = arg\n";
+#endif
+          // If the taint originated inside this function (no caller given), every call site is potentially tainted
           if (caller == nullptr) {
+#if DEBUG
+            errs() << "[Loop args] Caller = nullptr\n";
+#endif
             for (auto calls : si->getFunction()->users()) {
-              interProcSinks.push_back(calls);
-              interProcSinks.push_back(dyn_cast<Value>(&arg));
+              interProcFlows.push_back(calls);
+              interProcFlows.push_back(dyn_cast<Value>(&arg));
               if (auto key = dyn_cast<Instruction>(calls)) {
-                // check to make sure not already visited
+                // Check to make sure not already visited
                 //   taintedInsts->at(key).insert(srcOp);
               }
             }
           } else {
-            // otherwise, just the caller's
-            interProcSinks.push_back(caller);
-            interProcSinks.push_back(dyn_cast<Value>(&arg));
+#if DEBUG
+            errs() << "[Loop args] Caller: " << *caller << "\n";
+#endif
+            // Otherwise, just the caller's
+            interProcFlows.push_back(caller);
+            interProcFlows.push_back(dyn_cast<Value>(&arg));
             if (auto key = dyn_cast<Instruction>(caller)) {
-              // check to make sure not already visited
+              // Check to make sure not already visited
               //        taintedInsts->at(key).insert(srcOp);
             }
           }
@@ -357,14 +371,14 @@ val_vec traverseLocal(Value* tainted, Instruction* srcInput, inst_insts_map* tai
         if (auto* useOfStore = dyn_cast<Instruction>(use)) {
           if (storePrecedesUse(useOfStore, si)) {
 #if DEBUG
-            errs() << "[Loop Store Users] store precedes this use, add:" << *useOfStore << "\n";
+            errs() << "[Loop Store Users] Store precedes this use, add:" << *useOfStore << " to customUsers\n";
 #endif
             customUsers.push_back(useOfStore);
           }
         }
       }
       // Update curVal to be the pointer
-      curVal = si->getPointerOperand();
+      curInst = si->getPointerOperand();
 
       // If it's a gepi, see if there are others that occur afterwards
       if (isa<GetElementPtrInst>(si->getPointerOperand())) {
@@ -375,22 +389,22 @@ val_vec traverseLocal(Value* tainted, Instruction* srcInput, inst_insts_map* tai
         // check pbref, need to compare op of the gepi, not gepi itself
         for (auto& arg : si->getFunction()->args()) {
 #if DEBUG
-          errs() << " PBRef comp: " << *dyn_cast<Instruction>(curVal)->getOperand(0) << " and " << arg << "\n";
+          errs() << " PBRef comp: " << *dyn_cast<Instruction>(curInst)->getOperand(0) << " and " << arg << "\n";
 #endif
-          if (dyn_cast<Instruction>(curVal)->getOperand(0) == &arg) {
+          if (dyn_cast<Instruction>(curInst)->getOperand(0) == &arg) {
             // if taint came from inside any callsite is potentially tainted
             if (caller == nullptr) {
               for (Value* calls : si->getFunction()->users()) {
-                interProcSinks.push_back(calls);
-                interProcSinks.push_back(dyn_cast<Value>(&arg));
+                interProcFlows.push_back(calls);
+                interProcFlows.push_back(dyn_cast<Value>(&arg));
                 if (Instruction* key = dyn_cast<Instruction>(calls)) {
                   //         taintedInsts->at(key).insert(srcOp);
                 }
               }
             } else {
               // otherwise, just the caller's
-              interProcSinks.push_back(caller);
-              interProcSinks.push_back(dyn_cast<Value>(&arg));
+              interProcFlows.push_back(caller);
+              interProcFlows.push_back(dyn_cast<Value>(&arg));
               if (Instruction* key = dyn_cast<Instruction>(caller)) {
                 //  taintedInsts->at(key).insert(srcOp);
               }
@@ -401,12 +415,12 @@ val_vec traverseLocal(Value* tainted, Instruction* srcInput, inst_insts_map* tai
 
     } else {
 #if DEBUG
-      errs() << "[Loop localDeps] cur inst != StoreInst\n";
-      errs() << "[Loop localDeps] Add users of cur inst to customUsers:\n";
-      for (auto* use : curVal->users()) errs() << *use << "\n";
+      errs() << "[Loop localDeps] curInst != StoreInst\n";
+      errs() << "[Loop localDeps] Add users of curInst to customUsers:\n";
+      for (auto* use : curInst->users()) errs() << *use << "\n";
 #endif
       // If not a store, do normal users of curVal
-      customUsers.insert(customUsers.end(), curVal->user_begin(), curVal->user_end());
+      customUsers.insert(customUsers.end(), curInst->user_begin(), curInst->user_end());
     }
 
 #if DEBUG
@@ -428,9 +442,9 @@ val_vec traverseLocal(Value* tainted, Instruction* srcInput, inst_insts_map* tai
 #endif
           for (auto calls : ri->getFunction()->users()) {
             if (auto ci = dyn_cast<CallInst>(calls)) {
-              interProcSinks.push_back(calls);
+              interProcFlows.push_back(calls);
               // extra for bookkeeping
-              interProcSinks.push_back(use);
+              interProcFlows.push_back(use);
             }
           }
         } else {
@@ -438,9 +452,9 @@ val_vec traverseLocal(Value* tainted, Instruction* srcInput, inst_insts_map* tai
           errs() << "[Loop customUsers] Some caller\n";
 #endif
           // otherwise, just the caller's
-          interProcSinks.push_back(caller);
+          interProcFlows.push_back(caller);
           // extra for bookkeeping
-          interProcSinks.push_back(use);
+          interProcFlows.push_back(use);
         }
       } else if (auto* ci = dyn_cast<CallInst>(use)) {
 #if DEBUG
@@ -475,16 +489,16 @@ val_vec traverseLocal(Value* tainted, Instruction* srcInput, inst_insts_map* tai
                 // if taint came from inside any callsite is potentially tainted
                 if (caller == nullptr) {
                   for (Value* calls : ci->getFunction()->users()) {
-                    interProcSinks.push_back(calls);
-                    interProcSinks.push_back(dyn_cast<Value>(&arg));
+                    interProcFlows.push_back(calls);
+                    interProcFlows.push_back(dyn_cast<Value>(&arg));
                     if (Instruction* key = dyn_cast<Instruction>(calls)) {
                       //        taintedInsts->at(key).insert(srcOp);
                     }
                   }
                 } else {
                   // otherwise, just the caller's
-                  interProcSinks.push_back(caller);
-                  interProcSinks.push_back(dyn_cast<Value>(&arg));
+                  interProcFlows.push_back(caller);
+                  interProcFlows.push_back(dyn_cast<Value>(&arg));
                   if (Instruction* key = dyn_cast<Instruction>(caller)) {
                     //      taintedInsts->at(key).insert(srcOp);
                   }
@@ -532,22 +546,24 @@ val_vec traverseLocal(Value* tainted, Instruction* srcInput, inst_insts_map* tai
         }
 
         unsigned int arg_num = ci->arg_size();
+        auto funName = calledFun->getName();
 #if DEBUG
-        errs() << "[Loop customUsers] Find tainted arg of " << calledFun->getName() << "\n";
+        errs() << "[Loop customUsers] Find tainted arg of " << funName << "\n";
 #endif
-        // Find the index of the tainted argument
+        // Find the param index of the tainted argument
         for (unsigned int i = 0; i < arg_num; i++) {
           auto* arg = ci->getArgOperand(i);
-          if (arg == curVal) {
-            auto funArg = calledFun->arg_begin() + i;
+          if (arg == curInst) {
+            auto param = calledFun->arg_begin() + i;
 #if DEBUG
-            errs() << "Found tainted arg: " << *arg << ", add fun arg (" << *funArg << "), the use (" << *ci << "), and orig input (" << *srcInput << ") to interProcFlows\n";
+            errs() << "[Loop customUsers] Found tainted arg of " << funName << ": " << *arg << "\n";
+            errs() << "[Loop customUsers] Add to interProcFlows the corresp. param " << *param << ", the call " << *ci << ", and srcInput " << *srcInput << "\n";
 #endif
-            interProcSinks.push_back(funArg);
+            interProcFlows.push_back(param);
             // MUST also push back the call inst.
-            interProcSinks.push_back(ci);
+            interProcFlows.push_back(ci);
             // MUST also push back the current srcInput
-            interProcSinks.push_back(srcInput);
+            interProcFlows.push_back(srcInput);
             if (auto* key = dyn_cast<Instruction>(ci)) {
               //  taintedInsts->at(key).insert(srcOp);
             }
@@ -556,8 +572,10 @@ val_vec traverseLocal(Value* tainted, Instruction* srcInput, inst_insts_map* tai
         }
       } else if (auto* iUse = dyn_cast<Instruction>(use)) {
 #if DEBUG
-        errs() << "[Loop customUsers] use != ReturnInst & use != CallInst\n";
+        errs() << "[Loop customUsers] use != ReturnInst & use != CallInst:\n";
+        errs() << *iUse << "\n";
 #endif
+
         if (iUse->isTerminator()) {
           if (iUse->getNumSuccessors() > 1) {
 // Add control deps off of a branch.
@@ -589,7 +607,7 @@ val_vec traverseLocal(Value* tainted, Instruction* srcInput, inst_insts_map* tai
 #if DEBUG
   errs() << "*** traverseLocal ***\n";
 #endif
-  return interProcSinks;
+  return interProcFlows;
 }
 
 inst_vec findInputInsts(Module* M) {
@@ -615,7 +633,6 @@ inst_vec findInputInsts(Module* M) {
                   errs() << "Found IO call: " << I << "\n";
 #endif
                   inputInsts.push_back(&I);
-                  break;
                 }
               }
             }
diff --git a/ocelot/AtomicRegionInference/src/include/Helpers.h b/ocelot/AtomicRegionInference/src/include/Helpers.h
index bfb6901..d5f553d 100644
--- a/ocelot/AtomicRegionInference/src/include/Helpers.h
+++ b/ocelot/AtomicRegionInference/src/include/Helpers.h
@@ -14,5 +14,6 @@ std::string getSimpleNodeLabel(const Value* Node);
 bool isAnnot(const StringRef annotName);
 void printInstInsts(const inst_insts_map& iim, bool onlyCalls = false);
 void printInsts(const inst_vec& iv);
+void printIntInsts(const std::map<int, inst_vec>& iim);
 
 #endif
\ No newline at end of file

From 7c01f9ab37804681ff6332d11cca33c556efad5d Mon Sep 17 00:00:00 2001
From: Robert Zhang <jiaxuanzhang066@gmail.com>
Date: Sat, 3 Feb 2024 21:38:54 -0500
Subject: [PATCH 07/18] [InferAtomsPass] Minimize consistent atomic regions

...by moving non-IO instructions out of regions.
---
 benchmarks/ctests/example04.ll                | 76 +++++++++++++++++++
 .../src/InferFreshCons.cpp                    | 25 +++---
 2 files changed, 92 insertions(+), 9 deletions(-)
 create mode 100644 benchmarks/ctests/example04.ll

diff --git a/benchmarks/ctests/example04.ll b/benchmarks/ctests/example04.ll
new file mode 100644
index 0000000..d05e743
--- /dev/null
+++ b/benchmarks/ctests/example04.ll
@@ -0,0 +1,76 @@
+; ModuleID = '../../benchmarks/ctests/example04.c'
+source_filename = "../../benchmarks/ctests/example04.c"
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-macosx12.0.0"
+
+@IO_NAME = global ptr @input, align 8
+@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @atomic_start() #0 {
+entry:
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @atomic_end() #0 {
+entry:
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @input() #0 {
+entry:
+  ret i32 0
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @log(i32 noundef %x) #0 {
+entry:
+  %x.addr = alloca i32, align 4
+  store i32 %x, ptr %x.addr, align 4
+  %0 = load i32, ptr %x.addr, align 4
+  %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %0)
+  ret void
+}
+
+declare i32 @printf(ptr noundef, ...) #1
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @app() #0 {
+entry:
+  %x = alloca i32, align 4
+  %y = alloca i32, align 4
+  call void @atomic_start()
+  %call = call i32 @input()
+  %call1 = call i32 @input()
+  call void @atomic_end()
+  store i32 %call, ptr %x, align 4
+  store i32 %call1, ptr %y, align 4
+  %0 = load i32, ptr %y, align 4
+  call void @log(i32 noundef %0)
+  %1 = load i32, ptr %x, align 4
+  call void undef(i32 noundef %1, i32 noundef 1)
+  %2 = load i32, ptr %y, align 4
+  call void undef(i32 noundef %2, i32 noundef 1)
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @main() #0 {
+entry:
+  call void @app()
+  ret i32 0
+}
+
+attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
+attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+!llvm.ident = !{!4}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 8, !"PIC Level", i32 2}
+!2 = !{i32 7, !"uwtable", i32 1}
+!3 = !{i32 7, !"frame-pointer", i32 1}
+!4 = !{!"Homebrew clang version 17.0.2"}
diff --git a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp
index 8dc4375..b6d100a 100644
--- a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp
+++ b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp
@@ -196,9 +196,10 @@ void InferFreshCons::addRegion(inst_vec targetInsts, RegionKind regionKind) {
 #if DEBUG
           errs() << I << "\n";
 #endif
-          if (!isa<AllocaInst>(I) && find(targetInsts.begin(), targetInsts.end(), &I) == targetInsts.end()) {
+          if (!isa<AllocaInst>(I)) {
+            auto shouldDelay = find(targetInsts.begin(), targetInsts.end(), &I) == targetInsts.end();
 #if DEBUG
-            errs() << "Should be delayed\n";
+            errs() << "  Should" << (shouldDelay ? " " : " NOT ") << "be delayed\n";
 #endif
 
             Instruction* clone;
@@ -219,9 +220,13 @@ void InferFreshCons::addRegion(inst_vec targetInsts, RegionKind regionKind) {
                 }
               }
             } else if (isa<CallInst>(&I)) {
-              clone = I.clone();
+              // In case I is an IO function call, we don't clone it
+              // and instead map it to itself for referencing later
 
-              if (auto* op = dyn_cast<Instruction>(I.getOperand(0))) {
+              clone = shouldDelay ? I.clone() : &I;
+
+              // Remap operand 0 only when it is an instruction (constants/arguments have no clone to look up)
+              if (auto* op = (shouldDelay && I.getNumOperands() > 1) ? dyn_cast<Instruction>(I.getOperand(0)) : nullptr) {
                 inst_inst_map::iterator it = clonedInsts.find(op);
                 assert(it != clonedInsts.end());
                 clone->setOperand(0, it->second);
@@ -234,15 +239,17 @@ void InferFreshCons::addRegion(inst_vec targetInsts, RegionKind regionKind) {
                 assert(it != clonedInsts.end());
                 clone->setOperand(0, it->second);
               }
-            }
-            // e.g., LoadInst
-            else {
+            } else {
+              // E.g., LoadInst
               clone = I.clone();
             }
 
             clonedInsts.emplace(&I, clone);
-            toDelete.emplace(&I);
-            toDelay.push_back(clone);
+
+            if (shouldDelay) {
+              toDelete.emplace(&I);
+              toDelay.push_back(clone);
+            }
           }
         }
 

From e7b9bfa8fd712fb9c079a7d34a91867110b651a9 Mon Sep 17 00:00:00 2001
From: Robert Zhang <jiaxuanzhang066@gmail.com>
Date: Sun, 4 Feb 2024 20:01:03 -0500
Subject: [PATCH 08/18] [WIP][InferAtomsPass] Optimize Consistent and
 FreshConsistent atomic regions

Mostly working, except optimizations done on a FreshConsistent region
need to converge back into a single (nested) region.
---
 benchmarks/ctests/example03.ll                | 16 ++--
 benchmarks/ctests/example04.ll                | 12 ++-
 .../AtomicRegionInference/src/InferAtoms.cpp  | 53 ++++++------
 .../src/InferFreshCons.cpp                    | 81 +++++++++++++++----
 .../src/include/InferAtoms.h                  |  2 +-
 .../src/include/InferFreshCons.h              |  6 +-
 6 files changed, 111 insertions(+), 59 deletions(-)

diff --git a/benchmarks/ctests/example03.ll b/benchmarks/ctests/example03.ll
index df86c33..f642b6b 100644
--- a/benchmarks/ctests/example03.ll
+++ b/benchmarks/ctests/example03.ll
@@ -45,15 +45,15 @@ entry:
   call void @atomic_start()
   %call = call i32 @input()
   store i32 %call, ptr %x, align 4
-  store i32 1, ptr %y, align 4
-  %0 = load i32, ptr %y, align 4
-  %add = add nsw i32 %0, 1
-  store i32 %add, ptr %z, align 4
-  %1 = load i32, ptr %z, align 4
-  call void @log(i32 noundef %1)
-  %2 = load i32, ptr %x, align 4
-  call void @log(i32 noundef %2)
+  %0 = load i32, ptr %x, align 4
+  call void @log(i32 noundef %0)
   call void @atomic_end()
+  store i32 1, ptr %y, align 4
+  %1 = load i32, ptr %y, align 4
+  %2 = add nsw i32 %1, 1
+  store i32 %2, ptr %z, align 4
+  %3 = load i32, ptr %z, align 4
+  call void @log(i32 noundef %3)
   ret void
 }
 
diff --git a/benchmarks/ctests/example04.ll b/benchmarks/ctests/example04.ll
index d05e743..1040290 100644
--- a/benchmarks/ctests/example04.ll
+++ b/benchmarks/ctests/example04.ll
@@ -42,17 +42,15 @@ entry:
   %x = alloca i32, align 4
   %y = alloca i32, align 4
   call void @atomic_start()
-  %call = call i32 @input()
   %call1 = call i32 @input()
-  call void @atomic_end()
-  store i32 %call, ptr %x, align 4
   store i32 %call1, ptr %y, align 4
   %0 = load i32, ptr %y, align 4
   call void @log(i32 noundef %0)
-  %1 = load i32, ptr %x, align 4
-  call void undef(i32 noundef %1, i32 noundef 1)
-  %2 = load i32, ptr %y, align 4
-  call void undef(i32 noundef %2, i32 noundef 1)
+  call void @atomic_end()
+  call void @atomic_start()
+  %1 = call i32 @input()
+  call void @atomic_end()
+  store i32 %1, ptr %x, align 4
   ret void
 }
 
diff --git a/ocelot/AtomicRegionInference/src/InferAtoms.cpp b/ocelot/AtomicRegionInference/src/InferAtoms.cpp
index be7f108..c085c32 100644
--- a/ocelot/AtomicRegionInference/src/InferAtoms.cpp
+++ b/ocelot/AtomicRegionInference/src/InferAtoms.cpp
@@ -32,21 +32,21 @@ PreservedAnalyses InferAtomsPass::run(Module& M, ModuleAnalysisManager& AM) {
   inst_insts_map inputMap = buildInputs(this->M);
   errs() << "inputMap:\n";
   printInstInsts(inputMap);
-  inst_vec toDelete;
-  getAnnotations(&consVars, &freshVars, inputMap, &toDelete);
-  // TODO: need to add unique point of call chain prefix to cons set
+  inst_vec toDeleteAnnots;
+  getAnnotations(&consVars, &freshVars, inputMap, &toDeleteAnnots);
+  // TODO: Need to add unique point of call chain prefix to cons set
 
 #if DEBUG
-  errs() << "Initial Fresh:\n";
-  for (auto& insts : freshVars)
+  errs() << "Initial Consistent:\n";
+  for (auto& [_, insts] : consVars) {
     for (auto* inst : insts) errs() << *inst << "\n";
+  }
 #endif
 
 #if DEBUG
-  errs() << "Initial Consistent:\n";
-  for (auto& [_, insts] : consVars) {
+  errs() << "Initial Fresh:\n";
+  for (auto& insts : freshVars)
     for (auto* inst : insts) errs() << *inst << "\n";
-  }
 #endif
 
 #if DEBUG
@@ -58,13 +58,13 @@ PreservedAnalyses InferAtomsPass::run(Module& M, ModuleAnalysisManager& AM) {
   auto allFresh = collectFresh(freshVars, inputMap);
 
 #if DEBUG
-  errs() << "Fresh after collect: \n";
-  for (auto& varSet : allFresh)
-    for (auto* var : varSet) errs() << *var << "\n";
+  errs() << "Fresh sets after collect: \n";
+  for (auto& freshSet : allFresh)
+    for (auto* inst : freshSet) errs() << *inst << "\n";
 #endif
 
 #if DEBUG
-  errs() << "Consistent after collect: \n";
+  errs() << "Cons. sets after collect: \n";
   for (auto& [_, insts] : allConsSets)
     for (auto* inst : insts) errs() << *inst << "\n";
 #endif
@@ -72,11 +72,11 @@ PreservedAnalyses InferAtomsPass::run(Module& M, ModuleAnalysisManager& AM) {
   // Consistent first
   InferFreshCons* ci = new InferFreshCons(&FAM, &M, atomStart, atomEnd);
 
-  ci->inferConsistent(allConsSets);
-  ci->inferFresh(allFresh);
+  ci->inferCons(allConsSets, &allFresh, &toDeleteAnnots);
+  ci->inferFresh(allFresh, &toDeleteAnnots);
 
   // Delete annotations
-  removeAnnotations(&toDelete);
+  removeAnnotations(toDeleteAnnots);
 
   return PreservedAnalyses::none();
 }
@@ -97,9 +97,10 @@ void InferAtomsPass::getAnnotations(std::map<int, inst_vec>* consVars, inst_vec_
           auto* fun = ci->getCalledFunction();
           // Various empty or null checks
           if (fun == NULL || fun->empty() || !fun->hasName()) continue;
-          // Consistent and FreshConsistent
-          // TODO: Fix FreshConsistent
-          if (isAnnot(fun->getName()) && !fun->getName().equals("Fresh")) {
+          auto funName = fun->getName();
+          // Consistent & FreshConsistent
+          if (isAnnot(funName) && !funName.equals("Fresh")) {
+            errs() << "getAnnot: " << ci << "\n";
             toDelete->push_back(ci);
             int setID;
             // Bit cast use of x, then value operand of store
@@ -245,12 +246,18 @@ void InferAtomsPass::getAnnotations(std::map<int, inst_vec>* consVars, inst_vec_
               printIntInsts(*consVars);
 #endif
             }
-          } else if (fun->getName().equals("Fresh")) {
+          }
+
+          // Fresh & FreshConsistent
+          if (isAnnot(funName) && !funName.equals("Consistent")) {
 #if DEBUG
             errs() << "[Loop Inst] Calls Fresh\n";
 #endif
             std::set<Instruction*> v;
-            toDelete->push_back(ci);
+            if (find(toDelete->begin(), toDelete->end(), ci) == toDelete->end()) {
+              errs() << "getAnnot: " << ci << "\n";
+              toDelete->push_back(ci);
+            }
 
 #if DEBUG
             errs() << "[Loop Inst] Print inputMap entries:\n";
@@ -340,7 +347,7 @@ void InferAtomsPass::getAnnotations(std::map<int, inst_vec>* consVars, inst_vec_
 #endif
 }
 
-void InferAtomsPass::removeAnnotations(inst_vec* toDelete) {
+void InferAtomsPass::removeAnnotations(inst_vec& toDelete) {
   std::vector<Function*> toDeleteF;
 
   // Delete all annotation function calls
@@ -353,7 +360,7 @@ void InferAtomsPass::removeAnnotations(inst_vec* toDelete) {
         for (; I != B.end(); I++) {
           if (auto* ci = dyn_cast<CallInst>(I)) {
             // TODO: no need to confirm in toDelete?
-            if (std::find(toDelete->begin(), toDelete->end(), &*I) != toDelete->end()) {
+            if (find(toDelete.begin(), toDelete.end(), &*I) != toDelete.end()) {
 #if DEBUG
               errs() << "Remove call: " << *I << "\n";
 #endif
@@ -377,7 +384,7 @@ void InferAtomsPass::removeAnnotations(inst_vec* toDelete) {
   }
 
   // Delete all annotation function defs
-  for (auto F : toDeleteF) {
+  for (auto* F : toDeleteF) {
 #if DEBUG
     errs() << "Remove function " << F->getName() << "\n";
 #endif
diff --git a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp
index b6d100a..9aff828 100644
--- a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp
+++ b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp
@@ -59,27 +59,36 @@ BasicBlock* InferFreshCons::getLoopEnd(BasicBlock* bb) {
 }
 
 // Top level region inference function -- could flatten later
-void InferFreshCons::inferConsistent(std::map<int, inst_vec> consSets) {
+void InferFreshCons::inferCons(std::map<int, inst_vec> consSets, inst_vec_vec* freshSets, inst_vec* toDeleteAnnots) {
+#if DEBUG
+  errs() << "=== inferConsistent ===\n";
+#endif
   for (auto& [id, set] : consSets) {
 #if DEBUG
     errs() << "[InferConsistent] Adding region for set " << id << "\n";
 #endif
-    addRegion(set, Consistent);
+    addRegion(set, freshSets, toDeleteAnnots);
   }
+#if DEBUG
+  errs() << "*** inferConsistent ***\n";
+#endif
 }
 
 // The only difference is outer map vs outer vec
-void InferFreshCons::inferFresh(inst_vec_vec freshSets) {
+void InferFreshCons::inferFresh(inst_vec_vec freshSets, inst_vec* toDeleteAnnots) {
 #if DEBUG
   errs() << "=== inferFresh ===\n";
 #endif
-  for (auto freshSet : freshSets) addRegion(freshSet, Fresh);
+
+  for (auto freshSet : freshSets) {
+    addRegion(freshSet, nullptr, toDeleteAnnots);
+  }
 #if DEBUG
   errs() << "*** inferFresh ***\n";
 #endif
 }
 
-void InferFreshCons::addRegion(inst_vec targetInsts, RegionKind regionKind) {
+void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_vec* toDeleteAnnots) {
 #if DEBUG
   errs() << "=== addRegion ===\n";
 #endif
@@ -129,11 +138,8 @@ void InferFreshCons::addRegion(inst_vec targetInsts, RegionKind regionKind) {
           // so only explore a caller if it's in conSet
           bool first = true;
           for (auto* use : curFun->users()) {
-            // if (regionKind == 1) {
             if (!(find(targetInsts.begin(), targetInsts.end(), use) != targetInsts.end()))
               continue;
-            // errs() << "Use: "<< *use << " is in call chain\n";
-            //}
             auto* inst = dyn_cast<Instruction>(use);
 #if DEBUGINFER
             errs() << "DEBUGINFER: examining use: " << *inst << "\n";
@@ -190,7 +196,7 @@ void InferFreshCons::addRegion(inst_vec targetInsts, RegionKind regionKind) {
         // Instructions to be delayed till the end of the block
         inst_vec toDelay;
         // (The original) instructions to be deleted
-        inst_set toDelete;
+        inst_vec toDelete;
 
         for (auto& I : *B) {
 #if DEBUG
@@ -247,7 +253,7 @@ void InferFreshCons::addRegion(inst_vec targetInsts, RegionKind regionKind) {
             clonedInsts.emplace(&I, clone);
 
             if (shouldDelay) {
-              toDelete.emplace(&I);
+              toDelete.push_back(&I);
               toDelay.push_back(clone);
             }
           }
@@ -258,13 +264,16 @@ void InferFreshCons::addRegion(inst_vec targetInsts, RegionKind regionKind) {
         // in the original order
         for (auto* I : toDelay) builder.Insert(I);
 
+#if DEBUG
+        errs() << "Delete originals:\n";
+#endif
         auto I = B->begin();
         // Delete the originals
         for (; I != B->end();) {
 #if DEBUG
           errs() << *I << "\n";
 #endif
-          if (toDelete.find(&*I) != toDelete.end()) {
+          if (find(toDelete.begin(), toDelete.end(), &*I) != toDelete.end()) {
 #if DEBUG
             errs() << "Deleted\n";
 #endif
@@ -273,6 +282,33 @@ void InferFreshCons::addRegion(inst_vec targetInsts, RegionKind regionKind) {
             I++;
         }
 
+        // Sync freshSets
+        if (other != nullptr) {
+          for (auto& set : *other) {
+            for (size_t i = 0; i < set.size(); i++) {
+              auto it = find(toDelete.begin(), toDelete.end(), set[i]);
+              if (it != toDelete.end()) {
+                auto idx = std::distance(toDelete.begin(), it);
+                auto* newInst = toDelay[idx];
+                set[i] = newInst;
+              }
+            }
+          }
+        }
+
+        // Sync toDelete
+        if (toDeleteAnnots != nullptr) {
+          for (size_t i = 0; i < toDeleteAnnots->size(); i++) {
+            auto* annot = toDeleteAnnots->at(i);
+            auto it = find(toDelete.begin(), toDelete.end(), annot);
+            if (it != toDelete.end()) {
+              auto idx = std::distance(toDelete.begin(), it);
+              auto* newAnnot = toDelay[idx];
+              toDeleteAnnots->at(i) = newAnnot;
+            }
+          }
+        }
+
 #if DEBUG
         errs() << "After: " << *B << "\n";
 #endif
@@ -382,10 +418,10 @@ Instruction* InferFreshCons::truncate(BasicBlock* B, bool forwards, inst_vec set
   errs() << "=== truncate ===\n";
 #endif
 
-#if DEBUG
-  errs() << "Set:\n";
-  printInsts(set);
-#endif
+  // #if DEBUG
+  //   errs() << "Set:\n";
+  //   printInsts(set);
+  // #endif
 
   // Truncate the front
   if (forwards) {
@@ -660,7 +696,18 @@ int InferFreshCons::getSubLength(BasicBlock* B, Instruction* end, std::vector<Ba
 
 bool InferFreshCons::sameFunction(std::map<Instruction*, BasicBlock*> blockMap) {
   auto* BComp = blockMap.begin()->second->getParent();
-  for (auto& [_, B] : blockMap)
-    if (B->getParent() != BComp) return false;
+
+  for (auto& [I, B] : blockMap) {
+    if (B->getParent() != BComp) {
+#if DEBUG
+      errs() << "Blocks are NOT in same fun\n";
+#endif
+      return false;
+    }
+  }
+
+#if DEBUG
+  errs() << "Blocks are in same fun\n";
+#endif
   return true;
 }
diff --git a/ocelot/AtomicRegionInference/src/include/InferAtoms.h b/ocelot/AtomicRegionInference/src/include/InferAtoms.h
index 217f92b..1da8c5a 100644
--- a/ocelot/AtomicRegionInference/src/include/InferAtoms.h
+++ b/ocelot/AtomicRegionInference/src/include/InferAtoms.h
@@ -29,7 +29,7 @@ struct InferAtomsPass : public PassInfoMixin<InferAtomsPass> {
   void getAnnotations(std::map<int, inst_vec>* consVars, inst_vec_vec* freshVars, inst_insts_map inputMap, inst_vec* toDelete);
   inst_vec_vec collectFresh(inst_vec_vec startingPoints, inst_insts_map info);
   std::map<int, inst_vec> collectCons(std::map<int, inst_vec> startingPointa, inst_insts_map inputMap);
-  void removeAnnotations(inst_vec* toDelete);
+  void removeAnnotations(inst_vec& toDelete);
   void setModule(Module* _M) { M = _M; }
 
  private:
diff --git a/ocelot/AtomicRegionInference/src/include/InferFreshCons.h b/ocelot/AtomicRegionInference/src/include/InferFreshCons.h
index b3fcd10..e76469c 100644
--- a/ocelot/AtomicRegionInference/src/include/InferFreshCons.h
+++ b/ocelot/AtomicRegionInference/src/include/InferFreshCons.h
@@ -20,9 +20,9 @@ struct InferFreshCons {
   enum InsertKind { Start,
                     End };
 
-  void inferConsistent(std::map<int, inst_vec> allSets);
-  void inferFresh(inst_vec_vec allSets);
-  void addRegion(inst_vec conSet, RegionKind regionKind);
+  void inferCons(std::map<int, inst_vec> consSets, inst_vec_vec* freshSets, inst_vec* toDeleteAnnots);
+  void inferFresh(inst_vec_vec freshSets, inst_vec* toDeleteAnnots);
+  void addRegion(inst_vec conSet, inst_vec_vec* other, inst_vec* toDeleteAnnots);
   Function* findCandidate(std::map<Instruction*, BasicBlock*> blocks, Function* root);
   Instruction* insertRegionInst(InsertKind insertKind, Instruction* insertBefore);
   bool sameFunction(std::map<Instruction*, BasicBlock*> blockMap);

From ebc7cc76052a6d0e3ea28b736698a5e5e22182b9 Mon Sep 17 00:00:00 2001
From: Robert Zhang <jiaxuanzhang066@gmail.com>
Date: Mon, 5 Feb 2024 00:16:36 -0500
Subject: [PATCH 09/18] [InferAtomsPass] Demo Consistent region optimization

---
 benchmarks/ctests/example04.c                   | 3 ++-
 benchmarks/ctests/example04.ll                  | 8 +++-----
 benchmarks/ctests/example04.orig.ll             | 2 +-
 ocelot/AtomicRegionInference/src/InferAtoms.cpp | 2 +-
 4 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/benchmarks/ctests/example04.c b/benchmarks/ctests/example04.c
index 5cbb707..5593ac0 100644
--- a/benchmarks/ctests/example04.c
+++ b/benchmarks/ctests/example04.c
@@ -19,7 +19,8 @@ void app() {
   int y = input();
   log(y);
   Consistent(x, 1);
-  FreshConsistent(y, 1);
+  Consistent(y, 1);
+  // FreshConsistent(y, 1);
 }
 
 int main() {
diff --git a/benchmarks/ctests/example04.ll b/benchmarks/ctests/example04.ll
index 1040290..6e43a07 100644
--- a/benchmarks/ctests/example04.ll
+++ b/benchmarks/ctests/example04.ll
@@ -42,15 +42,13 @@ entry:
   %x = alloca i32, align 4
   %y = alloca i32, align 4
   call void @atomic_start()
+  %call = call i32 @input()
   %call1 = call i32 @input()
+  call void @atomic_end()
+  store i32 %call, ptr %x, align 4
   store i32 %call1, ptr %y, align 4
   %0 = load i32, ptr %y, align 4
   call void @log(i32 noundef %0)
-  call void @atomic_end()
-  call void @atomic_start()
-  %1 = call i32 @input()
-  call void @atomic_end()
-  store i32 %1, ptr %x, align 4
   ret void
 }
 
diff --git a/benchmarks/ctests/example04.orig.ll b/benchmarks/ctests/example04.orig.ll
index 8491e4d..f245c19 100644
--- a/benchmarks/ctests/example04.orig.ll
+++ b/benchmarks/ctests/example04.orig.ll
@@ -78,7 +78,7 @@ entry:
   %1 = load i32, ptr %x, align 4
   call void @Consistent(i32 noundef %1, i32 noundef 1)
   %2 = load i32, ptr %y, align 4
-  call void @FreshConsistent(i32 noundef %2, i32 noundef 1)
+  call void @Consistent(i32 noundef %2, i32 noundef 1)
   ret void
 }
 
diff --git a/ocelot/AtomicRegionInference/src/InferAtoms.cpp b/ocelot/AtomicRegionInference/src/InferAtoms.cpp
index c085c32..5480d1f 100644
--- a/ocelot/AtomicRegionInference/src/InferAtoms.cpp
+++ b/ocelot/AtomicRegionInference/src/InferAtoms.cpp
@@ -359,7 +359,7 @@ void InferAtomsPass::removeAnnotations(inst_vec& toDelete) {
         auto I = B.begin();
         for (; I != B.end(); I++) {
           if (auto* ci = dyn_cast<CallInst>(I)) {
-            // TODO: no need to confirm in toDelete?
+            // TODO: No need to confirm in toDelete?
             if (find(toDelete.begin(), toDelete.end(), &*I) != toDelete.end()) {
 #if DEBUG
               errs() << "Remove call: " << *I << "\n";

From 3ab09aaf7845aad60edf42999b2a4704730bf19e Mon Sep 17 00:00:00 2001
From: Robert Zhang <jiaxuanzhang066@gmail.com>
Date: Mon, 5 Feb 2024 23:25:55 -0500
Subject: [PATCH 10/18] [InferAtomsPass] Fully sound FreshConsistent region
 inference optimization

When a variable has both freshness and consistency
constraints, the overlap between the optimized
inferred atomic regions is now properly handled by
nesting them such that only the outermost bounds
count.

See benchmarks/ctests/example04.ll for an example.

Before:

```llvm
define void @app() #0 {
entry:
  %x = alloca i32, align 4
  %y = alloca i32, align 4
  call void @atomic_start()         ; <-- OUTER START
  %call = call i32 @input()
  store i32 %call, ptr %x, align 4
  call void @atomic_start()         ; <-- INNER START
  %call1 = call i32 @input()
  call void @atomic_end()           ; <-- INNER END
  store i32 %call1, ptr %y, align 4
  %0 = load i32, ptr %x, align 4
  call void @log(i32 noundef %0)
  %1 = load i32, ptr %y, align 4
  call void @log(i32 noundef %1)
  call void @atomic_end()           ; <-- OUTER END
  ret void
}
```

After:

```llvm
define void @app() #0 {
entry:
  %x = alloca i32, align 4
  %y = alloca i32, align 4
  call void @atomic_start()         ; <-- OUTER START
  %call = call i32 @input()
  call void @atomic_start()         ; <-- INNER START
  %call1 = call i32 @input()
  call void @atomic_end()           ; <-- INNER END
  store i32 %call1, ptr %y, align 4
  %0 = load i32, ptr %y, align 4
  call void @log(i32 noundef %0)
  call void @atomic_end()           ; <-- OUTER END
  store i32 %call, ptr %x, align 4
  %1 = load i32, ptr %x, align 4
  call void @log(i32 noundef %1)
  ret void
}
```
---
 benchmarks/ctests/example04.c                 |  4 +-
 benchmarks/ctests/example04.ll                |  6 +-
 benchmarks/ctests/example04.orig.ll           | 10 +-
 benchmarks/ctests/example05.c                 | 25 +++++
 benchmarks/ctests/example05.ll                | 90 +++++++++++++++++
 benchmarks/ctests/example05.orig.ll           | 98 +++++++++++++++++++
 ocelot/AtomicRegionInference/Makefile         |  5 +
 .../AtomicRegionInference/src/InferAtoms.cpp  |  2 +-
 .../src/InferFreshCons.cpp                    | 32 ++++--
 .../src/include/InferFreshCons.h              |  2 +-
 10 files changed, 258 insertions(+), 16 deletions(-)
 create mode 100644 benchmarks/ctests/example05.c
 create mode 100644 benchmarks/ctests/example05.ll
 create mode 100644 benchmarks/ctests/example05.orig.ll

diff --git a/benchmarks/ctests/example04.c b/benchmarks/ctests/example04.c
index 5593ac0..a4463c3 100644
--- a/benchmarks/ctests/example04.c
+++ b/benchmarks/ctests/example04.c
@@ -17,10 +17,10 @@ void log(int x) {
 void app() {
   int x = input();
   int y = input();
+  log(x);
   log(y);
   Consistent(x, 1);
-  Consistent(y, 1);
-  // FreshConsistent(y, 1);
+  FreshConsistent(y, 1);
 }
 
 int main() {
diff --git a/benchmarks/ctests/example04.ll b/benchmarks/ctests/example04.ll
index 6e43a07..a3a1d72 100644
--- a/benchmarks/ctests/example04.ll
+++ b/benchmarks/ctests/example04.ll
@@ -43,12 +43,16 @@ entry:
   %y = alloca i32, align 4
   call void @atomic_start()
   %call = call i32 @input()
+  call void @atomic_start()
   %call1 = call i32 @input()
   call void @atomic_end()
-  store i32 %call, ptr %x, align 4
   store i32 %call1, ptr %y, align 4
   %0 = load i32, ptr %y, align 4
   call void @log(i32 noundef %0)
+  call void @atomic_end()
+  store i32 %call, ptr %x, align 4
+  %1 = load i32, ptr %x, align 4
+  call void @log(i32 noundef %1)
   ret void
 }
 
diff --git a/benchmarks/ctests/example04.orig.ll b/benchmarks/ctests/example04.orig.ll
index f245c19..32405f4 100644
--- a/benchmarks/ctests/example04.orig.ll
+++ b/benchmarks/ctests/example04.orig.ll
@@ -73,12 +73,14 @@ entry:
   store i32 %call, ptr %x, align 4
   %call1 = call i32 @input()
   store i32 %call1, ptr %y, align 4
-  %0 = load i32, ptr %y, align 4
+  %0 = load i32, ptr %x, align 4
   call void @log(i32 noundef %0)
-  %1 = load i32, ptr %x, align 4
-  call void @Consistent(i32 noundef %1, i32 noundef 1)
-  %2 = load i32, ptr %y, align 4
+  %1 = load i32, ptr %y, align 4
+  call void @log(i32 noundef %1)
+  %2 = load i32, ptr %x, align 4
   call void @Consistent(i32 noundef %2, i32 noundef 1)
+  %3 = load i32, ptr %y, align 4
+  call void @FreshConsistent(i32 noundef %3, i32 noundef 1)
   ret void
 }
 
diff --git a/benchmarks/ctests/example05.c b/benchmarks/ctests/example05.c
new file mode 100644
index 0000000..49fe304
--- /dev/null
+++ b/benchmarks/ctests/example05.c
@@ -0,0 +1,25 @@
+#include <stdio.h>
+
+void Fresh(int x) {}
+
+void atomic_start() {}
+void atomic_end() {}
+
+int input() { return 0; }
+int (*IO_NAME)() = input;
+
+void log(int x) {
+  printf("%d\n", x);
+}
+
+void app() {
+  int x = input();
+  for (int i = 0; i < 10; i++) {
+    log(x);
+  }
+  Fresh(x);
+}
+
+int main() {
+  app();
+}
\ No newline at end of file
diff --git a/benchmarks/ctests/example05.ll b/benchmarks/ctests/example05.ll
new file mode 100644
index 0000000..7330ad1
--- /dev/null
+++ b/benchmarks/ctests/example05.ll
@@ -0,0 +1,90 @@
+; ModuleID = '../../benchmarks/ctests/example05.c'
+source_filename = "../../benchmarks/ctests/example05.c"
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-macosx12.0.0"
+
+@IO_NAME = global ptr @input, align 8
+@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @atomic_start() #0 {
+entry:
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @atomic_end() #0 {
+entry:
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @input() #0 {
+entry:
+  ret i32 0
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @log(i32 noundef %x) #0 {
+entry:
+  %x.addr = alloca i32, align 4
+  store i32 %x, ptr %x.addr, align 4
+  %0 = load i32, ptr %x.addr, align 4
+  %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %0)
+  ret void
+}
+
+declare i32 @printf(ptr noundef, ...) #1
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @app() #0 {
+entry:
+  %x = alloca i32, align 4
+  %i = alloca i32, align 4
+  call void @atomic_start()
+  %call = call i32 @input()
+  store i32 %call, ptr %x, align 4
+  store i32 0, ptr %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %entry, %for.inc
+  %0 = load i32, ptr %i, align 4
+  %cmp = icmp slt i32 %0, 10
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32, ptr %x, align 4
+  call void @log(i32 noundef %1)
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %2 = load i32, ptr %i, align 4
+  %inc = add nsw i32 %2, 1
+  store i32 %inc, ptr %i, align 4
+  br label %for.cond, !llvm.loop !5
+
+for.end:                                          ; preds = %for.cond
+  call void @atomic_end()
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @main() #0 {
+entry:
+  call void @app()
+  ret i32 0
+}
+
+attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
+attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+!llvm.ident = !{!4}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 8, !"PIC Level", i32 2}
+!2 = !{i32 7, !"uwtable", i32 1}
+!3 = !{i32 7, !"frame-pointer", i32 1}
+!4 = !{!"Homebrew clang version 17.0.2"}
+!5 = distinct !{!5, !6}
+!6 = !{!"llvm.loop.mustprogress"}
diff --git a/benchmarks/ctests/example05.orig.ll b/benchmarks/ctests/example05.orig.ll
new file mode 100644
index 0000000..dc149e7
--- /dev/null
+++ b/benchmarks/ctests/example05.orig.ll
@@ -0,0 +1,98 @@
+; ModuleID = '../../benchmarks/ctests/example05.c'
+source_filename = "../../benchmarks/ctests/example05.c"
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-macosx12.0.0"
+
+@IO_NAME = global ptr @input, align 8
+@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @Fresh(i32 noundef %x) #0 {
+entry:
+  %x.addr = alloca i32, align 4
+  store i32 %x, ptr %x.addr, align 4
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @atomic_start() #0 {
+entry:
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @atomic_end() #0 {
+entry:
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @input() #0 {
+entry:
+  ret i32 0
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @log(i32 noundef %x) #0 {
+entry:
+  %x.addr = alloca i32, align 4
+  store i32 %x, ptr %x.addr, align 4
+  %0 = load i32, ptr %x.addr, align 4
+  %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %0)
+  ret void
+}
+
+declare i32 @printf(ptr noundef, ...) #1
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @app() #0 {
+entry:
+  %x = alloca i32, align 4
+  %i = alloca i32, align 4
+  %call = call i32 @input()
+  store i32 %call, ptr %x, align 4
+  store i32 0, ptr %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32, ptr %i, align 4
+  %cmp = icmp slt i32 %0, 10
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32, ptr %x, align 4
+  call void @log(i32 noundef %1)
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %2 = load i32, ptr %i, align 4
+  %inc = add nsw i32 %2, 1
+  store i32 %inc, ptr %i, align 4
+  br label %for.cond, !llvm.loop !5
+
+for.end:                                          ; preds = %for.cond
+  %3 = load i32, ptr %x, align 4
+  call void @Fresh(i32 noundef %3)
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @main() #0 {
+entry:
+  call void @app()
+  ret i32 0
+}
+
+attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
+attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+!llvm.ident = !{!4}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 8, !"PIC Level", i32 2}
+!2 = !{i32 7, !"uwtable", i32 1}
+!3 = !{i32 7, !"frame-pointer", i32 1}
+!4 = !{!"Homebrew clang version 17.0.2"}
+!5 = distinct !{!5, !6}
+!6 = !{!"llvm.loop.mustprogress"}
diff --git a/ocelot/AtomicRegionInference/Makefile b/ocelot/AtomicRegionInference/Makefile
index 360ec68..e4d21fa 100644
--- a/ocelot/AtomicRegionInference/Makefile
+++ b/ocelot/AtomicRegionInference/Makefile
@@ -5,6 +5,7 @@ all:
 	make eg2
 	make eg3
 	make eg4
+	make eg5
 
 eg1:
 	TEST=example01 make test
@@ -14,6 +15,8 @@ eg3:
 	TEST=example03 make test
 eg4:
 	TEST=example04 make test
+eg5:
+	TEST=example05 make test
 
 run_eg1:
 	TEST=example01 make run
@@ -23,6 +26,8 @@ run_eg3:
 	TEST=example03 make run
 run_eg4:
 	TEST=example04 make run
+run_eg5:
+	TEST=example05 make run
 
 test:
 	$(MAKE) -C build
diff --git a/ocelot/AtomicRegionInference/src/InferAtoms.cpp b/ocelot/AtomicRegionInference/src/InferAtoms.cpp
index 5480d1f..2317a2b 100644
--- a/ocelot/AtomicRegionInference/src/InferAtoms.cpp
+++ b/ocelot/AtomicRegionInference/src/InferAtoms.cpp
@@ -73,7 +73,7 @@ PreservedAnalyses InferAtomsPass::run(Module& M, ModuleAnalysisManager& AM) {
   InferFreshCons* ci = new InferFreshCons(&FAM, &M, atomStart, atomEnd);
 
   ci->inferCons(allConsSets, &allFresh, &toDeleteAnnots);
-  ci->inferFresh(allFresh, &toDeleteAnnots);
+  ci->inferFresh(allFresh, &allConsSets, &toDeleteAnnots);
 
   // Delete annotations
   removeAnnotations(toDeleteAnnots);
diff --git a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp
index 9aff828..cc2eb52 100644
--- a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp
+++ b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp
@@ -61,27 +61,30 @@ BasicBlock* InferFreshCons::getLoopEnd(BasicBlock* bb) {
 // Top level region inference function -- could flatten later
 void InferFreshCons::inferCons(std::map<int, inst_vec> consSets, inst_vec_vec* freshSets, inst_vec* toDeleteAnnots) {
 #if DEBUG
-  errs() << "=== inferConsistent ===\n";
+  errs() << "=== inferCons ===\n";
 #endif
   for (auto& [id, set] : consSets) {
 #if DEBUG
-    errs() << "[InferConsistent] Adding region for set " << id << "\n";
+    errs() << "[inferCons] Adding region for set " << id << "\n";
 #endif
     addRegion(set, freshSets, toDeleteAnnots);
   }
 #if DEBUG
-  errs() << "*** inferConsistent ***\n";
+  errs() << "*** inferCons ***\n";
 #endif
 }
 
 // The only difference is outer map vs outer vec
-void InferFreshCons::inferFresh(inst_vec_vec freshSets, inst_vec* toDeleteAnnots) {
+void InferFreshCons::inferFresh(inst_vec_vec freshSets, std::map<int, inst_vec>* consSets, inst_vec* toDeleteAnnots) {
 #if DEBUG
   errs() << "=== inferFresh ===\n";
 #endif
 
+  std::vector<inst_vec> consVec;
+  for (auto& [_, consSet] : *consSets) consVec.push_back(consSet);
+
   for (auto freshSet : freshSets) {
-    addRegion(freshSet, nullptr, toDeleteAnnots);
+    addRegion(freshSet, &consVec, toDeleteAnnots);
   }
 #if DEBUG
   errs() << "*** inferFresh ***\n";
@@ -202,8 +205,23 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v
 #if DEBUG
           errs() << I << "\n";
 #endif
-          if (!isa<AllocaInst>(I)) {
-            auto shouldDelay = find(targetInsts.begin(), targetInsts.end(), &I) == targetInsts.end();
+          bool isRegionBoundary = false;
+          if (auto* ci = dyn_cast<CallInst>(&I)) {
+            auto funName = ci->getCalledFunction()->getName();
+            isRegionBoundary =
+                funName.equals("atomic_start") || funName.equals("atomic_end");
+          }
+
+          // Only attempt to schedule instruction if it's not alloca or a region boundary
+          if (!isa<AllocaInst>(I) && !isRegionBoundary) {
+            bool inExistingSet = false;
+            for (auto insts : *other) {
+              if (find(insts.begin(), insts.end(), &I) != insts.end()) {
+                inExistingSet = true;
+              }
+            }
+
+            auto shouldDelay = find(targetInsts.begin(), targetInsts.end(), &I) == targetInsts.end() && !inExistingSet;
 #if DEBUG
             errs() << "  Should" << (shouldDelay ? " " : " NOT ") << "be delayed\n";
 #endif
diff --git a/ocelot/AtomicRegionInference/src/include/InferFreshCons.h b/ocelot/AtomicRegionInference/src/include/InferFreshCons.h
index e76469c..e8dfc8a 100644
--- a/ocelot/AtomicRegionInference/src/include/InferFreshCons.h
+++ b/ocelot/AtomicRegionInference/src/include/InferFreshCons.h
@@ -21,7 +21,7 @@ struct InferFreshCons {
                     End };
 
   void inferCons(std::map<int, inst_vec> consSets, inst_vec_vec* freshSets, inst_vec* toDeleteAnnots);
-  void inferFresh(inst_vec_vec freshSets, inst_vec* toDeleteAnnots);
+  void inferFresh(inst_vec_vec freshSets, std::map<int, inst_vec>* consSets, inst_vec* toDeleteAnnots);
   void addRegion(inst_vec conSet, inst_vec_vec* other, inst_vec* toDeleteAnnots);
   Function* findCandidate(std::map<Instruction*, BasicBlock*> blocks, Function* root);
   Instruction* insertRegionInst(InsertKind insertKind, Instruction* insertBefore);

From 6859d351b8a7b69056f6c8f095649360da008b2d Mon Sep 17 00:00:00 2001
From: Robert Zhang <jiaxuanzhang066@gmail.com>
Date: Mon, 12 Feb 2024 02:27:12 -0500
Subject: [PATCH 11/18] [InferAtomsPass] Handle cases where IO calls have
 arguments and explore optimizing loops

One objective as of now is to make optimizations
even more robust by supporting more corner cases.

For example, when the IO function is
`input(int i)` (`benchmarks/ctests/example06.c`),
optimizations shouldn't incorrectly delay the
instructions related to the argument `i`, and
should instead produce:

```llvm
define void @app() #0 {
entry:
  %i = alloca i32, align 4
  %x = alloca i32, align 4
  store i32 1, ptr %i, align 4            <--
  %0 = load i32, ptr %i, align 4          <--
  call void @atomic_start()
  %call = call i32 @input(i32 noundef %0) <-- DEPENDS ON THE ABOVE
  store i32 %call, ptr %x, align 4
  %1 = load i32, ptr %x, align 4
  call void @log(i32 noundef %1)
  call void @atomic_end()
  ret void
}
```

As for loop optimizations, unlike in WARio (which
targets checkpointing runtimes), loop unrolling
(i.e., creating multiple smaller copies of the
loop) doesn't help in atomic region inference,
since the resulting copies must still be in the
same region. Thus, the cost of the region isn't
reduced.

There are still optimizations to be done, though.
For instance, loops entirely untainted by inputs
under constraint(s) can be delayed and moved out
of atomic regions, just like many other
instructions can. The difficulty lies in rewiring
the complex branching/connections among the basic
blocks that form these loops, which makes an
optimizing analysis harder to devise.

`benchmarks/ctests/example05` illustrates an
instance where the optimization above applies.
I will be working on this as a next step.
---
 benchmarks/ctests/example05.c                 |   3 +
 benchmarks/ctests/example05.ll                |  21 +++
 benchmarks/ctests/example05.orig.ll           |  25 ++-
 benchmarks/ctests/example06.c                 |  25 +++
 benchmarks/ctests/example06.ll                |  75 +++++++++
 benchmarks/ctests/example06.orig.ll           |  93 +++++++++++
 ocelot/AtomicRegionInference/Makefile         |   5 +
 .../AtomicRegionInference/src/InferAtoms.cpp  |  56 +++----
 .../src/InferFreshCons.cpp                    | 154 +++++++++++++-----
 .../src/TaintTracker.cpp                      |  23 +--
 .../src/include/InferFreshCons.h              |   6 +-
 .../src/include/TaintTracker.h                |   4 +-
 12 files changed, 404 insertions(+), 86 deletions(-)
 create mode 100644 benchmarks/ctests/example06.c
 create mode 100644 benchmarks/ctests/example06.ll
 create mode 100644 benchmarks/ctests/example06.orig.ll

diff --git a/benchmarks/ctests/example05.c b/benchmarks/ctests/example05.c
index 49fe304..58dddf6 100644
--- a/benchmarks/ctests/example05.c
+++ b/benchmarks/ctests/example05.c
@@ -17,6 +17,9 @@ void app() {
   for (int i = 0; i < 10; i++) {
     log(x);
   }
+  for (int i = 0; i < 10; i++) {
+    log(1);
+  }
   Fresh(x);
 }
 
diff --git a/benchmarks/ctests/example05.ll b/benchmarks/ctests/example05.ll
index 7330ad1..4dfc800 100644
--- a/benchmarks/ctests/example05.ll
+++ b/benchmarks/ctests/example05.ll
@@ -41,6 +41,7 @@ define void @app() #0 {
 entry:
   %x = alloca i32, align 4
   %i = alloca i32, align 4
+  %i1 = alloca i32, align 4
   call void @atomic_start()
   %call = call i32 @input()
   store i32 %call, ptr %x, align 4
@@ -64,6 +65,25 @@ for.inc:                                          ; preds = %for.body
   br label %for.cond, !llvm.loop !5
 
 for.end:                                          ; preds = %for.cond
+  store i32 0, ptr %i1, align 4
+  br label %for.cond2
+
+for.cond2:                                        ; preds = %for.inc5, %for.end
+  %3 = load i32, ptr %i1, align 4
+  %cmp3 = icmp slt i32 %3, 10
+  br i1 %cmp3, label %for.body4, label %for.end7
+
+for.body4:                                        ; preds = %for.cond2
+  call void @log(i32 noundef 1)
+  br label %for.inc5
+
+for.inc5:                                         ; preds = %for.body4
+  %4 = load i32, ptr %i1, align 4
+  %inc6 = add nsw i32 %4, 1
+  store i32 %inc6, ptr %i1, align 4
+  br label %for.cond2, !llvm.loop !7
+
+for.end7:                                         ; preds = %for.cond2
   call void @atomic_end()
   ret void
 }
@@ -88,3 +108,4 @@ attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-pr
 !4 = !{!"Homebrew clang version 17.0.2"}
 !5 = distinct !{!5, !6}
 !6 = !{!"llvm.loop.mustprogress"}
+!7 = distinct !{!7, !6}
diff --git a/benchmarks/ctests/example05.orig.ll b/benchmarks/ctests/example05.orig.ll
index dc149e7..6dcc44f 100644
--- a/benchmarks/ctests/example05.orig.ll
+++ b/benchmarks/ctests/example05.orig.ll
@@ -49,6 +49,7 @@ define void @app() #0 {
 entry:
   %x = alloca i32, align 4
   %i = alloca i32, align 4
+  %i1 = alloca i32, align 4
   %call = call i32 @input()
   store i32 %call, ptr %x, align 4
   store i32 0, ptr %i, align 4
@@ -71,8 +72,27 @@ for.inc:                                          ; preds = %for.body
   br label %for.cond, !llvm.loop !5
 
 for.end:                                          ; preds = %for.cond
-  %3 = load i32, ptr %x, align 4
-  call void @Fresh(i32 noundef %3)
+  store i32 0, ptr %i1, align 4
+  br label %for.cond2
+
+for.cond2:                                        ; preds = %for.inc5, %for.end
+  %3 = load i32, ptr %i1, align 4
+  %cmp3 = icmp slt i32 %3, 10
+  br i1 %cmp3, label %for.body4, label %for.end7
+
+for.body4:                                        ; preds = %for.cond2
+  call void @log(i32 noundef 1)
+  br label %for.inc5
+
+for.inc5:                                         ; preds = %for.body4
+  %4 = load i32, ptr %i1, align 4
+  %inc6 = add nsw i32 %4, 1
+  store i32 %inc6, ptr %i1, align 4
+  br label %for.cond2, !llvm.loop !7
+
+for.end7:                                         ; preds = %for.cond2
+  %5 = load i32, ptr %x, align 4
+  call void @Fresh(i32 noundef %5)
   ret void
 }
 
@@ -96,3 +116,4 @@ attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-pr
 !4 = !{!"Homebrew clang version 17.0.2"}
 !5 = distinct !{!5, !6}
 !6 = !{!"llvm.loop.mustprogress"}
+!7 = distinct !{!7, !6}
diff --git a/benchmarks/ctests/example06.c b/benchmarks/ctests/example06.c
new file mode 100644
index 0000000..d192581
--- /dev/null
+++ b/benchmarks/ctests/example06.c
@@ -0,0 +1,25 @@
+#include <stdio.h>
+
+void Fresh(int x) {}
+void Consistent(int x, int id) {}
+
+void atomic_start() {}
+void atomic_end() {}
+
+int input(int i) { return i; }
+int (*IO_NAME)() = input;
+
+void log(int x) {
+  printf("%d\n", x);
+}
+
+void app() {
+  int i = 1;
+  int x = input(i);
+  Fresh(x);
+  log(x);
+}
+
+int main() {
+  app();
+}
\ No newline at end of file
diff --git a/benchmarks/ctests/example06.ll b/benchmarks/ctests/example06.ll
new file mode 100644
index 0000000..603f917
--- /dev/null
+++ b/benchmarks/ctests/example06.ll
@@ -0,0 +1,75 @@
+; ModuleID = '../../benchmarks/ctests/example06.c'
+source_filename = "../../benchmarks/ctests/example06.c"
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-macosx12.0.0"
+
+@IO_NAME = global ptr @input, align 8
+@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @atomic_start() #0 {
+entry:
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @atomic_end() #0 {
+entry:
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @input(i32 noundef %i) #0 {
+entry:
+  %i.addr = alloca i32, align 4
+  store i32 %i, ptr %i.addr, align 4
+  %0 = load i32, ptr %i.addr, align 4
+  ret i32 %0
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @log(i32 noundef %x) #0 {
+entry:
+  %x.addr = alloca i32, align 4
+  store i32 %x, ptr %x.addr, align 4
+  %0 = load i32, ptr %x.addr, align 4
+  %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %0)
+  ret void
+}
+
+declare i32 @printf(ptr noundef, ...) #1
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @app() #0 {
+entry:
+  %i = alloca i32, align 4
+  %x = alloca i32, align 4
+  store i32 1, ptr %i, align 4
+  %0 = load i32, ptr %i, align 4
+  call void @atomic_start()
+  %call = call i32 @input(i32 noundef %0)
+  store i32 %call, ptr %x, align 4
+  %1 = load i32, ptr %x, align 4
+  call void @log(i32 noundef %1)
+  call void @atomic_end()
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @main() #0 {
+entry:
+  call void @app()
+  ret i32 0
+}
+
+attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
+attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+!llvm.ident = !{!4}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 8, !"PIC Level", i32 2}
+!2 = !{i32 7, !"uwtable", i32 1}
+!3 = !{i32 7, !"frame-pointer", i32 1}
+!4 = !{!"Homebrew clang version 17.0.2"}
diff --git a/benchmarks/ctests/example06.orig.ll b/benchmarks/ctests/example06.orig.ll
new file mode 100644
index 0000000..4aea90e
--- /dev/null
+++ b/benchmarks/ctests/example06.orig.ll
@@ -0,0 +1,93 @@
+; ModuleID = '../../benchmarks/ctests/example06.c'
+source_filename = "../../benchmarks/ctests/example06.c"
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-macosx12.0.0"
+
+@IO_NAME = global ptr @input, align 8
+@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @Fresh(i32 noundef %x) #0 {
+entry:
+  %x.addr = alloca i32, align 4
+  store i32 %x, ptr %x.addr, align 4
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @Consistent(i32 noundef %x, i32 noundef %id) #0 {
+entry:
+  %x.addr = alloca i32, align 4
+  %id.addr = alloca i32, align 4
+  store i32 %x, ptr %x.addr, align 4
+  store i32 %id, ptr %id.addr, align 4
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @atomic_start() #0 {
+entry:
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @atomic_end() #0 {
+entry:
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @input(i32 noundef %i) #0 {
+entry:
+  %i.addr = alloca i32, align 4
+  store i32 %i, ptr %i.addr, align 4
+  %0 = load i32, ptr %i.addr, align 4
+  ret i32 %0
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @log(i32 noundef %x) #0 {
+entry:
+  %x.addr = alloca i32, align 4
+  store i32 %x, ptr %x.addr, align 4
+  %0 = load i32, ptr %x.addr, align 4
+  %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %0)
+  ret void
+}
+
+declare i32 @printf(ptr noundef, ...) #1
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @app() #0 {
+entry:
+  %i = alloca i32, align 4
+  %x = alloca i32, align 4
+  store i32 1, ptr %i, align 4
+  %0 = load i32, ptr %i, align 4
+  %call = call i32 @input(i32 noundef %0)
+  store i32 %call, ptr %x, align 4
+  %1 = load i32, ptr %x, align 4
+  call void @Fresh(i32 noundef %1)
+  %2 = load i32, ptr %x, align 4
+  call void @log(i32 noundef %2)
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @main() #0 {
+entry:
+  call void @app()
+  ret i32 0
+}
+
+attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
+attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+!llvm.ident = !{!4}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 8, !"PIC Level", i32 2}
+!2 = !{i32 7, !"uwtable", i32 1}
+!3 = !{i32 7, !"frame-pointer", i32 1}
+!4 = !{!"Homebrew clang version 17.0.2"}
diff --git a/ocelot/AtomicRegionInference/Makefile b/ocelot/AtomicRegionInference/Makefile
index e4d21fa..d93e037 100644
--- a/ocelot/AtomicRegionInference/Makefile
+++ b/ocelot/AtomicRegionInference/Makefile
@@ -6,6 +6,7 @@ all:
 	make eg3
 	make eg4
 	make eg5
+	make eg6
 
 eg1:
 	TEST=example01 make test
@@ -17,6 +18,8 @@ eg4:
 	TEST=example04 make test
 eg5:
 	TEST=example05 make test
+eg6:
+	TEST=example06 make test
 
 run_eg1:
 	TEST=example01 make run
@@ -28,6 +31,8 @@ run_eg4:
 	TEST=example04 make run
 run_eg5:
 	TEST=example05 make run
+run_eg6:
+	TEST=example06 make run
 
 test:
 	$(MAKE) -C build
diff --git a/ocelot/AtomicRegionInference/src/InferAtoms.cpp b/ocelot/AtomicRegionInference/src/InferAtoms.cpp
index 2317a2b..42c8f3b 100644
--- a/ocelot/AtomicRegionInference/src/InferAtoms.cpp
+++ b/ocelot/AtomicRegionInference/src/InferAtoms.cpp
@@ -29,7 +29,7 @@ PreservedAnalyses InferAtomsPass::run(Module& M, ModuleAnalysisManager& AM) {
   // to only go through all the declarations once.
   std::map<int, inst_vec> consVars;
   inst_vec_vec freshVars;
-  inst_insts_map inputMap = buildInputs(this->M);
+  auto [inputMap, inputInsts] = buildInputs(this->M);
   errs() << "inputMap:\n";
   printInstInsts(inputMap);
   inst_vec toDeleteAnnots;
@@ -49,10 +49,10 @@ PreservedAnalyses InferAtomsPass::run(Module& M, ModuleAnalysisManager& AM) {
     for (auto* inst : insts) errs() << *inst << "\n";
 #endif
 
-#if DEBUG
-  errs() << "Print inputMap CallInst entries:\n";
-  printInstInsts(inputMap, true);
-#endif
+  // #if DEBUG
+  //   errs() << "Print inputMap CallInst entries:\n";
+  //   printInstInsts(inputMap, true);
+  // #endif
 
   auto allConsSets = collectCons(consVars, inputMap);
   auto allFresh = collectFresh(freshVars, inputMap);
@@ -72,8 +72,8 @@ PreservedAnalyses InferAtomsPass::run(Module& M, ModuleAnalysisManager& AM) {
   // Consistent first
   InferFreshCons* ci = new InferFreshCons(&FAM, &M, atomStart, atomEnd);
 
-  ci->inferCons(allConsSets, &allFresh, &toDeleteAnnots);
-  ci->inferFresh(allFresh, &allConsSets, &toDeleteAnnots);
+  ci->inferCons(allConsSets, &allFresh, &toDeleteAnnots, &inputInsts);
+  ci->inferFresh(allFresh, &allConsSets, &toDeleteAnnots, &inputInsts);
 
   // Delete annotations
   removeAnnotations(toDeleteAnnots);
@@ -81,7 +81,7 @@ PreservedAnalyses InferAtomsPass::run(Module& M, ModuleAnalysisManager& AM) {
   return PreservedAnalyses::none();
 }
 
-// This function finds annotated variables
+// Finds *all* variables affected by annotation
 void InferAtomsPass::getAnnotations(std::map<int, inst_vec>* consVars, inst_vec_vec* freshVars,
                                     inst_insts_map inputMap, inst_vec* toDelete) {
 #if DEBUG
@@ -92,7 +92,7 @@ void InferAtomsPass::getAnnotations(std::map<int, inst_vec>* consVars, inst_vec_
       for (auto& I : B) {
         if (auto* ci = dyn_cast<CallInst>(&I)) {
 #if DEBUG
-          errs() << "[Loop Inst] Found call: " << *ci << "\n";
+          errs() << "[Loop I] Found call: " << *ci << "\n";
 #endif
           auto* fun = ci->getCalledFunction();
           // Various empty or null checks
@@ -100,7 +100,6 @@ void InferAtomsPass::getAnnotations(std::map<int, inst_vec>* consVars, inst_vec_
           auto funName = fun->getName();
           // Consistent & FreshConsistent
           if (isAnnot(funName) && !funName.equals("Fresh")) {
-            errs() << "getAnnot: " << ci << "\n";
             toDelete->push_back(ci);
             int setID;
             // Bit cast use of x, then value operand of store
@@ -251,44 +250,44 @@ void InferAtomsPass::getAnnotations(std::map<int, inst_vec>* consVars, inst_vec_
           // Fresh & FreshConsistent
           if (isAnnot(funName) && !funName.equals("Consistent")) {
 #if DEBUG
-            errs() << "[Loop Inst] Calls Fresh\n";
+            errs() << "[Loop I] Calls Fresh\n";
 #endif
             std::set<Instruction*> v;
             if (find(toDelete->begin(), toDelete->end(), ci) == toDelete->end()) {
-              errs() << "getAnnot: " << ci << "\n";
+              // errs() << "getAnnot: " << ci << "\n";
               toDelete->push_back(ci);
             }
 
-#if DEBUG
-            errs() << "[Loop Inst] Print inputMap entries:\n";
-            printInstInsts(inputMap);
-#endif
+            // #if DEBUG
+            //             errs() << "[Loop I] Print inputMap entries:\n";
+            //             printInstInsts(inputMap);
+            // #endif
 
             //* Can't actually remove, otherwise wrong result
             // #if DEBUG
-            //             errs() << "[Loop Inst] Remove Fresh call from inputMap\n";
+            //             errs() << "[Loop I] Remove Fresh call from inputMap\n";
             // #endif
             //             inputMap.erase(ci);
 
-            auto* arg = ci->getOperand(0);
+            auto* freshArg = ci->getOperand(0);
 #if DEBUG
-            errs() << "[Loop Inst] Fresh arg: " << *arg << "\n";
+            errs() << "[Loop I] freshArg: " << *freshArg << "\n";
 #endif
 
-            if (auto* inst = dyn_cast<Instruction>(arg)) {
+            if (auto* inst = dyn_cast<Instruction>(freshArg)) {
 #if DEBUG
-              errs() << "[Loop Inst] arg = Instruction, add to v\n";
+              errs() << "[Loop I] Add freshVar to v\n";
 #endif
               v.emplace(inst);
 
               //* Actually collect all uses (e.g., log(x))
               if (auto* li = dyn_cast<LoadInst>(inst)) {
 #if DEBUG
-                errs() << "[Loop Inst] Further arg = LoadInst\n";
+                errs() << "[Loop I] Further arg = LoadInst\n";
 #endif
                 auto* ptr = li->getPointerOperand();
 #if DEBUG
-                errs() << "[Loop Inst] Ptr operand: " << *ptr << "\n";
+                errs() << "[Loop I] Ptr operand: " << *ptr << "\n";
 #endif
                 for (auto* ptrUse : ptr->users()) {
 #if DEBUG
@@ -296,7 +295,7 @@ void InferAtomsPass::getAnnotations(std::map<int, inst_vec>* consVars, inst_vec_
 #endif
                   if (ptrUse != inst) {
                     if (auto* liUse = dyn_cast<LoadInst>(ptrUse)) {
-                      errs() << "[Loop ptr users] ptrUse diff from Fresh arg, add to v\n";
+                      errs() << "[Loop ptr users] ptrUse = LoadInst & diff from freshArg, add to v\n";
                       v.emplace(liUse);
                     }
                   }
@@ -306,9 +305,9 @@ void InferAtomsPass::getAnnotations(std::map<int, inst_vec>* consVars, inst_vec_
             // v.push_back(ci);
 
 #if DEBUG
-            errs() << "[Loop Inst] Go over arg users\n";
+            errs() << "[Loop I] Go over arg users\n";
 #endif
-            for (auto* use : arg->users()) {
+            for (auto* use : freshArg->users()) {
               if (auto* si = dyn_cast<StoreInst>(use)) {
 #if DEBUG
                 errs() << "[Loop Users] use = StoreInst, add to v: " << *si << "\n";
@@ -325,7 +324,7 @@ void InferAtomsPass::getAnnotations(std::map<int, inst_vec>* consVars, inst_vec_
 
             if (!v.empty()) {
 #if DEBUG
-              errs() << "[Loop Inst] Add v's insts to a set in freshVars:\n";
+              errs() << "[Loop I] Add v's insts to a set in freshVars:\n";
 #endif
               inst_vec tmp;
               for (auto* inst : v) {
@@ -548,13 +547,14 @@ inst_vec_vec InferAtomsPass::collectFresh(inst_vec_vec freshVars, inst_insts_map
   inst_vec_vec toReturn;
 
 #if DEBUG
-  errs() << "Go over fresh var sets\n";
+  errs() << "Go over fresh freshSets\n";
 #endif
   for (auto varSet : freshVars) {
 #if DEBUG
     errs() << "[Loop freshVars] Go over varSet:\n";
     printInsts(varSet);
 #endif
+
     inst_set unique, callChain;
     for (auto* var : varSet) {
 #if DEBUG
diff --git a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp
index cc2eb52..c0252ff 100644
--- a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp
+++ b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp
@@ -59,7 +59,7 @@ BasicBlock* InferFreshCons::getLoopEnd(BasicBlock* bb) {
 }
 
 // Top level region inference function -- could flatten later
-void InferFreshCons::inferCons(std::map<int, inst_vec> consSets, inst_vec_vec* freshSets, inst_vec* toDeleteAnnots) {
+void InferFreshCons::inferCons(std::map<int, inst_vec> consSets, inst_vec_vec* freshSets, inst_vec* toDeleteAnnots, std::set<CallInst*>* inputInsts) {
 #if DEBUG
   errs() << "=== inferCons ===\n";
 #endif
@@ -67,7 +67,7 @@ void InferFreshCons::inferCons(std::map<int, inst_vec> consSets, inst_vec_vec* f
 #if DEBUG
     errs() << "[inferCons] Adding region for set " << id << "\n";
 #endif
-    addRegion(set, freshSets, toDeleteAnnots);
+    addRegion(set, freshSets, toDeleteAnnots, inputInsts);  // Forward the IO calls: addRegion dereferences this set
   }
 #if DEBUG
   errs() << "*** inferCons ***\n";
@@ -75,7 +75,7 @@ void InferFreshCons::inferCons(std::map<int, inst_vec> consSets, inst_vec_vec* f
 }
 
 // The only difference is outer map vs outer vec
-void InferFreshCons::inferFresh(inst_vec_vec freshSets, std::map<int, inst_vec>* consSets, inst_vec* toDeleteAnnots) {
+void InferFreshCons::inferFresh(inst_vec_vec freshSets, std::map<int, inst_vec>* consSets, inst_vec* toDeleteAnnots, std::set<CallInst*>* inputInsts) {
 #if DEBUG
   errs() << "=== inferFresh ===\n";
 #endif
@@ -84,14 +84,14 @@ void InferFreshCons::inferFresh(inst_vec_vec freshSets, std::map<int, inst_vec>*
   for (auto& [_, consSet] : *consSets) consVec.push_back(consSet);
 
   for (auto freshSet : freshSets) {
-    addRegion(freshSet, &consVec, toDeleteAnnots);
+    addRegion(freshSet, &consVec, toDeleteAnnots, inputInsts);
   }
 #if DEBUG
   errs() << "*** inferFresh ***\n";
 #endif
 }
 
-void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_vec* toDeleteAnnots) {
+void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_vec* toDeleteAnnots, std::set<CallInst*>* inputInsts) {
 #if DEBUG
   errs() << "=== addRegion ===\n";
 #endif
@@ -117,23 +117,24 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v
   inst_inst_vec regionsFound;
   while (!regionsNeeded.empty()) {
     // Need to raise all blocks in the map until they are the same
-    auto blocks = regionsNeeded.front();
+    auto taintedBlocks = regionsNeeded.front();
     regionsNeeded.pop();
+
     // Record which functions have been traveled through
     std::set<Function*> seenFuns;
 
 #if DEBUG
     errs() << "[Loop regionsNeeded] While blocks are in diff functions\n";
 #endif
-    while (!sameFunction(blocks)) {
+    while (!sameFunction(taintedBlocks)) {
       // To think on: does this change?
-      auto* goal = findCandidate(blocks, root);
+      auto* goal = findCandidate(taintedBlocks, root);
 #if DEBUG
       errs() << "[Loop !sameFunction] Go over each targetInst\n";
 #endif
       for (auto* targetInst : targetInsts) {
         // not all blocks need to be moved up
-        auto* curFun = blocks[targetInst]->getParent();
+        auto* curFun = taintedBlocks[targetInst]->getParent();
         seenFuns.insert(curFun);
         if (curFun != goal) {
           // if more than one call:
@@ -153,13 +154,13 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v
             }
             // update the original map
             if (first) {
-              blocks[targetInst] = inst->getParent();
+              taintedBlocks[targetInst] = inst->getParent();
               first = false;
             } else {
               // copy the blockmap, update, add to queue
               auto* inst = dyn_cast<Instruction>(use);
               std::map<Instruction*, BasicBlock*> copy;
-              for (auto map : blocks) copy[map.first] = map.second;
+              for (auto map : taintedBlocks) copy[map.first] = map.second;
               copy[targetInst] = inst->getParent();
               regionsNeeded.push(copy);
             }
@@ -174,25 +175,71 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v
     errs() << "[Loop regionsNeeded] Start dom tree analysis\n";
 #endif
 
-    auto* homeFun = blocks.begin()->second->getParent();
+    auto* homeFun = taintedBlocks.begin()->second->getParent();
     if (homeFun == nullptr) {
 #if DEBUG
-      errs() << "[regionsNeeded] No function found\n";
+      errs() << "[Loop regionsNeeded] No function found\n";
 #endif
       continue;
     }
 #if DEBUG
-    errs() << "[regionsNeeded] Found home fun: " << homeFun->getName() << "\n";
+    errs() << "[Loop regionsNeeded] Found home fun: " << homeFun->getName() << "\n";
 #endif
 
+    // Tainted blocks that most recently precede an untainted block (in homeFun's block order)
+    std::vector<BasicBlock*> lastTainted;
+    BasicBlock* prevTainted = nullptr;  // Null until the first tainted block is seen
+
+    for (auto& B : *homeFun) {
+      bool isTainted = false;
+
+      for (auto& [_, taintedBlock] : taintedBlocks) {
+        if (&B == taintedBlock) {
+          isTainted = true;
+          break;
+        }
+      }
+      // Untainted block: record the latest tainted block once (skip if none seen yet)
+      if (!isTainted) {
+        errs() << "Not tainted: " << B << "\n";
+        if (prevTainted != nullptr && find(lastTainted.begin(), lastTainted.end(), prevTainted) == lastTainted.end())
+          lastTainted.push_back(prevTainted);
+      } else {
+        prevTainted = &B;
+      }
+    }
+
+    for (auto* B : lastTainted) {
+      errs() << "lastTainted: " << *B << "\n";
+    }
+
+    // lastTainted[1]->setNext();
+
 #if OPT
+    std::set<BasicBlock*> seenBlocks;
+    bool hasRewired = false;
+
 #if DEBUG
-    errs() << "[regionsNeeded] Go over all block insts\n";
+    errs() << "[Loop regionsNeeded] Go over all blocks\n";
 #endif
-    std::set<BasicBlock*> seenBlocks;
-    for (auto& [_, B] : blocks) {
-      if (seenBlocks.find(B) == seenBlocks.end()) {
-        seenBlocks.emplace(B);
+    for (auto& B : *homeFun) {
+      bool isTainted = false;
+      for (auto& [_, tB] : taintedBlocks) {
+        if (&B == tB) {
+          isTainted = true;
+          break;
+        }
+      }
+
+      if (!isTainted && seenBlocks.find(&B) == seenBlocks.end()) {
+        seenBlocks.emplace(&B);
+
+        errs() << "Terminator: " << *B.getTerminator() << "\n";
+      } else if (isTainted && seenBlocks.find(&B) == seenBlocks.end()) {
+#if DEBUG
+        errs() << "[Loop B] New tainted block\n";
+#endif
+        seenBlocks.emplace(&B);
 
         // A mapping from original instructions to their clones
         inst_inst_map clonedInsts;
@@ -201,7 +248,7 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v
         // (The original) instructions to be deleted
         inst_vec toDelete;
 
-        for (auto& I : *B) {
+        for (auto& I : B) {
 #if DEBUG
           errs() << I << "\n";
 #endif
@@ -250,12 +297,28 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v
               clone = shouldDelay ? I.clone() : &I;
 
               if (shouldDelay && I.getNumOperands() > 1) {
-                auto* op = dyn_cast<Instruction>(I.getOperand(0));
-                inst_inst_map::iterator it = clonedInsts.find(op);
-                assert(it != clonedInsts.end());
-                clone->setOperand(0, it->second);
+                if (auto* op = dyn_cast<Instruction>(I.getOperand(0))) {
+                  inst_inst_map::iterator it = clonedInsts.find(op);
+                  assert(it != clonedInsts.end());
+                  clone->setOperand(0, it->second);
+                }
               }
             } else if (isa<StoreInst>(&I)) {
+              // Check whether any IO function calls coming after depend on this store
+              // If so, do NOT delay
+              auto* storePtr = I.getOperand(1);
+              for (auto* user : storePtr->users()) {
+                if (auto* li = dyn_cast<LoadInst>(user)) {
+                  for (auto* liUser : li->users()) {
+                    if (auto* ci = dyn_cast<CallInst>(liUser)) {
+                      if (inputInsts->find(ci) != inputInsts->end()) {
+                        shouldDelay = false;
+                      }
+                    }
+                  }
+                }
+              }
+
               clone = I.clone();
 
               if (auto* op = dyn_cast<Instruction>(I.getOperand(0))) {
@@ -263,8 +326,19 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v
                 assert(it != clonedInsts.end());
                 clone->setOperand(0, it->second);
               }
+            } else if (isa<LoadInst>(&I)) {
+              // Check whether any IO function calls coming after depend on this load
+              // If so, do NOT delay
+              for (auto* user : I.users()) {
+                if (auto* ci = dyn_cast<CallInst>(user)) {
+                  if (inputInsts->find(ci) != inputInsts->end()) {
+                    shouldDelay = false;
+                  }
+                }
+              }
+
+              clone = I.clone();
             } else {
-              // E.g., LoadInst
               clone = I.clone();
             }
 
@@ -277,7 +351,7 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v
           }
         }
 
-        IRBuilder builder(B);
+        IRBuilder builder(&B);
         // Append each delayed instruction to the end of the block,
         // in the original order
         for (auto* I : toDelay) builder.Insert(I);
@@ -285,9 +359,9 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v
 #if DEBUG
         errs() << "Delete originals:\n";
 #endif
-        auto I = B->begin();
+        auto I = B.begin();
         // Delete the originals
-        for (; I != B->end();) {
+        for (; I != B.end();) {
 #if DEBUG
           errs() << *I << "\n";
 #endif
@@ -328,7 +402,7 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v
         }
 
 #if DEBUG
-        errs() << "After: " << *B << "\n";
+        errs() << "After: " << B << "\n";
 #endif
       }
     }
@@ -336,8 +410,8 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v
 
     auto& domTree = FAM->getResult<DominatorTreeAnalysis>(*homeFun);
     // Find the closest point that dominates
-    auto* startDom = blocks.begin()->second;
-    for (auto& [_, B] : blocks)
+    auto* startDom = taintedBlocks.begin()->second;
+    for (auto& [_, B] : taintedBlocks)
       startDom = domTree.findNearestCommonDominator(B, startDom);
 #if DEBUG
     errs() << "[Loop regionsNeeded] startDom: " << *startDom << "\n";
@@ -352,8 +426,8 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v
     // Flip directions for the region end
     auto& postDomTree = FAM->getResult<PostDominatorTreeAnalysis>(*homeFun);
     // Find the closest point that dominates
-    auto* endDom = blocks.begin()->second;
-    for (auto& [_, block] : blocks) {
+    auto* endDom = taintedBlocks.begin()->second;
+    for (auto& [_, taintedBlock] : taintedBlocks) {
 #if DEBUGINFER
       if (endDom != nullptr) {
         errs() << "Finding post dom of: " << getSimpleNodeLabel(map.second) << " and " << getSimpleNodeLabel(endDom) << "\n";
@@ -361,7 +435,7 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v
         errs() << "endDom is null\n";
       }
 #endif
-      endDom = postDomTree.findNearestCommonDominator(block, endDom);
+      endDom = postDomTree.findNearestCommonDominator(taintedBlock, endDom);
     }
 
 #if DEBUG
@@ -399,7 +473,7 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v
     }
 
     // TODO: fallback if endDom is null? Need hyper-blocks, I think
-    // possibly can do a truncation check, to lessen the size a little, but could that interfere with compiler optimizations?
+    // Possibly can do a truncation check, to lessen the size a little, but could that interfere with compiler optimizations?
     auto* regionStart = truncate(startDom, true, targetInsts, seenFuns);
     auto* regionEnd = truncate(endDom, false, targetInsts, seenFuns);
     if (regionStart == nullptr) {
@@ -668,14 +742,14 @@ int InferFreshCons::getSubLength(BasicBlock* B, Instruction* end, std::vector<Ba
   int count = 0, max_ret = 0;
   visited.push_back(B);
 #if DEBUG
-  errs() << "Go over bb insts\n";
+  errs() << "Go over B insts\n";
 #endif
   for (auto& I : *B) {
     count++;
 
     if (&I == end) {
 #if DEBUG
-      errs() << "[Loop I] Cur inst = end, stop\n";
+      errs() << "[Loop I] I = end, stop: " << *end << "\n";
 #endif
       return count;
     }
@@ -684,7 +758,7 @@ int InferFreshCons::getSubLength(BasicBlock* B, Instruction* end, std::vector<Ba
       auto* cf = ci->getCalledFunction();
       if (!cf->empty() && cf != NULL) {
 #if DEBUG
-        errs() << "[Loop I] Cur inst = CallInst, calling: " << cf->getName() << "\n";
+        errs() << "[Loop I] I = CallInst, calling: " << cf->getName() << "\n";
 #endif
         count += cf->getInstructionCount();
       }
@@ -692,11 +766,11 @@ int InferFreshCons::getSubLength(BasicBlock* B, Instruction* end, std::vector<Ba
 
     if (I.isTerminator()) {
 #if DEBUG
-      errs() << "[Loop I] Cur inst = terminator\n";
+      errs() << "[Loop I] I = terminator: " << I << "\n";
 #endif
       for (int i = 0; i < I.getNumSuccessors(); i++) {
         auto* next = I.getSuccessor(i);
-        // already counted -- do something more fancy for loops?
+        // Already counted -- do something more fancy for loops?
         if (find(visited.begin(), visited.end(), next) != visited.end()) continue;
         int intermed = getSubLength(next, end, visited);
         if (intermed > max_ret) {
diff --git a/ocelot/AtomicRegionInference/src/TaintTracker.cpp b/ocelot/AtomicRegionInference/src/TaintTracker.cpp
index 1c400d9..45f30db 100644
--- a/ocelot/AtomicRegionInference/src/TaintTracker.cpp
+++ b/ocelot/AtomicRegionInference/src/TaintTracker.cpp
@@ -1,16 +1,16 @@
 #include "include/TaintTracker.h"
 
 // Main dataflow function to construct map of store (TODO: not just stores) insts to vars (inputs?) they depend on
-inst_insts_map buildInputs(Module* M) {
+std::pair<inst_insts_map, std::set<CallInst*>> buildInputs(Module* M) {
 #if DEBUG
   errs() << "=== buildInputs ===\n";
 #endif
 
-  inst_vec inputInsts = findInputInsts(M);
+  std::set<CallInst*> inputInsts = findInputInsts(M);
   inst_insts_map taintedInsts;
   inst_vec promotedInputs;
 
-  for (auto inputInst : inputInsts) {
+  for (auto* inputInst : inputInsts) {
 #if DEBUG
     errs() << "[Loop inputInst] inputInst: " << *inputInst << "\n";
 #endif
@@ -274,7 +274,7 @@ inst_insts_map buildInputs(Module* M) {
 #if DEBUG
   errs() << "*** buildInputs ***\n";
 #endif
-  return taintedInsts;
+  return make_pair(taintedInsts, inputInsts);
 }
 
 val_vec traverseLocal(Value* tainted, Instruction* srcInput, inst_insts_map* taintedInsts, Instruction* caller) {
@@ -610,29 +610,29 @@ val_vec traverseLocal(Value* tainted, Instruction* srcInput, inst_insts_map* tai
   return interProcFlows;
 }
 
-inst_vec findInputInsts(Module* M) {
+std::set<CallInst*> findInputInsts(Module* M) {
 #if DEBUG
-  errs() << "findInputInsts\n";
+  errs() << "=== findInputInsts ===\n";
 #endif
-  inst_vec inputInsts;
+  std::set<CallInst*> inputInsts;
 
   // Find IO_NAME annotations
   for (auto& gv : M->globals()) {
     if (gv.getName().starts_with("IO_NAME")) {
-      if (auto* fp = dyn_cast<Function>(gv.getInitializer())) {
+      if (auto* ioFun = dyn_cast<Function>(gv.getInitializer())) {
 #if DEBUG
-        errs() << "Found IO fun: " << fp->getName() << "\n";
+        errs() << "Found IO fun: " << ioFun->getName() << "\n";
 #endif
         // Now, search for calls to those functions
         for (auto& F : *M) {
           for (auto& B : F) {
             for (auto& I : B) {
               if (auto* ci = dyn_cast<CallInst>(&I)) {
-                if (fp == ci->getCalledFunction()) {
+                if (ioFun == ci->getCalledFunction()) {
 #if DEBUG
                   errs() << "Found IO call: " << I << "\n";
 #endif
-                  inputInsts.push_back(&I);
+                  inputInsts.insert(ci);
                 }
               }
             }
@@ -645,6 +645,7 @@ inst_vec findInputInsts(Module* M) {
     }
   }
 
+  errs() << "*** findInputInsts ***\n";
   return inputInsts;
 }
 
diff --git a/ocelot/AtomicRegionInference/src/include/InferFreshCons.h b/ocelot/AtomicRegionInference/src/include/InferFreshCons.h
index e8dfc8a..f26adbf 100644
--- a/ocelot/AtomicRegionInference/src/include/InferFreshCons.h
+++ b/ocelot/AtomicRegionInference/src/include/InferFreshCons.h
@@ -20,9 +20,9 @@ struct InferFreshCons {
   enum InsertKind { Start,
                     End };
 
-  void inferCons(std::map<int, inst_vec> consSets, inst_vec_vec* freshSets, inst_vec* toDeleteAnnots);
-  void inferFresh(inst_vec_vec freshSets, std::map<int, inst_vec>* consSets, inst_vec* toDeleteAnnots);
-  void addRegion(inst_vec conSet, inst_vec_vec* other, inst_vec* toDeleteAnnots);
+  void inferCons(std::map<int, inst_vec> consSets, inst_vec_vec* freshSets, inst_vec* toDeleteAnnots, std::set<CallInst*>* inputInsts);
+  void inferFresh(inst_vec_vec freshSets, std::map<int, inst_vec>* consSets, inst_vec* toDeleteAnnots, std::set<CallInst*>* inputInsts);
+  void addRegion(inst_vec conSet, inst_vec_vec* other, inst_vec* toDeleteAnnots, std::set<CallInst*>* inputInsts);
   Function* findCandidate(std::map<Instruction*, BasicBlock*> blocks, Function* root);
   Instruction* insertRegionInst(InsertKind insertKind, Instruction* insertBefore);
   bool sameFunction(std::map<Instruction*, BasicBlock*> blockMap);
diff --git a/ocelot/AtomicRegionInference/src/include/TaintTracker.h b/ocelot/AtomicRegionInference/src/include/TaintTracker.h
index ea3ce03..1b06e48 100644
--- a/ocelot/AtomicRegionInference/src/include/TaintTracker.h
+++ b/ocelot/AtomicRegionInference/src/include/TaintTracker.h
@@ -5,9 +5,9 @@
 
 using namespace llvm;
 
-inst_insts_map buildInputs(Module* m);
+std::pair<inst_insts_map, std::set<CallInst*>> buildInputs(Module* m);
 val_vec traverseLocal(Value* tainted, Instruction* srcOp, inst_insts_map* buildMap, Instruction* caller);
-inst_vec findInputInsts(Module* M);
+std::set<CallInst*> findInputInsts(Module* M);
 Instruction* ptrAfterCall(Value* ptr, CallInst* ci);
 bool storePrecedesUse(Instruction* use, StoreInst* toMatch);
 inst_vec couldMatchGEPI(GetElementPtrInst* tGEPI);

From b8b00374d00ef96eb473a7696c0f23657c56abd5 Mon Sep 17 00:00:00 2001
From: Robert Zhang <jiaxuanzhang066@gmail.com>
Date: Sat, 9 Mar 2024 22:39:57 -0500
Subject: [PATCH 12/18] [InferAtomsPass] Loop optimization

Extract untainted instructions into their own loop that doesn't go into
the atomic region.

Test plan:

`make eg5` and observe the difference between
`benchmarks/ctests/example05.ll` (optimized) and
`benchmarks/ctests/example05.orig.ll` (original), or `make eg7`.
---
 benchmarks/ctests/example05.c                 |   7 +-
 benchmarks/ctests/example05.ll                |  45 ++--
 benchmarks/ctests/example05.orig.ll           |  26 +-
 benchmarks/ctests/example07.c                 |  27 ++
 benchmarks/ctests/example07.ll                | 114 ++++++++
 .../{example04.ll => example07.orig.ll}       |  47 +++-
 ocelot/AtomicRegionInference/Makefile         |   7 +-
 .../src/InferFreshCons.cpp                    | 253 +++++++++++++++++-
 8 files changed, 462 insertions(+), 64 deletions(-)
 create mode 100644 benchmarks/ctests/example07.c
 create mode 100644 benchmarks/ctests/example07.ll
 rename benchmarks/ctests/{example04.ll => example07.orig.ll} (64%)

diff --git a/benchmarks/ctests/example05.c b/benchmarks/ctests/example05.c
index 58dddf6..e46b4fb 100644
--- a/benchmarks/ctests/example05.c
+++ b/benchmarks/ctests/example05.c
@@ -14,12 +14,13 @@ void log(int x) {
 
 void app() {
   int x = input();
-  for (int i = 0; i < 10; i++) {
-    log(x);
-  }
   for (int i = 0; i < 10; i++) {
     log(1);
+    log(x);
   }
+  // for (int i = 0; i < 10; i++) {
+  //   log(1);
+  // }
   Fresh(x);
 }
 
diff --git a/benchmarks/ctests/example05.ll b/benchmarks/ctests/example05.ll
index 4dfc800..f137154 100644
--- a/benchmarks/ctests/example05.ll
+++ b/benchmarks/ctests/example05.ll
@@ -40,8 +40,8 @@ declare i32 @printf(ptr noundef, ...) #1
 define void @app() #0 {
 entry:
   %x = alloca i32, align 4
+  %0 = alloca i32, align 4
   %i = alloca i32, align 4
-  %i1 = alloca i32, align 4
   call void @atomic_start()
   %call = call i32 @input()
   store i32 %call, ptr %x, align 4
@@ -49,42 +49,42 @@ entry:
   br label %for.cond
 
 for.cond:                                         ; preds = %entry, %for.inc
-  %0 = load i32, ptr %i, align 4
-  %cmp = icmp slt i32 %0, 10
+  %1 = load i32, ptr %i, align 4
+  %cmp = icmp slt i32 %1, 10
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32, ptr %x, align 4
-  call void @log(i32 noundef %1)
+  %2 = load i32, ptr %x, align 4
+  call void @log(i32 noundef %2)
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %2 = load i32, ptr %i, align 4
-  %inc = add nsw i32 %2, 1
+  %3 = load i32, ptr %i, align 4
+  %inc = add nsw i32 %3, 1
   store i32 %inc, ptr %i, align 4
   br label %for.cond, !llvm.loop !5
 
 for.end:                                          ; preds = %for.cond
-  store i32 0, ptr %i1, align 4
-  br label %for.cond2
+  call void @atomic_end()
+  store i32 0, ptr %0, align 4
+  br label %for.cond1
 
-for.cond2:                                        ; preds = %for.inc5, %for.end
-  %3 = load i32, ptr %i1, align 4
-  %cmp3 = icmp slt i32 %3, 10
-  br i1 %cmp3, label %for.body4, label %for.end7
+for.cond1:                                        ; preds = %for.inc3, %for.end
+  %4 = load i32, ptr %0, align 4
+  %5 = icmp slt i32 %4, 10
+  br i1 %5, label %for.body2, label %for.end4
 
-for.body4:                                        ; preds = %for.cond2
+for.body2:                                        ; preds = %for.cond1
   call void @log(i32 noundef 1)
-  br label %for.inc5
+  br label %for.inc3
 
-for.inc5:                                         ; preds = %for.body4
-  %4 = load i32, ptr %i1, align 4
-  %inc6 = add nsw i32 %4, 1
-  store i32 %inc6, ptr %i1, align 4
-  br label %for.cond2, !llvm.loop !7
+for.inc3:                                         ; preds = %for.body2
+  %6 = load i32, ptr %0, align 4
+  %7 = add nsw i32 %6, 1
+  store i32 %7, ptr %0, align 4
+  br label %for.cond1, !llvm.loop !5
 
-for.end7:                                         ; preds = %for.cond2
-  call void @atomic_end()
+for.end4:                                         ; preds = %for.cond1
   ret void
 }
 
@@ -108,4 +108,3 @@ attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-pr
 !4 = !{!"Homebrew clang version 17.0.2"}
 !5 = distinct !{!5, !6}
 !6 = !{!"llvm.loop.mustprogress"}
-!7 = distinct !{!7, !6}
diff --git a/benchmarks/ctests/example05.orig.ll b/benchmarks/ctests/example05.orig.ll
index 6dcc44f..c9e181a 100644
--- a/benchmarks/ctests/example05.orig.ll
+++ b/benchmarks/ctests/example05.orig.ll
@@ -49,7 +49,6 @@ define void @app() #0 {
 entry:
   %x = alloca i32, align 4
   %i = alloca i32, align 4
-  %i1 = alloca i32, align 4
   %call = call i32 @input()
   store i32 %call, ptr %x, align 4
   store i32 0, ptr %i, align 4
@@ -61,6 +60,7 @@ for.cond:                                         ; preds = %for.inc, %entry
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
+  call void @log(i32 noundef 1)
   %1 = load i32, ptr %x, align 4
   call void @log(i32 noundef %1)
   br label %for.inc
@@ -72,27 +72,8 @@ for.inc:                                          ; preds = %for.body
   br label %for.cond, !llvm.loop !5
 
 for.end:                                          ; preds = %for.cond
-  store i32 0, ptr %i1, align 4
-  br label %for.cond2
-
-for.cond2:                                        ; preds = %for.inc5, %for.end
-  %3 = load i32, ptr %i1, align 4
-  %cmp3 = icmp slt i32 %3, 10
-  br i1 %cmp3, label %for.body4, label %for.end7
-
-for.body4:                                        ; preds = %for.cond2
-  call void @log(i32 noundef 1)
-  br label %for.inc5
-
-for.inc5:                                         ; preds = %for.body4
-  %4 = load i32, ptr %i1, align 4
-  %inc6 = add nsw i32 %4, 1
-  store i32 %inc6, ptr %i1, align 4
-  br label %for.cond2, !llvm.loop !7
-
-for.end7:                                         ; preds = %for.cond2
-  %5 = load i32, ptr %x, align 4
-  call void @Fresh(i32 noundef %5)
+  %3 = load i32, ptr %x, align 4
+  call void @Fresh(i32 noundef %3)
   ret void
 }
 
@@ -116,4 +97,3 @@ attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-pr
 !4 = !{!"Homebrew clang version 17.0.2"}
 !5 = distinct !{!5, !6}
 !6 = !{!"llvm.loop.mustprogress"}
-!7 = distinct !{!7, !6}
diff --git a/benchmarks/ctests/example07.c b/benchmarks/ctests/example07.c
new file mode 100644
index 0000000..19fe98d
--- /dev/null
+++ b/benchmarks/ctests/example07.c
@@ -0,0 +1,27 @@
+#include <stdio.h>
+
+void Fresh(int x) {}
+
+void atomic_start() {}
+void atomic_end() {}
+
+int input() { return 0; }
+int (*IO_NAME)() = input;
+
+void log(int x) {
+  printf("%d\n", x);
+}
+
+void app() {
+  int x = input();
+  for (int i = 0; i < 10; i++) {
+    int y = 1;
+    log(x);
+    log(y + 2);
+  }
+  Fresh(x);
+}
+
+int main() {
+  app();
+}
\ No newline at end of file
diff --git a/benchmarks/ctests/example07.ll b/benchmarks/ctests/example07.ll
new file mode 100644
index 0000000..41881ab
--- /dev/null
+++ b/benchmarks/ctests/example07.ll
@@ -0,0 +1,114 @@
+; ModuleID = '../../benchmarks/ctests/example07.c'
+source_filename = "../../benchmarks/ctests/example07.c"
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-macosx12.0.0"
+
+@IO_NAME = global ptr @input, align 8
+@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @atomic_start() #0 {
+entry:
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @atomic_end() #0 {
+entry:
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @input() #0 {
+entry:
+  ret i32 0
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @log(i32 noundef %x) #0 {
+entry:
+  %x.addr = alloca i32, align 4
+  store i32 %x, ptr %x.addr, align 4
+  %0 = load i32, ptr %x.addr, align 4
+  %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %0)
+  ret void
+}
+
+declare i32 @printf(ptr noundef, ...) #1
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @app() #0 {
+entry:
+  %x = alloca i32, align 4
+  %0 = alloca i32, align 4
+  %i = alloca i32, align 4
+  %y = alloca i32, align 4
+  call void @atomic_start()
+  %call = call i32 @input()
+  store i32 %call, ptr %x, align 4
+  store i32 0, ptr %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %entry, %for.inc
+  %1 = load i32, ptr %i, align 4
+  %cmp = icmp slt i32 %1, 10
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %2 = load i32, ptr %x, align 4
+  call void @log(i32 noundef %2)
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %3 = load i32, ptr %i, align 4
+  %inc = add nsw i32 %3, 1
+  store i32 %inc, ptr %i, align 4
+  br label %for.cond, !llvm.loop !5
+
+for.end:                                          ; preds = %for.cond
+  call void @atomic_end()
+  store i32 0, ptr %0, align 4
+  br label %for.cond1
+
+for.cond1:                                        ; preds = %for.inc3, %for.end
+  %4 = load i32, ptr %0, align 4
+  %5 = icmp slt i32 %4, 10
+  br i1 %5, label %for.body2, label %for.end4
+
+for.body2:                                        ; preds = %for.cond1
+  store i32 1, ptr %y, align 4
+  %6 = load i32, ptr %y, align 4
+  %7 = add nsw i32 %6, 2
+  call void @log(i32 noundef %7)
+  br label %for.inc3
+
+for.inc3:                                         ; preds = %for.body2
+  %8 = load i32, ptr %0, align 4
+  %9 = add nsw i32 %8, 1
+  store i32 %9, ptr %0, align 4
+  br label %for.cond1, !llvm.loop !5
+
+for.end4:                                         ; preds = %for.cond1
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @main() #0 {
+entry:
+  call void @app()
+  ret i32 0
+}
+
+attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
+attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+!llvm.ident = !{!4}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 8, !"PIC Level", i32 2}
+!2 = !{i32 7, !"uwtable", i32 1}
+!3 = !{i32 7, !"frame-pointer", i32 1}
+!4 = !{!"Homebrew clang version 17.0.2"}
+!5 = distinct !{!5, !6}
+!6 = !{!"llvm.loop.mustprogress"}
diff --git a/benchmarks/ctests/example04.ll b/benchmarks/ctests/example07.orig.ll
similarity index 64%
rename from benchmarks/ctests/example04.ll
rename to benchmarks/ctests/example07.orig.ll
index a3a1d72..299b165 100644
--- a/benchmarks/ctests/example04.ll
+++ b/benchmarks/ctests/example07.orig.ll
@@ -1,11 +1,19 @@
-; ModuleID = '../../benchmarks/ctests/example04.c'
-source_filename = "../../benchmarks/ctests/example04.c"
+; ModuleID = '../../benchmarks/ctests/example07.c'
+source_filename = "../../benchmarks/ctests/example07.c"
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 target triple = "arm64-apple-macosx12.0.0"
 
 @IO_NAME = global ptr @input, align 8
 @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
 
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @Fresh(i32 noundef %x) #0 {
+entry:
+  %x.addr = alloca i32, align 4
+  store i32 %x, ptr %x.addr, align 4
+  ret void
+}
+
 ; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
 define void @atomic_start() #0 {
 entry:
@@ -40,19 +48,36 @@ declare i32 @printf(ptr noundef, ...) #1
 define void @app() #0 {
 entry:
   %x = alloca i32, align 4
+  %i = alloca i32, align 4
   %y = alloca i32, align 4
-  call void @atomic_start()
   %call = call i32 @input()
-  call void @atomic_start()
-  %call1 = call i32 @input()
-  call void @atomic_end()
-  store i32 %call1, ptr %y, align 4
-  %0 = load i32, ptr %y, align 4
-  call void @log(i32 noundef %0)
-  call void @atomic_end()
   store i32 %call, ptr %x, align 4
+  store i32 0, ptr %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32, ptr %i, align 4
+  %cmp = icmp slt i32 %0, 10
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  store i32 1, ptr %y, align 4
   %1 = load i32, ptr %x, align 4
   call void @log(i32 noundef %1)
+  %2 = load i32, ptr %y, align 4
+  %add = add nsw i32 %2, 2
+  call void @log(i32 noundef %add)
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %3 = load i32, ptr %i, align 4
+  %inc = add nsw i32 %3, 1
+  store i32 %inc, ptr %i, align 4
+  br label %for.cond, !llvm.loop !5
+
+for.end:                                          ; preds = %for.cond
+  %4 = load i32, ptr %x, align 4
+  call void @Fresh(i32 noundef %4)
   ret void
 }
 
@@ -74,3 +99,5 @@ attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-pr
 !2 = !{i32 7, !"uwtable", i32 1}
 !3 = !{i32 7, !"frame-pointer", i32 1}
 !4 = !{!"Homebrew clang version 17.0.2"}
+!5 = distinct !{!5, !6}
+!6 = !{!"llvm.loop.mustprogress"}
diff --git a/ocelot/AtomicRegionInference/Makefile b/ocelot/AtomicRegionInference/Makefile
index d93e037..451976c 100644
--- a/ocelot/AtomicRegionInference/Makefile
+++ b/ocelot/AtomicRegionInference/Makefile
@@ -1,4 +1,4 @@
-.PHONY: clean_tests clean eg1 eg2
+.PHONY: clean_tests clean eg1 eg2 eg3 eg4 eg5 eg6 eg7 run_eg1 run_eg2 run_eg3 run_eg4 run_eg5 run_eg6 run_eg7
 
 all:
 	make eg1
@@ -7,6 +7,7 @@ all:
 	make eg4
 	make eg5
 	make eg6
+	make eg7
 
 eg1:
 	TEST=example01 make test
@@ -20,6 +21,8 @@ eg5:
 	TEST=example05 make test
 eg6:
 	TEST=example06 make test
+eg7:
+	TEST=example07 make test
 
 run_eg1:
 	TEST=example01 make run
@@ -33,6 +36,8 @@ run_eg5:
 	TEST=example05 make run
 run_eg6:
 	TEST=example06 make run
+run_eg7:
+	TEST=example07 make run
 
 test:
 	$(MAKE) -C build
diff --git a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp
index c0252ff..65e2b0c 100644
--- a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp
+++ b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp
@@ -1,5 +1,6 @@
 #include "include/InferFreshCons.h"
 
+#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/PostDominators.h"
 
 Instruction* InferFreshCons::insertRegionInst(InsertKind insertKind, Instruction* insertBefore) {
@@ -219,6 +220,16 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v
     std::set<BasicBlock*> seenBlocks;
     bool hasRewired = false;
 
+    /*
+      - Check if stmt is in a loop
+      - Remove stmt from loop
+      - Clone that loop
+      - Remove tainted insts in cloned loop
+      - Connect the two loops
+    */
+    LoopInfo& LI = FAM->getResult<LoopAnalysis>(*homeFun);
+    std::map<Loop*, std::vector<Instruction*>> untaintedClones;
+
 #if DEBUG
     errs() << "[Loop regionsNeeded] Go over all blocks\n";
 #endif
@@ -232,12 +243,14 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v
       }
 
       if (!isTainted && seenBlocks.find(&B) == seenBlocks.end()) {
+#if DEBUG
+        errs() << "[Loop B] Untainted block " << B.getName() << "\n";
+#endif
         seenBlocks.emplace(&B);
-
         errs() << "Terminator: " << *B.getTerminator() << "\n";
       } else if (isTainted && seenBlocks.find(&B) == seenBlocks.end()) {
 #if DEBUG
-        errs() << "[Loop B] New tainted block\n";
+        errs() << "[Loop B] Tainted block " << B.getName() << "\n";
 #endif
         seenBlocks.emplace(&B);
 
@@ -270,7 +283,7 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v
 
             auto shouldDelay = find(targetInsts.begin(), targetInsts.end(), &I) == targetInsts.end() && !inExistingSet;
 #if DEBUG
-            errs() << "  Should" << (shouldDelay ? " " : " NOT ") << "be delayed\n";
+            errs() << "__Should" << (shouldDelay ? " " : " NOT ") << "be delayed__\n";
 #endif
 
             Instruction* clone;
@@ -290,6 +303,20 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v
                   clone->setOperand(i, it->second);
                 }
               }
+
+              if (shouldDelay) {
+                auto* loop = LI.getLoopFor(&B);
+                if (loop != nullptr) {
+#if DEBUG
+                  errs() << "In loop, keep track of it\n";
+#endif
+
+                  if (untaintedClones.count(loop) == 0)
+                    untaintedClones[loop] = {clone};
+                  else
+                    untaintedClones[loop].push_back(clone);
+                }
+              }
             } else if (isa<CallInst>(&I)) {
               // In case I is an IO function call, we don't clone it
               // and instead map it to itself for referencing later
@@ -303,6 +330,20 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v
                   clone->setOperand(0, it->second);
                 }
               }
+
+              if (shouldDelay) {
+                auto* loop = LI.getLoopFor(&B);
+                if (loop != nullptr) {
+#if DEBUG
+                  errs() << "In loop, keep track of it\n";
+#endif
+
+                  if (untaintedClones.count(loop) == 0)
+                    untaintedClones[loop] = {clone};
+                  else
+                    untaintedClones[loop].push_back(clone);
+                }
+              }
             } else if (isa<StoreInst>(&I)) {
               // Check whether any IO function calls coming after depend on this store
               // If so, do NOT delay
@@ -326,6 +367,20 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v
                 assert(it != clonedInsts.end());
                 clone->setOperand(0, it->second);
               }
+
+              if (shouldDelay) {
+                auto* loop = LI.getLoopFor(&B);
+                if (loop != nullptr) {
+#if DEBUG
+                  errs() << "In loop, keep track of it\n";
+#endif
+
+                  if (untaintedClones.count(loop) == 0)
+                    untaintedClones[loop] = {clone};
+                  else
+                    untaintedClones[loop].push_back(clone);
+                }
+              }
             } else if (isa<LoadInst>(&I)) {
               // Check whether any IO function calls coming after depend on this load
               // If so, do NOT delay
@@ -338,6 +393,20 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v
               }
 
               clone = I.clone();
+
+              if (shouldDelay) {
+                auto* loop = LI.getLoopFor(&B);
+                if (loop != nullptr) {
+#if DEBUG
+                  errs() << "In loop, keep track of it\n";
+#endif
+
+                  if (untaintedClones.count(loop) == 0)
+                    untaintedClones[loop] = {clone};
+                  else
+                    untaintedClones[loop].push_back(clone);
+                }
+              }
             } else {
               clone = I.clone();
             }
@@ -352,6 +421,9 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v
         }
 
         IRBuilder builder(&B);
+#if DEBUG
+        errs() << "Add delayed instructions to end of block\n";
+#endif
         // Append each delayed instruction to the end of the block,
         // in the original order
         for (auto* I : toDelay) builder.Insert(I);
@@ -366,10 +438,10 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v
           errs() << *I << "\n";
 #endif
           if (find(toDelete.begin(), toDelete.end(), &*I) != toDelete.end()) {
+            I = I->eraseFromParent();
 #if DEBUG
             errs() << "Deleted\n";
 #endif
-            I = I->eraseFromParent();
           } else
             I++;
         }
@@ -406,6 +478,179 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v
 #endif
       }
     }
+
+    for (auto& [taintedLoop, untaintedClones] : untaintedClones) {
+#if DEBUG
+      errs() << "Clone taintedLoop\n";
+#endif
+      std::vector<BasicBlock*> clonedLoop;
+      BasicBlock* forEnd;
+      Instruction* clonedAlloca;
+      Value* initVal;
+      inst_inst_map clones;
+
+      auto loopBlocks = taintedLoop->getBlocks();
+      assert(loopBlocks.size() == 3);
+      for (int i = 0; i < loopBlocks.size(); i++) {
+        auto* block = loopBlocks[i];
+        auto* clonedBlock = BasicBlock::Create(block->getContext(), block->getName(), homeFun);
+        IRBuilder builder(clonedBlock);
+
+        Instruction* prev;
+        for (auto& I : *block) {
+          auto* clonedI = I.clone();
+
+          // Only extract if untainted
+          // Covers the cond and inc blocks; they are processed on the fly due to
+          // their special role in keeping the loop going
+          if (find(targetInsts.begin(), targetInsts.end(), &I) == targetInsts.end()) {
+            // for.cond
+            if (i == 0) {
+              if (auto* li = dyn_cast<LoadInst>(clonedI)) {
+                auto* ptr = li->getPointerOperand();
+
+                if (auto* ai = dyn_cast<AllocaInst>(*ptr->uses().begin())) {
+                  IRBuilder builder(ai);
+                  clonedAlloca = builder.CreateAlloca(ai->getAllocatedType());
+                }
+
+                for (auto* ptrUser : ptr->users()) {
+                  if (auto* si = dyn_cast<StoreInst>(ptrUser)) {
+                    if (!isa<BinaryOperator>(si->getOperand(0))) {
+                      initVal = si->getOperand(0);
+                    }
+                  }
+                }
+
+                li->setOperand(0, clonedAlloca);
+                prev = li;
+              } else if (auto* ci = dyn_cast<CmpInst>(clonedI)) {
+                // TODO: Check if operand originates from the current loop
+                if (isa<LoadInst>(ci->getOperand(0))) {
+                  ci->setOperand(0, prev);
+                }
+                prev = ci;
+              } else if (auto* bi = dyn_cast<BranchInst>(clonedI)) {
+                assert(bi->isConditional());
+                bi->setCondition(prev);
+
+                if (auto* B = dyn_cast<BasicBlock>(bi->getOperand(1))) {
+                  forEnd = B;
+                }
+              }
+            }
+
+            // for.inc
+            else if (i == 2) {
+              if (auto* li = dyn_cast<LoadInst>(clonedI)) {
+                li->setOperand(0, clonedAlloca);
+                prev = li;
+              } else if (auto* bi = dyn_cast<BinaryOperator>(clonedI)) {
+                auto* lhs = bi->getOperand(0);
+                if (isa<LoadInst>(lhs)) bi->setOperand(0, prev);
+                auto* rhs = bi->getOperand(1);
+                if (isa<LoadInst>(rhs)) bi->setOperand(1, prev);
+                prev = bi;
+              } else if (auto* si = dyn_cast<StoreInst>(clonedI)) {
+                si->setOperand(0, prev);
+                si->setOperand(1, clonedAlloca);
+              }
+            }
+
+            clones.emplace(&I, clonedI);
+            builder.Insert(clonedI);
+          }
+        }
+
+        // for.body
+        // Performs a standard sound cloning procedure (on each operand);
+        // the instructions in the body are unrelated to the loop except the final
+        // branch instruction
+        for (auto& I : *clonedBlock) {
+          if (i == 1) {
+            if (auto* si = dyn_cast<StoreInst>(&I)) {
+              for (int i = 0; i < si->getNumOperands(); i++) {
+                auto* I = dyn_cast<Instruction>(si->getOperand(i));
+                if (I != nullptr) {
+                  inst_inst_map::iterator it = clones.find(I);
+                  if (it != clones.end()) si->setOperand(i, it->second);
+                }
+              }
+            } else if (auto* li = dyn_cast<LoadInst>(&I)) {
+              auto* ptr = dyn_cast<Instruction>(li->getPointerOperand());
+              inst_inst_map::iterator it = clones.find(ptr);
+              if (it != clones.end()) li->setOperand(0, it->second);
+            } else if (auto* bi = dyn_cast<BinaryOperator>(&I)) {
+              auto* lhs = dyn_cast<Instruction>(bi->getOperand(0));
+              inst_inst_map::iterator lhsIt = clones.find(lhs);
+              if (lhsIt != clones.end()) bi->setOperand(0, lhsIt->second);
+
+              auto* rhs = dyn_cast<Instruction>(bi->getOperand(1));
+              inst_inst_map::iterator rhsIt = clones.find(rhs);
+              if (rhsIt != clones.end()) bi->setOperand(0, rhsIt->second);
+            } else if (auto* ci = dyn_cast<CallInst>(&I)) {
+              for (int i = 0; i < ci->getNumOperands() - 1; i++) {
+                auto* arg = dyn_cast<Instruction>(ci->getOperand(i));
+                inst_inst_map::iterator argIt = clones.find(arg);
+                if (argIt != clones.end()) ci->setOperand(i, argIt->second);
+              }
+            }
+          }
+        }
+
+        clonedLoop.push_back(clonedBlock);
+      }
+
+      BasicBlock* forEndClone = BasicBlock::Create(forEnd->getContext(), forEnd->getName(), homeFun);
+      IRBuilder builder(forEndClone);
+      for (auto& I : *forEnd) {
+        if (!isa<CallInst>(I) && !isa<LoadInst>(I)) {
+          auto* clone = I.clone();
+          builder.Insert(clone);
+        }
+
+        if (isa<ReturnInst>(I)) {
+          IRBuilder builder(&I);
+          builder.CreateBr(clonedLoop[0]);
+          I.removeFromParent();
+          break;
+        }
+      }
+
+      for (auto& I : *forEnd) {
+        if (auto* bi = dyn_cast<BranchInst>(&I)) {
+          IRBuilder builder(bi);
+          builder.CreateStore(initVal, clonedAlloca);
+        }
+      }
+
+      // Connect the blocks of the new loop
+      for (int i = 0; i < clonedLoop.size(); i++) {
+        auto* block = clonedLoop[i];
+        for (auto& I : *block) {
+          if (auto* bi = dyn_cast<BranchInst>(&I)) {
+            // for.cond
+            if (i == 0) {
+              bi->setSuccessor(0, clonedLoop[1]);
+              bi->setSuccessor(1, forEndClone);
+            }
+            // for.body
+            else if (i == 1) {
+              bi->setSuccessor(0, clonedLoop[2]);
+            }
+            // for.inc
+            else if (i == 2) {
+              bi->setSuccessor(0, clonedLoop[0]);
+            }
+          }
+        }
+        errs() << *block << "\n";
+      }
+
+      for (auto* untaintedClone : untaintedClones) {
+        untaintedClone->removeFromParent();
+      }
+    }
 #endif
 
     auto& domTree = FAM->getResult<DominatorTreeAnalysis>(*homeFun);

From db36f5da8b7cb2687b39c63c5a22cd2f8bbbe15c Mon Sep 17 00:00:00 2001
From: Robert Zhang <jiaxuanzhang066@gmail.com>
Date: Mon, 11 Mar 2024 18:37:00 -0400
Subject: [PATCH 13/18] [InferAtomsPass] Optimization improvement for
 parameterized IO calls and refactoring for concision

Now, the instructions "tainted" by an IO call will be included in the
fresh set as well, making it so that they remain preceding the IO call,
within their atomic region. This is a more fundamental solution than
before, where exceptions were only made to these instructions during
optimization.

The optimization now has a more modular structure where common
instruction patching logic is extracted into a reusable procedure to be
run more than once (`Helpers::patchClonedBlock`). It comes into play
after cloning a basic block, to rewire its instructions to properly
reference each other.

Test plan:

`make`
---
 benchmarks/ctests/example04.ll                |  76 ++++
 benchmarks/ctests/example05.ll                |   4 +-
 benchmarks/ctests/example06.ll                |   2 +-
 benchmarks/ctests/example07.ll                |   4 +-
 ocelot/AtomicRegionInference/Makefile         |   7 +-
 ocelot/AtomicRegionInference/src/Helpers.cpp  |  42 ++-
 .../AtomicRegionInference/src/InferAtoms.cpp  |  38 +-
 .../src/InferFreshCons.cpp                    | 329 ++++--------------
 .../src/TaintTracker.cpp                      |   1 -
 .../src/include/Helpers.h                     |   1 +
 10 files changed, 222 insertions(+), 282 deletions(-)
 create mode 100644 benchmarks/ctests/example04.ll

diff --git a/benchmarks/ctests/example04.ll b/benchmarks/ctests/example04.ll
new file mode 100644
index 0000000..a3a1d72
--- /dev/null
+++ b/benchmarks/ctests/example04.ll
@@ -0,0 +1,76 @@
+; ModuleID = '../../benchmarks/ctests/example04.c'
+source_filename = "../../benchmarks/ctests/example04.c"
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-macosx12.0.0"
+
+@IO_NAME = global ptr @input, align 8
+@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @atomic_start() #0 {
+entry:
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @atomic_end() #0 {
+entry:
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @input() #0 {
+entry:
+  ret i32 0
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @log(i32 noundef %x) #0 {
+entry:
+  %x.addr = alloca i32, align 4
+  store i32 %x, ptr %x.addr, align 4
+  %0 = load i32, ptr %x.addr, align 4
+  %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %0)
+  ret void
+}
+
+declare i32 @printf(ptr noundef, ...) #1
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @app() #0 {
+entry:
+  %x = alloca i32, align 4
+  %y = alloca i32, align 4
+  call void @atomic_start()
+  %call = call i32 @input()
+  call void @atomic_start()
+  %call1 = call i32 @input()
+  call void @atomic_end()
+  store i32 %call1, ptr %y, align 4
+  %0 = load i32, ptr %y, align 4
+  call void @log(i32 noundef %0)
+  call void @atomic_end()
+  store i32 %call, ptr %x, align 4
+  %1 = load i32, ptr %x, align 4
+  call void @log(i32 noundef %1)
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @main() #0 {
+entry:
+  call void @app()
+  ret i32 0
+}
+
+attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
+attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+!llvm.ident = !{!4}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 8, !"PIC Level", i32 2}
+!2 = !{i32 7, !"uwtable", i32 1}
+!3 = !{i32 7, !"frame-pointer", i32 1}
+!4 = !{!"Homebrew clang version 17.0.2"}
diff --git a/benchmarks/ctests/example05.ll b/benchmarks/ctests/example05.ll
index f137154..aee5708 100644
--- a/benchmarks/ctests/example05.ll
+++ b/benchmarks/ctests/example05.ll
@@ -48,7 +48,7 @@ entry:
   store i32 0, ptr %i, align 4
   br label %for.cond
 
-for.cond:                                         ; preds = %entry, %for.inc
+for.cond:                                         ; preds = %entry, %for.inc, <null operand!>
   %1 = load i32, ptr %i, align 4
   %cmp = icmp slt i32 %1, 10
   br i1 %cmp, label %for.body, label %for.end
@@ -58,7 +58,7 @@ for.body:                                         ; preds = %for.cond
   call void @log(i32 noundef %2)
   br label %for.inc
 
-for.inc:                                          ; preds = %for.body
+for.inc:                                          ; preds = %for.body, <null operand!>
   %3 = load i32, ptr %i, align 4
   %inc = add nsw i32 %3, 1
   store i32 %inc, ptr %i, align 4
diff --git a/benchmarks/ctests/example06.ll b/benchmarks/ctests/example06.ll
index 603f917..fad0c8b 100644
--- a/benchmarks/ctests/example06.ll
+++ b/benchmarks/ctests/example06.ll
@@ -44,9 +44,9 @@ define void @app() #0 {
 entry:
   %i = alloca i32, align 4
   %x = alloca i32, align 4
+  call void @atomic_start()
   store i32 1, ptr %i, align 4
   %0 = load i32, ptr %i, align 4
-  call void @atomic_start()
   %call = call i32 @input(i32 noundef %0)
   store i32 %call, ptr %x, align 4
   %1 = load i32, ptr %x, align 4
diff --git a/benchmarks/ctests/example07.ll b/benchmarks/ctests/example07.ll
index 41881ab..e12917a 100644
--- a/benchmarks/ctests/example07.ll
+++ b/benchmarks/ctests/example07.ll
@@ -49,7 +49,7 @@ entry:
   store i32 0, ptr %i, align 4
   br label %for.cond
 
-for.cond:                                         ; preds = %entry, %for.inc
+for.cond:                                         ; preds = %entry, %for.inc, <null operand!>
   %1 = load i32, ptr %i, align 4
   %cmp = icmp slt i32 %1, 10
   br i1 %cmp, label %for.body, label %for.end
@@ -59,7 +59,7 @@ for.body:                                         ; preds = %for.cond
   call void @log(i32 noundef %2)
   br label %for.inc
 
-for.inc:                                          ; preds = %for.body
+for.inc:                                          ; preds = %for.body, <null operand!>
   %3 = load i32, ptr %i, align 4
   %inc = add nsw i32 %3, 1
   store i32 %inc, ptr %i, align 4
diff --git a/ocelot/AtomicRegionInference/Makefile b/ocelot/AtomicRegionInference/Makefile
index 451976c..b92b0ff 100644
--- a/ocelot/AtomicRegionInference/Makefile
+++ b/ocelot/AtomicRegionInference/Makefile
@@ -1,4 +1,4 @@
-.PHONY: clean_tests clean eg1 eg2 eg3 eg4 eg5 eg6 eg7 run_eg1 run_eg2 run_eg3 run_eg4 run_eg5 run_eg6 run_eg7
+.PHONY: clean_tests clean eg1 eg2 eg3 eg4 eg5 eg6 eg7 eg8 run_eg1 run_eg2 run_eg3 run_eg4 run_eg5 run_eg6 run_eg7 run_eg8
 
 all:
 	make eg1
@@ -8,6 +8,7 @@ all:
 	make eg5
 	make eg6
 	make eg7
+	make eg8
 
 eg1:
 	TEST=example01 make test
@@ -23,6 +24,8 @@ eg6:
 	TEST=example06 make test
 eg7:
 	TEST=example07 make test
+eg8:
+	TEST=example08 make test
 
 run_eg1:
 	TEST=example01 make run
@@ -38,6 +41,8 @@ run_eg6:
 	TEST=example06 make run
 run_eg7:
 	TEST=example07 make run
+run_eg8:
+	TEST=example08 make run
 
 test:
 	$(MAKE) -C build
diff --git a/ocelot/AtomicRegionInference/src/Helpers.cpp b/ocelot/AtomicRegionInference/src/Helpers.cpp
index 5ca398e..896250d 100644
--- a/ocelot/AtomicRegionInference/src/Helpers.cpp
+++ b/ocelot/AtomicRegionInference/src/Helpers.cpp
@@ -2,9 +2,6 @@
 
 std::string getSimpleNodeLabel(const Value* node) {
   if (node->hasName()) {
-    // #if DEBUG
-    //     errs() << "Node has name\n";
-    // #endif
     return node->getName().str();
   }
 
@@ -42,3 +39,42 @@ void printIntInsts(const std::map<int, inst_vec>& iim) {
     errs() << "\n";
   }
 }
+
+/**
+ * Given a freshly cloned basic block, repair references among its
+ * instructions based on a mapping from the original instructions
+ * to their clones.
+ *
+ * @param block The cloned basic block
+ * @param clonedInsts The mapping from original to cloned instructions
+ */
+void patchClonedBlock(BasicBlock* block, inst_inst_map clonedInsts) {
+  for (auto& I : *block) {
+    if (auto* si = dyn_cast<StoreInst>(&I)) {
+      for (int i = 0; i < si->getNumOperands(); i++) {
+        auto* operand = dyn_cast<Instruction>(si->getOperand(i));
+        if (operand != nullptr) {
+          inst_inst_map::iterator it = clonedInsts.find(operand);
+          if (it != clonedInsts.end()) si->setOperand(i, it->second);
+        }
+      }
+    } else if (auto* li = dyn_cast<LoadInst>(&I)) {
+      auto* ptr = dyn_cast<Instruction>(li->getPointerOperand());
+      inst_inst_map::iterator it = clonedInsts.find(ptr);
+      if (it != clonedInsts.end()) li->setOperand(0, it->second);
+    } else if (auto* bi = dyn_cast<BinaryOperator>(&I)) {
+      for (unsigned i = 0; i < bi->getNumOperands(); i++) {
+        auto* operand = dyn_cast<Instruction>(bi->getOperand(i));
+        inst_inst_map::iterator it = clonedInsts.find(operand);
+        if (it != clonedInsts.end()) bi->setOperand(i, it->second);
+      }
+    } else if (auto* ci = dyn_cast<CallInst>(&I)) {
+      // The last operand is the called function
+      for (unsigned i = 0; i < ci->getNumOperands() - 1; i++) {
+        auto* arg = dyn_cast<Instruction>(ci->getOperand(i));
+        inst_inst_map::iterator argIt = clonedInsts.find(arg);
+        if (argIt != clonedInsts.end()) ci->setOperand(i, argIt->second);
+      }
+    }
+  }
+}
diff --git a/ocelot/AtomicRegionInference/src/InferAtoms.cpp b/ocelot/AtomicRegionInference/src/InferAtoms.cpp
index 42c8f3b..428adab 100644
--- a/ocelot/AtomicRegionInference/src/InferAtoms.cpp
+++ b/ocelot/AtomicRegionInference/src/InferAtoms.cpp
@@ -547,7 +547,7 @@ inst_vec_vec InferAtomsPass::collectFresh(inst_vec_vec freshVars, inst_insts_map
   inst_vec_vec toReturn;
 
 #if DEBUG
-  errs() << "Go over fresh freshSets\n";
+  errs() << "Go over freshSets\n";
 #endif
   for (auto varSet : freshVars) {
 #if DEBUG
@@ -572,11 +572,41 @@ inst_vec_vec InferAtomsPass::collectFresh(inst_vec_vec freshVars, inst_insts_map
 #endif
         unique.insert(use);
 
-        for (auto* input : inputMap[use]) {
 #if DEBUG
-          errs() << "[Loop inputMap[use]] Add src input of use to unique: " << *input << "\n";
+        errs() << "[Loop uses] Go over each src input of use\n";
 #endif
-          unique.insert(input);
+        for (auto* input : inputMap[use]) {
+#if DEBUG
+          errs() << "Src input: " << *input << "\n";
+          errs() << "Add insts tainted by it to unique\n";
+#endif
+
+          if (unique.count(input) == 0) {
+            unique.insert(input);
+
+            auto* ci = dyn_cast<CallInst>(input);
+            std::queue<Instruction*> toExplore;
+            toExplore.push(ci);
+
+            while (!toExplore.empty()) {
+              auto* I = toExplore.front();
+              toExplore.pop();
+
+              // TODO: If there's no tainted inst in the chain,
+              // then don't need to include in unique
+              errs() << "[Loop inputInst] Found inst tainted by src input: " << *I << "\n";
+              if (isa<CallInst>(I) || isa<LoadInst>(I) || isa<StoreInst>(I)) {
+                unique.insert(I);
+                for (auto& operand : I->operands())
+                  if (auto* operandI = dyn_cast<Instruction>(operand))
+                    toExplore.push(operandI);
+              } else if (auto* ai = dyn_cast<AllocaInst>(I)) {
+                for (auto* user : ai->users())
+                  if (auto* userI = dyn_cast<Instruction>(user))
+                    if (unique.count(userI) == 0) toExplore.push(userI);
+              }
+            }
+          }
         }
       }
 
diff --git a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp
index 65e2b0c..81c7364 100644
--- a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp
+++ b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp
@@ -104,8 +104,10 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v
 #if DEBUG
   errs() << "Build map from inst to bb\n";
 #endif
-  for (auto* targetInst : targetInsts)
+  for (auto* targetInst : targetInsts) {
+    // errs() << "Check: " << *targetInst << "\n";
     targetBlocks[targetInst] = targetInst->getParent();
+  }
 
 #if DEBUG
   errs() << "Add map to regionsNeeded\n";
@@ -187,48 +189,11 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v
     errs() << "[Loop regionsNeeded] Found home fun: " << homeFun->getName() << "\n";
 #endif
 
-    // Tainted blocks right before untained blocks
-    std::vector<BasicBlock*> lastTainted;
-    BasicBlock* prevTainted;
-
-    for (auto& B : *homeFun) {
-      bool isTainted = false;
-
-      for (auto& [_, taintedBlock] : taintedBlocks) {
-        if (&B == taintedBlock) {
-          isTainted = true;
-          break;
-        }
-      }
-
-      if (!isTainted) {
-        errs() << "Not tainted: " << B << "\n";
-        if (prevTainted != nullptr && find(lastTainted.begin(), lastTainted.end(), prevTainted) == lastTainted.end())
-          lastTainted.push_back(prevTainted);
-      } else {
-        prevTainted = &B;
-      }
-    }
-
-    for (auto* B : lastTainted) {
-      errs() << "lastTainted: " << *B << "\n";
-    }
-
-    // lastTainted[1]->setNext();
-
 #if OPT
     std::set<BasicBlock*> seenBlocks;
-    bool hasRewired = false;
-
-    /*
-      - Check if stmt is in a loop
-      - Remove stmt from loop
-      - Clone that loop
-      - Remove tainted insts in cloned loop
-      - Connect the two loops
-    */
+
     LoopInfo& LI = FAM->getResult<LoopAnalysis>(*homeFun);
-    std::map<Loop*, std::vector<Instruction*>> untaintedClones;
+    std::map<Loop*, std::vector<Instruction*>> untaintedLoopClones;
 
 #if DEBUG
     errs() << "[Loop regionsNeeded] Go over all blocks\n";
@@ -242,209 +207,64 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v
         }
       }
 
-      if (!isTainted && seenBlocks.find(&B) == seenBlocks.end()) {
+      if (isTainted && seenBlocks.find(&B) == seenBlocks.end()) {
 #if DEBUG
-        errs() << "[Loop B] Untainted block " << B.getName() << "\n";
-#endif
-        seenBlocks.emplace(&B);
-        errs() << "Terminator: " << *B.getTerminator() << "\n";
-      } else if (isTainted && seenBlocks.find(&B) == seenBlocks.end()) {
-#if DEBUG
-        errs() << "[Loop B] Tainted block " << B.getName() << "\n";
+        errs() << "Tainted block " << B.getName() << ":\n";
 #endif
         seenBlocks.emplace(&B);
 
-        // A mapping from original instructions to their clones
-        inst_inst_map clonedInsts;
-        // Instructions to be delayed till the end of the block
-        inst_vec toDelay;
-        // (The original) instructions to be deleted
-        inst_vec toDelete;
+        inst_vec toDelete, toDelay;
+        inst_inst_map instClones;
 
         for (auto& I : B) {
 #if DEBUG
           errs() << I << "\n";
 #endif
-          bool isRegionBoundary = false;
-          if (auto* ci = dyn_cast<CallInst>(&I)) {
-            auto funName = ci->getCalledFunction()->getName();
-            isRegionBoundary =
-                funName.equals("atomic_start") || funName.equals("atomic_end");
-          }
 
-          // Only attempt to schedule instruction if it's not alloca or a region boundary
-          if (!isa<AllocaInst>(I) && !isRegionBoundary) {
-            bool inExistingSet = false;
-            for (auto insts : *other) {
-              if (find(insts.begin(), insts.end(), &I) != insts.end()) {
-                inExistingSet = true;
-              }
+          bool inExistingSet = false;
+          for (auto insts : *other) {
+            if (find(insts.begin(), insts.end(), &I) != insts.end()) {
+              inExistingSet = true;
+              break;
             }
+          }
 
-            auto shouldDelay = find(targetInsts.begin(), targetInsts.end(), &I) == targetInsts.end() && !inExistingSet;
-#if DEBUG
-            errs() << "__Should" << (shouldDelay ? " " : " NOT ") << "be delayed__\n";
-#endif
-
-            Instruction* clone;
-
-            // Clone each untainted instruction to be appended to
-            // the end of the basic block, in the original order
-            if (isa<BinaryOperator>(I)) {
-              clone = I.clone();
-
-              for (int i = 0; i < 2; i++) {
-                if (auto* op = dyn_cast<Instruction>(I.getOperand(i))) {
-                  // Since operands don't get cloned along the eway,
-                  // look up the clone of each operand...
-                  inst_inst_map::iterator it = clonedInsts.find(op);
-                  assert(it != clonedInsts.end());
-                  // ...and overwrite the original operand with it
-                  clone->setOperand(i, it->second);
-                }
-              }
-
-              if (shouldDelay) {
-                auto* loop = LI.getLoopFor(&B);
-                if (loop != nullptr) {
-#if DEBUG
-                  errs() << "In loop, keep track of it\n";
-#endif
-
-                  if (untaintedClones.count(loop) == 0)
-                    untaintedClones[loop] = {clone};
-                  else
-                    untaintedClones[loop].push_back(clone);
-                }
-              }
-            } else if (isa<CallInst>(&I)) {
-              // In case I is an IO function call, we don't clone it
-              // and instead map it to itself for referencing later
-
-              clone = shouldDelay ? I.clone() : &I;
-
-              if (shouldDelay && I.getNumOperands() > 1) {
-                if (auto* op = dyn_cast<Instruction>(I.getOperand(0))) {
-                  inst_inst_map::iterator it = clonedInsts.find(op);
-                  assert(it != clonedInsts.end());
-                  clone->setOperand(0, it->second);
-                }
-              }
-
-              if (shouldDelay) {
-                auto* loop = LI.getLoopFor(&B);
-                if (loop != nullptr) {
-#if DEBUG
-                  errs() << "In loop, keep track of it\n";
-#endif
-
-                  if (untaintedClones.count(loop) == 0)
-                    untaintedClones[loop] = {clone};
-                  else
-                    untaintedClones[loop].push_back(clone);
-                }
-              }
-            } else if (isa<StoreInst>(&I)) {
-              // Check whether any IO function calls coming after depend on this store
-              // If so, do NOT delay
-              auto* storePtr = I.getOperand(1);
-              for (auto* user : storePtr->users()) {
-                if (auto* li = dyn_cast<LoadInst>(user)) {
-                  for (auto* liUser : li->users()) {
-                    if (auto* ci = dyn_cast<CallInst>(liUser)) {
-                      if (inputInsts->find(ci) != inputInsts->end()) {
-                        shouldDelay = false;
-                      }
-                    }
-                  }
-                }
-              }
-
-              clone = I.clone();
-
-              if (auto* op = dyn_cast<Instruction>(I.getOperand(0))) {
-                inst_inst_map::iterator it = clonedInsts.find(op);
-                assert(it != clonedInsts.end());
-                clone->setOperand(0, it->second);
-              }
+          bool isAtomicBoundary = false;
+          if (auto* ci = dyn_cast<CallInst>(&I)) {
+            auto* calledFun = ci->getCalledFunction();
+            if (calledFun == this->atomStart || calledFun == this->atomEnd)
+              isAtomicBoundary = true;
+          }
 
-              if (shouldDelay) {
-                auto* loop = LI.getLoopFor(&B);
-                if (loop != nullptr) {
+          // TODO: Exception with the entry block to a loop (prepone untainted insts instead)
+          if (find(targetInsts.begin(), targetInsts.end(), &I) == targetInsts.end() && !isa<AllocaInst>(&I) && !inExistingSet && !isAtomicBoundary) {
 #if DEBUG
-                  errs() << "In loop, keep track of it\n";
+            errs() << "__Should be delayed__\n";
 #endif
+            auto* clone = I.clone();
+            instClones.emplace(&I, clone);
+            toDelete.push_back(&I);
+            toDelay.push_back(clone);
 
-                  if (untaintedClones.count(loop) == 0)
-                    untaintedClones[loop] = {clone};
-                  else
-                    untaintedClones[loop].push_back(clone);
-                }
-              }
-            } else if (isa<LoadInst>(&I)) {
-              // Check whether any IO function calls coming after depend on this load
-              // If so, do NOT delay
-              for (auto* user : I.users()) {
-                if (auto* ci = dyn_cast<CallInst>(user)) {
-                  if (inputInsts->find(ci) != inputInsts->end()) {
-                    shouldDelay = false;
-                  }
-                }
-              }
-
-              clone = I.clone();
-
-              if (shouldDelay) {
-                auto* loop = LI.getLoopFor(&B);
-                if (loop != nullptr) {
+            auto* loop = LI.getLoopFor(&B);
+            if (loop != nullptr) {
 #if DEBUG
-                  errs() << "In loop, keep track of it\n";
+              errs() << "__In loop, keep track of it__\n";
 #endif
-
-                  if (untaintedClones.count(loop) == 0)
-                    untaintedClones[loop] = {clone};
-                  else
-                    untaintedClones[loop].push_back(clone);
-                }
-              }
-            } else {
-              clone = I.clone();
-            }
-
-            clonedInsts.emplace(&I, clone);
-
-            if (shouldDelay) {
-              toDelete.push_back(&I);
-              toDelay.push_back(clone);
+              if (untaintedLoopClones.count(loop) == 0)
+                untaintedLoopClones[loop] = {clone};
+              else
+                untaintedLoopClones[loop].push_back(clone);
             }
           }
         }
 
-        IRBuilder builder(&B);
-#if DEBUG
-        errs() << "Add delayed instructions to end of block\n";
-#endif
-        // Append each delayed instruction to the end of the block,
-        // in the original order
-        for (auto* I : toDelay) builder.Insert(I);
+        for (auto* I : toDelete) I->removeFromParent();
 
-#if DEBUG
-        errs() << "Delete originals:\n";
-#endif
-        auto I = B.begin();
-        // Delete the originals
-        for (; I != B.end();) {
-#if DEBUG
-          errs() << *I << "\n";
-#endif
-          if (find(toDelete.begin(), toDelete.end(), &*I) != toDelete.end()) {
-            I = I->eraseFromParent();
-#if DEBUG
-            errs() << "Deleted\n";
-#endif
-          } else
-            I++;
-        }
+        IRBuilder BBuilder(&B);
+        for (auto* I : toDelay) BBuilder.Insert(I);
+
+        patchClonedBlock(&B, instClones);
 
         // Sync freshSets
         if (other != nullptr) {
@@ -479,18 +299,20 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v
       }
     }
 
-    for (auto& [taintedLoop, untaintedClones] : untaintedClones) {
+    for (auto& [taintedLoop, untaintedClones] : untaintedLoopClones) {
 #if DEBUG
       errs() << "Clone taintedLoop\n";
 #endif
+      errs() << "ayo\n";
       std::vector<BasicBlock*> clonedLoop;
       BasicBlock* forEnd;
       Instruction* clonedAlloca;
       Value* initVal;
-      inst_inst_map clones;
+      inst_inst_map instClones;
 
       auto loopBlocks = taintedLoop->getBlocks();
       assert(loopBlocks.size() == 3);
+
       for (int i = 0; i < loopBlocks.size(); i++) {
         auto* block = loopBlocks[i];
         auto* clonedBlock = BasicBlock::Create(block->getContext(), block->getName(), homeFun);
@@ -557,7 +379,7 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v
               }
             }
 
-            clones.emplace(&I, clonedI);
+            instClones.emplace(&I, clonedI);
             builder.Insert(clonedI);
           }
         }
@@ -566,37 +388,7 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v
         // Performs a standard sound cloning procedure (on each operand);
         // the instructions in the body are unrelated to the loop except the final
         // branch instruction
-        for (auto& I : *clonedBlock) {
-          if (i == 1) {
-            if (auto* si = dyn_cast<StoreInst>(&I)) {
-              for (int i = 0; i < si->getNumOperands(); i++) {
-                auto* I = dyn_cast<Instruction>(si->getOperand(i));
-                if (I != nullptr) {
-                  inst_inst_map::iterator it = clones.find(I);
-                  if (it != clones.end()) si->setOperand(i, it->second);
-                }
-              }
-            } else if (auto* li = dyn_cast<LoadInst>(&I)) {
-              auto* ptr = dyn_cast<Instruction>(li->getPointerOperand());
-              inst_inst_map::iterator it = clones.find(ptr);
-              if (it != clones.end()) li->setOperand(0, it->second);
-            } else if (auto* bi = dyn_cast<BinaryOperator>(&I)) {
-              auto* lhs = dyn_cast<Instruction>(bi->getOperand(0));
-              inst_inst_map::iterator lhsIt = clones.find(lhs);
-              if (lhsIt != clones.end()) bi->setOperand(0, lhsIt->second);
-
-              auto* rhs = dyn_cast<Instruction>(bi->getOperand(1));
-              inst_inst_map::iterator rhsIt = clones.find(rhs);
-              if (rhsIt != clones.end()) bi->setOperand(0, rhsIt->second);
-            } else if (auto* ci = dyn_cast<CallInst>(&I)) {
-              for (int i = 0; i < ci->getNumOperands() - 1; i++) {
-                auto* arg = dyn_cast<Instruction>(ci->getOperand(i));
-                inst_inst_map::iterator argIt = clones.find(arg);
-                if (argIt != clones.end()) ci->setOperand(i, argIt->second);
-              }
-            }
-          }
-        }
+        if (i == 1) patchClonedBlock(clonedBlock, instClones);
 
         clonedLoop.push_back(clonedBlock);
       }
@@ -644,11 +436,10 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v
             }
           }
         }
-        errs() << *block << "\n";
       }
 
       for (auto* untaintedClone : untaintedClones) {
-        untaintedClone->removeFromParent();
+        if (!isa<BranchInst>(untaintedClone)) untaintedClone->removeFromParent();
       }
     }
 #endif
@@ -658,11 +449,11 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v
     auto* startDom = taintedBlocks.begin()->second;
     for (auto& [_, B] : taintedBlocks)
       startDom = domTree.findNearestCommonDominator(B, startDom);
-#if DEBUG
-    errs() << "[Loop regionsNeeded] startDom: " << *startDom << "\n";
-#endif
+      // #if DEBUG
+      //     errs() << "[Loop regionsNeeded] startDom: " << *startDom << "\n";
+      // #endif
 
-    // TODO: if an inst in the set is in the bb, we can truncate?
+      // TODO: if an inst in the set is in the bb, we can truncate?
 
 #if DEBUG
     errs() << "Start post dom tree analysis\n";
@@ -683,9 +474,9 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v
       endDom = postDomTree.findNearestCommonDominator(taintedBlock, endDom);
     }
 
-#if DEBUG
-    errs() << "[Loop regionsNeeded] endDom: " << *endDom << "\n";
-#endif
+    // #if DEBUG
+    //     errs() << "[Loop regionsNeeded] endDom: " << *endDom << "\n";
+    // #endif
 
     if (startDom == nullptr) {
       errs() << "[Error] Null startDom\n";
@@ -697,11 +488,11 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v
     startDom = domTree.findNearestCommonDominator(startDom, endDom);
     endDom = postDomTree.findNearestCommonDominator(startDom, endDom);
 
-#if DEBUG
-    errs() << "[Loop regionsNeeded] After matching scope\n";
-    errs() << "[Loop regionsNeeded] startDom: " << *startDom << "\n";
-    errs() << "[Loop regionsNeeded] endDom: " << *endDom << "\n";
-#endif
+    // #if DEBUG
+    //     errs() << "[Loop regionsNeeded] After matching scope\n";
+    //     errs() << "[Loop regionsNeeded] startDom: " << *startDom << "\n";
+    //     errs() << "[Loop regionsNeeded] endDom: " << *endDom << "\n";
+    // #endif
 
     // Extra check to disallow loop conditional block as the end
     if (loopCheck(endDom)) {
@@ -745,6 +536,8 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v
   //}//end while regions needed
 
 #if DEBUG
+  errs() << "Final:\n"
+         << *root << "\n";
   errs() << "*** addRegion ***\n";
 #endif
 }
diff --git a/ocelot/AtomicRegionInference/src/TaintTracker.cpp b/ocelot/AtomicRegionInference/src/TaintTracker.cpp
index 45f30db..3da9778 100644
--- a/ocelot/AtomicRegionInference/src/TaintTracker.cpp
+++ b/ocelot/AtomicRegionInference/src/TaintTracker.cpp
@@ -639,7 +639,6 @@ std::set<CallInst*> findInputInsts(Module* M) {
           }
         }
       } else {
-        // TODO: Say something else
         errs() << "[ERROR] Could not unwrap function pointer from annotation\n";
       }
     }
diff --git a/ocelot/AtomicRegionInference/src/include/Helpers.h b/ocelot/AtomicRegionInference/src/include/Helpers.h
index d5f553d..bde9ca9 100644
--- a/ocelot/AtomicRegionInference/src/include/Helpers.h
+++ b/ocelot/AtomicRegionInference/src/include/Helpers.h
@@ -15,5 +15,6 @@ bool isAnnot(const StringRef annotName);
 void printInstInsts(const inst_insts_map& iim, bool onlyCalls = false);
 void printInsts(const inst_vec& iv);
 void printIntInsts(const std::map<int, inst_vec>& iim);
+void patchClonedBlock(BasicBlock* block, inst_inst_map clonedInsts);
 
 #endif
\ No newline at end of file

From 978fe072ee1b814258941d04c24b12323a844549 Mon Sep 17 00:00:00 2001
From: Robert Zhang <jiaxuanzhang066@gmail.com>
Date: Tue, 12 Mar 2024 13:12:40 -0400
Subject: [PATCH 14/18] [InferAtomsPass] Don't optimize loops with tainted loop
 conditions

In the case of loop conditions that depend on fresh/consistent input
values, no instruction in the loop body can be extracted out from the
atomic region, as shown in the example below:

```rust
fn app() -> () {
  let x = input();
  for _ in x..10 {
    let y = 1;
    log(y + 2);
    log(x);
  }
  Fresh(x);
}
```

Test plan:

`make eg8`
---
 benchmarks/ctests/example08.c                 |  27 ++
 benchmarks/ctests/example08.ll                |  96 +++++++
 benchmarks/ctests/example08.orig.ll           | 104 +++++++
 ocelot/AtomicRegionInference/Makefile         |  16 +-
 ocelot/AtomicRegionInference/src/Helpers.cpp  |  17 +-
 .../src/InferFreshCons.cpp                    | 268 +++++++++---------
 .../src/TaintTracker.cpp                      |   2 +-
 7 files changed, 387 insertions(+), 143 deletions(-)
 create mode 100644 benchmarks/ctests/example08.c
 create mode 100644 benchmarks/ctests/example08.ll
 create mode 100644 benchmarks/ctests/example08.orig.ll

diff --git a/benchmarks/ctests/example08.c b/benchmarks/ctests/example08.c
new file mode 100644
index 0000000..77a3580
--- /dev/null
+++ b/benchmarks/ctests/example08.c
@@ -0,0 +1,27 @@
+#include <stdio.h>
+
+void Fresh(int x) {}
+
+void atomic_start() {}
+void atomic_end() {}
+
+int input() { return 0; }
+int (*IO_NAME)() = input;
+
+void log(int x) {
+  printf("%d\n", x);
+}
+
+void app() {
+  int x = input();
+  for (int i = x; i < 10; i++) {
+    int y = 1;
+    log(y + 2);
+    log(x);
+  }
+  Fresh(x);
+}
+
+int main() {
+  app();
+}
\ No newline at end of file
diff --git a/benchmarks/ctests/example08.ll b/benchmarks/ctests/example08.ll
new file mode 100644
index 0000000..142b165
--- /dev/null
+++ b/benchmarks/ctests/example08.ll
@@ -0,0 +1,96 @@
+; ModuleID = '../../benchmarks/ctests/example08.c'
+source_filename = "../../benchmarks/ctests/example08.c"
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-macosx12.0.0"
+
+@IO_NAME = global ptr @input, align 8
+@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @atomic_start() #0 {
+entry:
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @atomic_end() #0 {
+entry:
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @input() #0 {
+entry:
+  ret i32 0
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @log(i32 noundef %x) #0 {
+entry:
+  %x.addr = alloca i32, align 4
+  store i32 %x, ptr %x.addr, align 4
+  %0 = load i32, ptr %x.addr, align 4
+  %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %0)
+  ret void
+}
+
+declare i32 @printf(ptr noundef, ...) #1
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @app() #0 {
+entry:
+  %x = alloca i32, align 4
+  %i = alloca i32, align 4
+  %y = alloca i32, align 4
+  call void @atomic_start()
+  %call = call i32 @input()
+  store i32 %call, ptr %x, align 4
+  %0 = load i32, ptr %x, align 4
+  store i32 %0, ptr %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry, <null operand!>, <null operand!>
+  %1 = load i32, ptr %i, align 4
+  %2 = icmp slt i32 %1, 10
+  br i1 %2, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond, <null operand!>
+  %3 = load i32, ptr %x, align 4
+  call void @log(i32 noundef %3)
+  store i32 1, ptr %y, align 4
+  %4 = load i32, ptr %y, align 4
+  %5 = add nsw i32 %4, 2
+  call void @log(i32 noundef %5)
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body, <null operand!>
+  %6 = load i32, ptr %i, align 4
+  %7 = add nsw i32 %6, 1
+  store i32 %7, ptr %i, align 4
+  br label %for.cond, !llvm.loop !5
+
+for.end:                                          ; preds = %for.cond, <null operand!>
+  call void @atomic_end()
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @main() #0 {
+entry:
+  call void @app()
+  ret i32 0
+}
+
+attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
+attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+!llvm.ident = !{!4}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 8, !"PIC Level", i32 2}
+!2 = !{i32 7, !"uwtable", i32 1}
+!3 = !{i32 7, !"frame-pointer", i32 1}
+!4 = !{!"Homebrew clang version 17.0.2"}
+!5 = distinct !{!5, !6}
+!6 = !{!"llvm.loop.mustprogress"}
diff --git a/benchmarks/ctests/example08.orig.ll b/benchmarks/ctests/example08.orig.ll
new file mode 100644
index 0000000..f0dbf25
--- /dev/null
+++ b/benchmarks/ctests/example08.orig.ll
@@ -0,0 +1,104 @@
+; ModuleID = '../../benchmarks/ctests/example08.c'
+source_filename = "../../benchmarks/ctests/example08.c"
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-macosx12.0.0"
+
+@IO_NAME = global ptr @input, align 8
+@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @Fresh(i32 noundef %x) #0 {
+entry:
+  %x.addr = alloca i32, align 4
+  store i32 %x, ptr %x.addr, align 4
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @atomic_start() #0 {
+entry:
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @atomic_end() #0 {
+entry:
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @input() #0 {
+entry:
+  ret i32 0
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @log(i32 noundef %x) #0 {
+entry:
+  %x.addr = alloca i32, align 4
+  store i32 %x, ptr %x.addr, align 4
+  %0 = load i32, ptr %x.addr, align 4
+  %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %0)
+  ret void
+}
+
+declare i32 @printf(ptr noundef, ...) #1
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @app() #0 {
+entry:
+  %x = alloca i32, align 4
+  %i = alloca i32, align 4
+  %y = alloca i32, align 4
+  %call = call i32 @input()
+  store i32 %call, ptr %x, align 4
+  %0 = load i32, ptr %x, align 4
+  store i32 %0, ptr %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %1 = load i32, ptr %i, align 4
+  %cmp = icmp slt i32 %1, 10
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  store i32 1, ptr %y, align 4
+  %2 = load i32, ptr %y, align 4
+  %add = add nsw i32 %2, 2
+  call void @log(i32 noundef %add)
+  %3 = load i32, ptr %x, align 4
+  call void @log(i32 noundef %3)
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %4 = load i32, ptr %i, align 4
+  %inc = add nsw i32 %4, 1
+  store i32 %inc, ptr %i, align 4
+  br label %for.cond, !llvm.loop !5
+
+for.end:                                          ; preds = %for.cond
+  %5 = load i32, ptr %x, align 4
+  call void @Fresh(i32 noundef %5)
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @main() #0 {
+entry:
+  call void @app()
+  ret i32 0
+}
+
+attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
+attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+!llvm.ident = !{!4}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 8, !"PIC Level", i32 2}
+!2 = !{i32 7, !"uwtable", i32 1}
+!3 = !{i32 7, !"frame-pointer", i32 1}
+!4 = !{!"Homebrew clang version 17.0.2"}
+!5 = distinct !{!5, !6}
+!6 = !{!"llvm.loop.mustprogress"}
diff --git a/ocelot/AtomicRegionInference/Makefile b/ocelot/AtomicRegionInference/Makefile
index b92b0ff..53d3c55 100644
--- a/ocelot/AtomicRegionInference/Makefile
+++ b/ocelot/AtomicRegionInference/Makefile
@@ -28,21 +28,21 @@ eg8:
 	TEST=example08 make test
 
 run_eg1:
-	TEST=example01 make run
+	TEST=example01 make run && ../../benchmarks/ctests/example01.out
 run_eg2:
-	TEST=example02 make run
+	TEST=example02 make run && ../../benchmarks/ctests/example02.out
 run_eg3:
-	TEST=example03 make run
+	TEST=example03 make run && ../../benchmarks/ctests/example03.out
 run_eg4:
-	TEST=example04 make run
+	TEST=example04 make run && ../../benchmarks/ctests/example04.out
 run_eg5:
-	TEST=example05 make run
+	TEST=example05 make run && ../../benchmarks/ctests/example05.out
 run_eg6:
-	TEST=example06 make run
+	TEST=example06 make run && ../../benchmarks/ctests/example06.out
 run_eg7:
-	TEST=example07 make run
+	TEST=example07 make run && ../../benchmarks/ctests/example07.out
 run_eg8:
-	TEST=example08 make run
+	TEST=example08 make run && ../../benchmarks/ctests/example08.out
 
 test:
 	$(MAKE) -C build
diff --git a/ocelot/AtomicRegionInference/src/Helpers.cpp b/ocelot/AtomicRegionInference/src/Helpers.cpp
index 896250d..e446001 100644
--- a/ocelot/AtomicRegionInference/src/Helpers.cpp
+++ b/ocelot/AtomicRegionInference/src/Helpers.cpp
@@ -50,12 +50,13 @@ void printIntInsts(const std::map<int, inst_vec>& iim) {
  */
 void patchClonedBlock(BasicBlock* block, inst_inst_map clonedInsts) {
   for (auto& I : *block) {
-    if (auto* si = dyn_cast<StoreInst>(&I)) {
-      for (int i = 0; i < si->getNumOperands(); i++) {
-        auto* operand = dyn_cast<Instruction>(si->getOperand(i));
+    if (isa<StoreInst>(I) || isa<CmpInst>(I)) {
+      auto* inst = dyn_cast<Instruction>(&I);
+      for (int i = 0; i < inst->getNumOperands(); i++) {
+        auto* operand = dyn_cast<Instruction>(inst->getOperand(i));
         if (operand != nullptr) {
           inst_inst_map::iterator it = clonedInsts.find(operand);
-          if (it != clonedInsts.end()) si->setOperand(i, it->second);
+          if (it != clonedInsts.end()) inst->setOperand(i, it->second);
         }
       }
     } else if (auto* li = dyn_cast<LoadInst>(&I)) {
@@ -72,9 +73,13 @@ void patchClonedBlock(BasicBlock* block, inst_inst_map clonedInsts) {
       // The last operand is the called function
       for (unsigned i = 0; i < ci->getNumOperands() - 1; i++) {
         auto* arg = dyn_cast<Instruction>(ci->getOperand(i));
-        inst_inst_map::iterator argIt = clonedInsts.find(arg);
-        if (argIt != clonedInsts.end()) ci->setOperand(i, argIt->second);
+        inst_inst_map::iterator it = clonedInsts.find(arg);
+        if (it != clonedInsts.end()) ci->setOperand(i, it->second);
       }
+    } else if (auto* ci = dyn_cast<BranchInst>(&I)) {
+      auto* cond = dyn_cast<Instruction>(ci->getOperand(0));
+      inst_inst_map::iterator it = clonedInsts.find(cond);
+      if (it != clonedInsts.end()) ci->setOperand(0, it->second);
     }
   }
 }
diff --git a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp
index 81c7364..b70b112 100644
--- a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp
+++ b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp
@@ -50,13 +50,17 @@ bool InferFreshCons::loopCheck(BasicBlock* B) {
 // Find the first block after a for loop
 BasicBlock* InferFreshCons::getLoopEnd(BasicBlock* bb) {
   auto* ti = bb->getTerminator();
-  auto* end = ti->getSuccessor(0);
-  ti = end->getTerminator();
-  // errs() << "end is " << end->getName() << "\n";
-  // for switch inst, succ 0 is the fall through
-  end = ti->getSuccessor(1);
-  // errs() << "end is " << end->getName() << "\n";
-  return end;
+  // Fall back to bb itself whenever the expected for-loop shape is absent.
+  if (ti == nullptr || ti->getNumSuccessors() == 0) return bb;
+
+  // Successor 0 of bb is expected to be the loop header (for.cond), whose
+  // two-way branch is expected to list the loop body as successor 0 and the
+  // first block after the loop as successor 1.
+  auto* headerTerm = ti->getSuccessor(0)->getTerminator();
+  // getSuccessor(1) is out of range unless the terminator has two targets.
+  if (headerTerm == nullptr || headerTerm->getNumSuccessors() < 2) return bb;
+
+  return headerTerm->getSuccessor(1);
 }
 
 // Top level region inference function -- could flatten later
@@ -194,6 +198,7 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v
 
     LoopInfo& LI = FAM->getResult<LoopAnalysis>(*homeFun);
     std::map<Loop*, std::vector<Instruction*>> untaintedLoopClones;
+    bool loopCondTainted = false;
 
 #if DEBUG
     errs() << "[Loop regionsNeeded] Go over all blocks\n";
@@ -248,9 +253,8 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v
 
             auto* loop = LI.getLoopFor(&B);
             if (loop != nullptr) {
-#if DEBUG
-              errs() << "__In loop, keep track of it__\n";
-#endif
+              if (&B != loop->getBlocks()[1]) loopCondTainted = true;
+
               if (untaintedLoopClones.count(loop) == 0)
                 untaintedLoopClones[loop] = {clone};
               else
@@ -299,147 +303,155 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v
       }
     }
 
-    for (auto& [taintedLoop, untaintedClones] : untaintedLoopClones) {
-#if DEBUG
-      errs() << "Clone taintedLoop\n";
-#endif
-      errs() << "ayo\n";
-      std::vector<BasicBlock*> clonedLoop;
-      BasicBlock* forEnd;
-      Instruction* clonedAlloca;
-      Value* initVal;
-      inst_inst_map instClones;
-
-      auto loopBlocks = taintedLoop->getBlocks();
-      assert(loopBlocks.size() == 3);
-
-      for (int i = 0; i < loopBlocks.size(); i++) {
-        auto* block = loopBlocks[i];
-        auto* clonedBlock = BasicBlock::Create(block->getContext(), block->getName(), homeFun);
-        IRBuilder builder(clonedBlock);
-
-        Instruction* prev;
-        for (auto& I : *block) {
-          auto* clonedI = I.clone();
-
-          // Only extract if untainted
-          // Covers the cond and inc blocks; they are processed on the fly due to
-          // their special role in keeping the loop going
-          if (find(targetInsts.begin(), targetInsts.end(), &I) == targetInsts.end()) {
-            // for.cond
-            if (i == 0) {
-              if (auto* li = dyn_cast<LoadInst>(clonedI)) {
-                auto* ptr = li->getPointerOperand();
-
-                if (auto* ai = dyn_cast<AllocaInst>(*ptr->uses().begin())) {
-                  IRBuilder builder(ai);
-                  clonedAlloca = builder.CreateAlloca(ai->getAllocatedType());
-                }
+    if (!loopCondTainted) {
+      for (auto& [taintedLoop, untaintedClones] : untaintedLoopClones) {
+#if DEBUG
+        errs() << "Clone taintedLoop\n";
+#endif
+        std::vector<BasicBlock*> clonedLoop;
+        BasicBlock* forEnd;
+        Instruction* clonedAlloca;
+        Value* initVal;
+        inst_inst_map instClones;
+
+        auto loopBlocks = taintedLoop->getBlocks();
+        assert(loopBlocks.size() == 3);
+
+        for (int i = 0; i < loopBlocks.size(); i++) {
+          auto* block = loopBlocks[i];
+          auto* clonedBlock = BasicBlock::Create(block->getContext(), block->getName(), homeFun);
+          IRBuilder builder(clonedBlock);
+
+#if DEBUG
+          errs() << "Clone block " << block->getName() << "\n";
+#endif
+
+          Instruction* prev;
+          for (auto& I : *block) {
+#if DEBUG
+            errs() << "Clone inst: " << I << "\n";
+#endif
+            auto* clonedI = I.clone();
+
+            // Only extract if untainted
+            // Covers the cond and inc blocks; they are processed on the fly due to
+            // their special role in keeping the loop going
+            if (find(targetInsts.begin(), targetInsts.end(), &I) == targetInsts.end()) {
+              // for.cond
+              if (i == 0) {
+                if (auto* li = dyn_cast<LoadInst>(clonedI)) {
+                  auto* ptr = li->getPointerOperand();
 
-                for (auto* ptrUser : ptr->users()) {
-                  if (auto* si = dyn_cast<StoreInst>(ptrUser)) {
-                    if (!isa<BinaryOperator>(si->getOperand(0))) {
-                      initVal = si->getOperand(0);
+                  if (auto* ai = dyn_cast<AllocaInst>(*ptr->uses().begin())) {
+                    IRBuilder builder(ai);
+                    clonedAlloca = builder.CreateAlloca(ai->getAllocatedType());
+                  }
+
+                  for (auto* ptrUser : ptr->users()) {
+                    if (auto* si = dyn_cast<StoreInst>(ptrUser)) {
+                      if (!isa<BinaryOperator>(si->getOperand(0))) {
+                        initVal = si->getOperand(0);
+                      }
                     }
                   }
-                }
 
-                li->setOperand(0, clonedAlloca);
-                prev = li;
-              } else if (auto* ci = dyn_cast<CmpInst>(clonedI)) {
-                // TODO: Check if operand originates from the current loop
-                if (isa<LoadInst>(ci->getOperand(0))) {
-                  ci->setOperand(0, prev);
-                }
-                prev = ci;
-              } else if (auto* bi = dyn_cast<BranchInst>(clonedI)) {
-                assert(bi->isConditional());
-                bi->setCondition(prev);
+                  li->setOperand(0, clonedAlloca);
+                  prev = li;
+                } else if (auto* ci = dyn_cast<CmpInst>(clonedI)) {
+                  // TODO: Check if operand originates from the current loop
+                  if (isa<LoadInst>(ci->getOperand(0))) {
+                    ci->setOperand(0, prev);
+                  }
+                  prev = ci;
+                } else if (auto* bi = dyn_cast<BranchInst>(clonedI)) {
+                  assert(bi->isConditional());
+                  bi->setCondition(prev);
 
-                if (auto* B = dyn_cast<BasicBlock>(bi->getOperand(1))) {
-                  forEnd = B;
+                  if (auto* B = dyn_cast<BasicBlock>(bi->getOperand(1))) {
+                    forEnd = B;
+                  }
                 }
               }
-            }
 
-            // for.inc
-            else if (i == 2) {
-              if (auto* li = dyn_cast<LoadInst>(clonedI)) {
-                li->setOperand(0, clonedAlloca);
-                prev = li;
-              } else if (auto* bi = dyn_cast<BinaryOperator>(clonedI)) {
-                auto* lhs = bi->getOperand(0);
-                if (isa<LoadInst>(lhs)) bi->setOperand(0, prev);
-                auto* rhs = bi->getOperand(1);
-                if (isa<LoadInst>(rhs)) bi->setOperand(1, prev);
-                prev = bi;
-              } else if (auto* si = dyn_cast<StoreInst>(clonedI)) {
-                si->setOperand(0, prev);
-                si->setOperand(1, clonedAlloca);
+              // for.inc
+              else if (i == 2) {
+                if (auto* li = dyn_cast<LoadInst>(clonedI)) {
+                  li->setOperand(0, clonedAlloca);
+                  prev = li;
+                } else if (auto* bi = dyn_cast<BinaryOperator>(clonedI)) {
+                  auto* lhs = bi->getOperand(0);
+                  if (isa<LoadInst>(lhs)) bi->setOperand(0, prev);
+                  auto* rhs = bi->getOperand(1);
+                  if (isa<LoadInst>(rhs)) bi->setOperand(1, prev);
+                  prev = bi;
+                } else if (auto* si = dyn_cast<StoreInst>(clonedI)) {
+                  si->setOperand(0, prev);
+                  si->setOperand(1, clonedAlloca);
+                }
               }
-            }
 
-            instClones.emplace(&I, clonedI);
-            builder.Insert(clonedI);
+              instClones.emplace(&I, clonedI);
+              builder.Insert(clonedI);
+            }
           }
-        }
-
-        // for.body
-        // Performs a standard sound cloning procedure (on each operand);
-        // the instructions in the body are unrelated to the loop except the final
-        // branch instruction
-        if (i == 1) patchClonedBlock(clonedBlock, instClones);
 
-        clonedLoop.push_back(clonedBlock);
-      }
+          // for.body
+          // Performs a standard sound cloning procedure (on each operand);
+          // the instructions in the body are unrelated to the loop except the final
+          // branch instruction
+          if (i == 1) patchClonedBlock(clonedBlock, instClones);
 
-      BasicBlock* forEndClone = BasicBlock::Create(forEnd->getContext(), forEnd->getName(), homeFun);
-      IRBuilder builder(forEndClone);
-      for (auto& I : *forEnd) {
-        if (!isa<CallInst>(I) && !isa<LoadInst>(I)) {
-          auto* clone = I.clone();
-          builder.Insert(clone);
+          clonedLoop.push_back(clonedBlock);
         }
 
-        if (isa<ReturnInst>(I)) {
-          IRBuilder builder(&I);
-          builder.CreateBr(clonedLoop[0]);
-          I.removeFromParent();
-          break;
-        }
-      }
+        BasicBlock* forEndClone = BasicBlock::Create(forEnd->getContext(), forEnd->getName(), homeFun);
+        IRBuilder builder(forEndClone);
+        for (auto& I : *forEnd) {
+          if (!isa<CallInst>(I) && !isa<LoadInst>(I)) {
+            auto* clone = I.clone();
+            builder.Insert(clone);
+          }
 
-      for (auto& I : *forEnd) {
-        if (auto* bi = dyn_cast<BranchInst>(&I)) {
-          IRBuilder builder(bi);
-          builder.CreateStore(initVal, clonedAlloca);
+          if (isa<ReturnInst>(I)) {
+            IRBuilder builder(&I);
+            builder.CreateBr(clonedLoop[0]);
+            I.removeFromParent();
+            break;
+          }
         }
-      }
 
-      // Connect the blocks of the new loop
-      for (int i = 0; i < clonedLoop.size(); i++) {
-        auto* block = clonedLoop[i];
-        for (auto& I : *block) {
+        for (auto& I : *forEnd) {
           if (auto* bi = dyn_cast<BranchInst>(&I)) {
-            // for.cond
-            if (i == 0) {
-              bi->setSuccessor(0, clonedLoop[1]);
-              bi->setSuccessor(1, forEndClone);
-            }
-            // for.body
-            else if (i == 1) {
-              bi->setSuccessor(0, clonedLoop[2]);
-            }
-            // for.inc
-            else if (i == 2) {
-              bi->setSuccessor(0, clonedLoop[0]);
+            IRBuilder builder(bi);
+            builder.CreateStore(initVal, clonedAlloca);
+          }
+        }
+
+        // Connect the blocks of the new loop
+        for (int i = 0; i < clonedLoop.size(); i++) {
+          auto* block = clonedLoop[i];
+          for (auto& I : *block) {
+            if (auto* bi = dyn_cast<BranchInst>(&I)) {
+              // for.cond
+              if (i == 0) {
+                bi->setSuccessor(0, clonedLoop[1]);
+                bi->setSuccessor(1, forEndClone);
+              }
+              // for.body
+              else if (i == 1) {
+                bi->setSuccessor(0, clonedLoop[2]);
+              }
+              // for.inc
+              else if (i == 2) {
+                bi->setSuccessor(0, clonedLoop[0]);
+              }
             }
           }
         }
-      }
 
-      for (auto* untaintedClone : untaintedClones) {
-        if (!isa<BranchInst>(untaintedClone)) untaintedClone->removeFromParent();
+        for (auto* untaintedClone : untaintedClones) {
+          if (!isa<BranchInst>(untaintedClone)) untaintedClone->removeFromParent();
+        }
       }
     }
 #endif
diff --git a/ocelot/AtomicRegionInference/src/TaintTracker.cpp b/ocelot/AtomicRegionInference/src/TaintTracker.cpp
index 3da9778..bea58e7 100644
--- a/ocelot/AtomicRegionInference/src/TaintTracker.cpp
+++ b/ocelot/AtomicRegionInference/src/TaintTracker.cpp
@@ -1023,7 +1023,7 @@ inst_vec traverseUses(Instruction* root) {
   }
 
 #if DEBUG
-  errs() << "=== traverseUses ===\n";
+  errs() << "*** traverseUses ***\n";
 #endif
   inst_vec uses_vec(uses.begin(), uses.end());
   return uses_vec;

From c02ae2c83634ff58269889ae15aeca74b66344a6 Mon Sep 17 00:00:00 2001
From: Robert Zhang <jiaxuanzhang066@gmail.com>
Date: Wed, 13 Mar 2024 12:46:50 -0500
Subject: [PATCH 15/18] [InferAtomsPass] More tests and working impl. for some
 Rust programs

Fix an issue with extracting IO functions from source code. Add several
tests, including a few in Rust.
---
 benchmarks/ctests/example.bc                  | Bin 5984 -> 6304 bytes
 benchmarks/ctests/example.ll                  | 178 +++++++++++
 benchmarks/ctests/example.orig.ll             | 183 ++++++++++++
 benchmarks/ctests/example.rs                  |  11 +-
 benchmarks/ctests/example09.c                 |  26 ++
 benchmarks/ctests/example09.ll                | 111 +++++++
 benchmarks/ctests/example09.orig.ll           | 100 +++++++
 benchmarks/ctests/example10.c                 |  25 ++
 benchmarks/ctests/example10.ll                |  92 ++++++
 benchmarks/ctests/example10.orig.ll           | 100 +++++++
 benchmarks/ctests/example11.bc                | Bin 0 -> 6352 bytes
 benchmarks/ctests/example11.ll                | 179 +++++++++++
 benchmarks/ctests/example11.orig.ll           | 184 ++++++++++++
 benchmarks/ctests/example11.rs                |  26 ++
 benchmarks/ctests/example12.bc                | Bin 0 -> 8160 bytes
 benchmarks/ctests/example12.ll                | 274 +++++++++++++++++
 benchmarks/ctests/example12.orig.ll           | 279 ++++++++++++++++++
 benchmarks/ctests/example12.rs                |  26 ++
 benchmarks/intermittent.rs                    |  65 ++--
 ocelot/AtomicRegionInference/Makefile         |  35 ++-
 .../AtomicRegionInference/src/InferAtoms.cpp  |   9 +-
 .../src/TaintTracker.cpp                      |  12 +-
 22 files changed, 1869 insertions(+), 46 deletions(-)
 create mode 100644 benchmarks/ctests/example.ll
 create mode 100644 benchmarks/ctests/example.orig.ll
 create mode 100644 benchmarks/ctests/example09.c
 create mode 100644 benchmarks/ctests/example09.ll
 create mode 100644 benchmarks/ctests/example09.orig.ll
 create mode 100644 benchmarks/ctests/example10.c
 create mode 100644 benchmarks/ctests/example10.ll
 create mode 100644 benchmarks/ctests/example10.orig.ll
 create mode 100644 benchmarks/ctests/example11.bc
 create mode 100644 benchmarks/ctests/example11.ll
 create mode 100644 benchmarks/ctests/example11.orig.ll
 create mode 100644 benchmarks/ctests/example11.rs
 create mode 100644 benchmarks/ctests/example12.bc
 create mode 100644 benchmarks/ctests/example12.ll
 create mode 100644 benchmarks/ctests/example12.orig.ll
 create mode 100644 benchmarks/ctests/example12.rs

diff --git a/benchmarks/ctests/example.bc b/benchmarks/ctests/example.bc
index 4163fd996b61aa6750aed7c63c25855384623a88..6513a4190f5bf1e1cc2d48f15cd4b9bc4661b52b 100644
GIT binary patch
delta 2298
zcmZWqeQ;FO6+dtH?%TJU-OWpOlPrxSHy?Z?W-<#2DQX<vCITUXY!J|qrke#)7feEg
z;OF4pAZW@*2Y8z?gN`l|hX}S+Qom+&x|@`iGId&7>@e!6X{+t@uc|bynh}3@cLO_m
zXU@KRe&^itJLlYw{kT4{$n#;cB#$tBkVF2Xo;#GYM?3FR{6~LZ9;`oJWfWFkwrC{W
z$s3eLONd!N)r=E9jq9Ik$%N*N^SDuz8Y>*x%t_bIh1x6VLc-_Q0-``1x6t@J<%$w}
zYabCucuglhp*)#?B;Qp;qT=0Yr=461kpUYqvbbKn6m%ELsU{?(5%aIR$ZG2-;DQQI
z(n$-9{>|NaH+6BS76^#D+PW7!(oabPDuWorlSiCCzS5<LL82G}5si@{Du&<tU{aw;
zs5*ocL9M3)XKgB}h{PpiA?1nn|7&cN>^YQwK9y1sHiHToYQr*jemVh(X0kU9xXQ?n
z1cZdr*&p37MX@A@dM&TopO4tnktLep+x<fGclf?KbCVFXFzq1SP~yDsos0=Nhe(dr
z(}i2gxC|`Pi0sz6k<UrtP=R8g4cmX$q&bMJ+4{g--K6<`GMDJQMe)FAaoLD(PQmIU
zhaB@){+6|_vvsgfLdTB{nS%M*!QZT@8GnZLSsfRa>`io4Y3)tpUTvYGJZm;P=5n_D
zeZ7vTO1076uKwu6kfI3gZytQYs(_HD&4(Qg>qgs&8VbZ>bu{1|T)4$>POAI*_2f`+
zCwz@~m%1Y8D%D#Is^({dOLdaRaYg0KTl;lm>-zaS+lD^l`H=;|?s8`3hljOTA2+{o
zEU8LO*{W0q-sF$wj9un8Xe!pJl{0Vb*9%|q&99vK)gE)Bb8$_EK5kJ@7*?Li^yQwE
zI%UA!1}UzbdF`;athd~J#n}+M?j6{SW2C8nOr$+!Kc(j$Ew+dDH!2r14W<Yus)x<X
zZmW|UBg-Cb?_%QLQOsoF!xmY1u%=6Qq6<>#F_>Lhy^Hf9*F>SJ6o@M~`|K@)mE-R7
zE%b*b))Kf^XJ4qTMRd}k1dem|w&&2_(V~<=Z+qUcy3@09ma3PTVQ=Y&0T%47p4jp9
zPyh3mjpm8GR%P1!ByXAd|2$n8H65-Sq3?H9lwKMN%Kcf|5!f7(wRwu7=(kLH+Dy5)
zz4;h6j>yzq)zRtTOp!j<s90k(0wRiVB6(cAOxeBskRKOuZ)y_DWgn(H>V8Ib_E4Kw
zUjvntMVYI)Nln}?bvYtDbHu$Yl|3Z~X32c!jH}4Q=PWWvXOI8m%--O6!z_2tQ5wvf
z+%2&s{Z8u`qg1d!Tu_)G-eEfx!f|#jjHhbQbEfTv#}Ff<;u(wWc_soKA;&4X8S#l4
zncl19L<z<s(E=spP8dXiy5>N9vIcjYx&ParHV4H7-6puvQIO!cWb5NLgA~40kNK%q
zx7k!jF=@6ux?dUz*T$n)?z3KIZK1Wo_LMD}yhqq4Z9_(<aO_jZ{uC8?BkK9UcHw?W
z{nyL)iTrb_Hq|2vK2)QFeOOKh4-Dt8e7!vRgOV>}{zv~hd^K_D-Cf0xH$3ppEtYOP
zcPHUfXpM85e23%~m7Ci<rLhvsgatns5x{<YAI7%~U+F2wtX<BfzZ_4&CxmY)WG&<>
z1_g-z3H(Or+aa}CT*2#Ch{mmg9*4ALarnn*6AYUGd;q!Ux`AE;ehc&_wEE^8{Ie|H
z{~f#zckdziod*73j{hj|yP#K~$c;Jp*T5P0=|LEfbfyAidNpkzdL#!Iz^!->z)gE{
zZ~|@zJ`cF;QuS;cbfONpd_qQnBjmLXEe7a@{vvP}_^%N!gF(JkufpKW!P|j@hkgyb
zA&YAO9s}-!{yA`O4t^|4U_({%e@<?V^nWfJzX14b{JGL_8U~qRDGaj0^j-r0rFpAf
z5`Gg_c@m`Hk0xV<TPjO~f352@aydpPcwfgx;K2O?{|xlNKxA`IcYR>h+VCC0HOnId
zJ9@Wm32*K0ABdXV7aIRacUjbd-u|8)_nJ-K1*I!?cJGR2y><;mb`E6c-F;ilPVb81
zG#1W)z_PRPlin$d9bv*U&c))|vUHrRHJrOBELR~^gIQlv6#SkgOV=CO*0(x5n|wH%
M{cOB+`ss4%TQ7_t7XSbN

delta 1991
zcmZ`&eN0nV6hF6pZC^`UA5cR1($^Lh7_bNBQ|Ekz;gA6qha2;Y7K_3JERIaJ#q_lW
zh7%LmV;M0zbwix)LkFAD#VpVnr;A37)0izYbH=zomZ(v;n2>De7Ldloo1Ay=`JLbW
zopbKJZ-34o*~S~=@lk-ogjN|pV5}zF2i9#S@dM}PEy>(%6JxB`6)Z@GaaN4h4cVZ3
zWwKYqIcQCF2Ao|VR`gdmKWK*{-ef!20coWbE`A)AL9=cwojfGI(JlZUrCAR65YZUC
z<&RDU2$Ukkp#hPgG^5_BWN--zo<a&BJ+*0moeuO$0FgorDD;P2rST>(vj7r>SYXU6
z5AT?327w9@G!9r{*c!jY2`(O81u}hUz`4qA+y8aJxPTa@K}CMN|42`Zn@E0iQ4keH
z$#QkkM??uhbZ09YN=yuIk`SFrdEpnc00Q0gB<1KA6C_;c6RB0}2mE2F=2@*HH+Ba#
ztb|qZR1Ej`)CvU-F<b!E!UV!_qTQ;1ak{1es_rl-x*w<kMnwQ4BDr&iObOAI{e9X6
zFP~vbD^xYiZD;85`|>5m%tUoWS;qcm+LYQId4c{)8>`J9+rXv~vPbUIrtn?EzjB!X
zHnj=@E>~Tsmk^0J7x{aX*f_w5d`@d$IamQ;j~4$N2vwdQ=o}VWL{>xk%&8hSFk#cW
zU8d8=h=pOqkR_>0iFMQ0^N(fnR68enYOl4I_ul8{xUG)HL`KQ)^0U6SB)M9bZ}T5e
z+0+c?Jyw}8e4S5bu_rignLN?X#e8GBW|{o9Ssu~lrcb4Rb+lVykowFfU<3ZGs;Nw%
z!YyYklgHm-m$oL#B+Bl4!k+0Rh`k_--ZN!3v%c-nokuQH0Ee3g$ig|6c`1|LGHrXX
z!o^5`HQ|ajUOP$~A8a`w#FEscbbS&x;ead2ENcz!?;z1q$0C!a#9<vXoQ3lRFES;<
zCXRWaR>r|+S|aS`nZ3roTXd5MdyN;AhVW{PnaI8=f)*1L3XG$5UTDjj-g{Tn@}sCy
zG9nl2m&%z2PCw$9jN?*7OO=>MWUt;#2IY17BpO1}cX~_G#a;)!Frh|ha01V&&&M_u
zap4|D<okMFQM60mXUH}tC*+90F-lJ`;5f_6zZnw9^YSA@iEqAFr~I4&zr!H?Oc)97
zIhjLFe2;8u?L;~r2cv03MlS<m${vO4Q)D=t>>+wiwV`x+f(N}iJY6wMDnZAop9(3{
zkfD##>2G^=__`gXWx#L{_)#MJ6*Zz$auLaS44|F&O}}6xXYEp)W{S9Ki3zIeq&Fn>
zQB^0bmDszgA*o){1=T0Un<V|m>g({PWIU=i4{XMKJ9UNKS0A$P)m_~-@$k;Puapn6
zY91$_A8osp3ype!Zsf!$2h(Vw^j;U@ht^q%M&Cr8Ts0@k7lSJn%cP?j8D^aEUxm3i
zPyKVVamt~5I`$#QiSrz^6=?tA00H1G;?JSK9xY=UCwTygSjKYnd(q0Lape180|wS0
za0zYoYyf^hd@=eNSnHw){P8sIc?!?Nzo8R(pCG<1g8wDrucIG_O}-X^EAfY0fcarF
z2IfZu(lLNJpdWE;W~wvnAPezS^ku|tGdOl!GtEE#6n_=sRQDv}bl=Pgy^A=?!S9IM
zUziTCh<uF!6Z#xxjg7!Vh+`9AL!1^C7DE0{a(i_A;>#GN7e!4o6Qwj9>+l(TkLQ8U
z;A9H=Z+EyGS{k#3<tyr|9aUvxccRxSUa4PQUD(*^ZuPb_<}cgY_*PR+VUwpgH-Dq2
zD8D!-uef1DfxE1u)f>UHw`|#jEPKJm!Uj)6@y0@r$DP;EP$n-oEtGefmW|yp-BeUX
oY<RYCJyniiK2!Q)d<II}<qC6LN~^oMt+IZmoBHW?!Yu>*4=YaI7XSbN

diff --git a/benchmarks/ctests/example.ll b/benchmarks/ctests/example.ll
new file mode 100644
index 0000000..1fce17b
--- /dev/null
+++ b/benchmarks/ctests/example.ll
@@ -0,0 +1,178 @@
+; ModuleID = '../../benchmarks/ctests/example.bc'
+source_filename = "example.a08634fc28d17a86-cgu.0"
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-macosx12.0.0"
+
+@vtable.0 = private unnamed_addr constant <{ ptr, [16 x i8], ptr, ptr, ptr }> <{ ptr @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17hd44a932dcf55a427E", [16 x i8] c"\08\00\00\00\00\00\00\00\08\00\00\00\00\00\00\00", ptr @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h06fb5f22a45b1729E", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17he4a3bb6af4f8bfafE", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17he4a3bb6af4f8bfafE" }>, align 8
+@IO_NAME = constant <{ ptr }> <{ ptr @tmp }>, align 8
+@atomic_depth = external global i16
+
+; Function Attrs: noinline uwtable
+define internal void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17h11952cf61e1518ebE(ptr %f) unnamed_addr #0 {
+start:
+  call void @_ZN4core3ops8function6FnOnce9call_once17h2e8e8fa7347da120E(ptr %f)
+  call void asm sideeffect "", "~{memory}"(), !srcloc !3
+  ret void
+}
+
+; Function Attrs: uwtable
+define hidden i64 @_ZN3std2rt10lang_start17ha3b54fab1f2518b9E(ptr %main, i64 %argc, ptr %argv, i8 %sigpipe) unnamed_addr #1 {
+start:
+  %_8 = alloca ptr, align 8
+  %_5 = alloca i64, align 8
+  store ptr %main, ptr %_8, align 8
+  %0 = call i64 @_ZN3std2rt19lang_start_internal17hadaf077a6dd0140bE(ptr align 1 %_8, ptr align 8 @vtable.0, i64 %argc, ptr %argv, i8 %sigpipe)
+  store i64 %0, ptr %_5, align 8
+  %v = load i64, ptr %_5, align 8, !noundef !4
+  ret i64 %v
+}
+
+; Function Attrs: inlinehint uwtable
+define internal i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17he4a3bb6af4f8bfafE"(ptr align 8 %_1) unnamed_addr #2 {
+start:
+  %_4 = load ptr, ptr %_1, align 8, !nonnull !4, !noundef !4
+  call void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17h11952cf61e1518ebE(ptr %_4)
+  %self = call i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17h026d1b3fd579707fE"()
+  %_0 = zext i8 %self to i32
+  ret i32 %_0
+}
+
+; Function Attrs: inlinehint uwtable
+define internal i32 @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h06fb5f22a45b1729E"(ptr %_1) unnamed_addr #2 {
+start:
+  %_2 = alloca {}, align 1
+  %0 = load ptr, ptr %_1, align 8, !nonnull !4, !noundef !4
+  %_0 = call i32 @_ZN4core3ops8function6FnOnce9call_once17h631194d6dbd64289E(ptr %0)
+  ret i32 %_0
+}
+
+; Function Attrs: inlinehint uwtable
+define internal void @_ZN4core3ops8function6FnOnce9call_once17h2e8e8fa7347da120E(ptr %_1) unnamed_addr #2 {
+start:
+  %_2 = alloca {}, align 1
+  call void %_1()
+  ret void
+}
+
+; Function Attrs: inlinehint uwtable
+define internal i32 @_ZN4core3ops8function6FnOnce9call_once17h631194d6dbd64289E(ptr %0) unnamed_addr #2 personality ptr @rust_eh_personality {
+start:
+  %1 = alloca { ptr, i32 }, align 8
+  %_2 = alloca {}, align 1
+  %_1 = alloca ptr, align 8
+  store ptr %0, ptr %_1, align 8
+  %_0 = invoke i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17he4a3bb6af4f8bfafE"(ptr align 8 %_1)
+          to label %bb1 unwind label %cleanup
+
+bb3:                                              ; preds = %cleanup
+  %2 = load ptr, ptr %1, align 8, !noundef !4
+  %3 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 1
+  %4 = load i32, ptr %3, align 8, !noundef !4
+  %5 = insertvalue { ptr, i32 } poison, ptr %2, 0
+  %6 = insertvalue { ptr, i32 } %5, i32 %4, 1
+  resume { ptr, i32 } %6
+
+cleanup:                                          ; preds = %start
+  %7 = landingpad { ptr, i32 }
+          cleanup
+  %8 = extractvalue { ptr, i32 } %7, 0
+  %9 = extractvalue { ptr, i32 } %7, 1
+  %10 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 0
+  store ptr %8, ptr %10, align 8
+  %11 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 1
+  store i32 %9, ptr %11, align 8
+  br label %bb3
+
+bb1:                                              ; preds = %start
+  ret i32 %_0
+}
+
+; Function Attrs: inlinehint uwtable
+define internal void @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17hd44a932dcf55a427E"(ptr align 8 %_1) unnamed_addr #2 {
+start:
+  ret void
+}
+
+; Function Attrs: inlinehint uwtable
+define internal i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17h026d1b3fd579707fE"() unnamed_addr #2 {
+start:
+  ret i8 0
+}
+
+; Function Attrs: uwtable
+define dso_local i32 @tmp() unnamed_addr #1 {
+start:
+  ret i32 0
+}
+
+; Function Attrs: uwtable
+define dso_local void @log(i32 %i) unnamed_addr #1 {
+start:
+  ret void
+}
+
+; Function Attrs: uwtable
+define dso_local void @app() unnamed_addr #1 {
+start:
+  call void @atomic_start()
+  %x = call i32 @tmp()
+  call void @log(i32 %x)
+  call void @atomic_end()
+  ret void
+}
+
+; Function Attrs: uwtable
+define internal void @_ZN7example4main17ha3370acdcff48c7aE() unnamed_addr #1 {
+start:
+  call void @app()
+  ret void
+}
+
+; Function Attrs: uwtable
+define dso_local void @atomic_start() unnamed_addr #1 {
+start:
+  %local = load i16, ptr @atomic_depth, align 2, !noundef !4
+  call void @start_atomic()
+  ret void
+}
+
+; Function Attrs: uwtable
+define dso_local void @atomic_end() unnamed_addr #1 {
+start:
+  call void @end_atomic()
+  ret void
+}
+
+; Function Attrs: uwtable
+declare i64 @_ZN3std2rt19lang_start_internal17hadaf077a6dd0140bE(ptr align 1, ptr align 8, i64, ptr, i8) unnamed_addr #1
+
+; Function Attrs: uwtable
+declare i32 @rust_eh_personality(i32, i32, i64, ptr, ptr) unnamed_addr #1
+
+; Function Attrs: uwtable
+declare void @start_atomic() unnamed_addr #1
+
+; Function Attrs: uwtable
+declare void @end_atomic() unnamed_addr #1
+
+define i32 @main(i32 %0, ptr %1) unnamed_addr #3 {
+top:
+  %2 = sext i32 %0 to i64
+  %3 = call i64 @_ZN3std2rt10lang_start17ha3b54fab1f2518b9E(ptr @_ZN7example4main17ha3370acdcff48c7aE, i64 %2, ptr %1, i8 0)
+  %4 = trunc i64 %3 to i32
+  ret i32 %4
+}
+
+attributes #0 = { noinline uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" }
+attributes #1 = { uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" }
+attributes #2 = { inlinehint uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" }
+attributes #3 = { "frame-pointer"="non-leaf" "target-cpu"="apple-m1" }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 8, !"PIC Level", i32 2}
+!1 = !{i32 7, !"PIE Level", i32 2}
+!2 = !{!"rustc version 1.73.0 (cc66ad468 2023-10-03)"}
+!3 = !{i32 1115277}
+!4 = !{}
diff --git a/benchmarks/ctests/example.orig.ll b/benchmarks/ctests/example.orig.ll
new file mode 100644
index 0000000..7cbde04
--- /dev/null
+++ b/benchmarks/ctests/example.orig.ll
@@ -0,0 +1,183 @@
+; ModuleID = '../../benchmarks/ctests/example.bc'
+source_filename = "example.a08634fc28d17a86-cgu.0"
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-macosx12.0.0"
+
+@vtable.0 = private unnamed_addr constant <{ ptr, [16 x i8], ptr, ptr, ptr }> <{ ptr @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17hd44a932dcf55a427E", [16 x i8] c"\08\00\00\00\00\00\00\00\08\00\00\00\00\00\00\00", ptr @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h06fb5f22a45b1729E", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17he4a3bb6af4f8bfafE", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17he4a3bb6af4f8bfafE" }>, align 8
+@IO_NAME = constant <{ ptr }> <{ ptr @tmp }>, align 8
+@atomic_depth = external global i16
+
+; Function Attrs: noinline uwtable
+define internal void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17h11952cf61e1518ebE(ptr %f) unnamed_addr #0 {
+start:
+  call void @_ZN4core3ops8function6FnOnce9call_once17h2e8e8fa7347da120E(ptr %f)
+  call void asm sideeffect "", "~{memory}"(), !srcloc !3
+  ret void
+}
+
+; Function Attrs: uwtable
+define hidden i64 @_ZN3std2rt10lang_start17ha3b54fab1f2518b9E(ptr %main, i64 %argc, ptr %argv, i8 %sigpipe) unnamed_addr #1 {
+start:
+  %_8 = alloca ptr, align 8
+  %_5 = alloca i64, align 8
+  store ptr %main, ptr %_8, align 8
+  %0 = call i64 @_ZN3std2rt19lang_start_internal17hadaf077a6dd0140bE(ptr align 1 %_8, ptr align 8 @vtable.0, i64 %argc, ptr %argv, i8 %sigpipe)
+  store i64 %0, ptr %_5, align 8
+  %v = load i64, ptr %_5, align 8, !noundef !4
+  ret i64 %v
+}
+
+; Function Attrs: inlinehint uwtable
+define internal i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17he4a3bb6af4f8bfafE"(ptr align 8 %_1) unnamed_addr #2 {
+start:
+  %_4 = load ptr, ptr %_1, align 8, !nonnull !4, !noundef !4
+  call void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17h11952cf61e1518ebE(ptr %_4)
+  %self = call i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17h026d1b3fd579707fE"()
+  %_0 = zext i8 %self to i32
+  ret i32 %_0
+}
+
+; Function Attrs: inlinehint uwtable
+define internal i32 @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h06fb5f22a45b1729E"(ptr %_1) unnamed_addr #2 {
+start:
+  %_2 = alloca {}, align 1
+  %0 = load ptr, ptr %_1, align 8, !nonnull !4, !noundef !4
+  %_0 = call i32 @_ZN4core3ops8function6FnOnce9call_once17h631194d6dbd64289E(ptr %0)
+  ret i32 %_0
+}
+
+; Function Attrs: inlinehint uwtable
+define internal void @_ZN4core3ops8function6FnOnce9call_once17h2e8e8fa7347da120E(ptr %_1) unnamed_addr #2 {
+start:
+  %_2 = alloca {}, align 1
+  call void %_1()
+  ret void
+}
+
+; Function Attrs: inlinehint uwtable
+define internal i32 @_ZN4core3ops8function6FnOnce9call_once17h631194d6dbd64289E(ptr %0) unnamed_addr #2 personality ptr @rust_eh_personality {
+start:
+  %1 = alloca { ptr, i32 }, align 8
+  %_2 = alloca {}, align 1
+  %_1 = alloca ptr, align 8
+  store ptr %0, ptr %_1, align 8
+  %_0 = invoke i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17he4a3bb6af4f8bfafE"(ptr align 8 %_1)
+          to label %bb1 unwind label %cleanup
+
+bb3:                                              ; preds = %cleanup
+  %2 = load ptr, ptr %1, align 8, !noundef !4
+  %3 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 1
+  %4 = load i32, ptr %3, align 8, !noundef !4
+  %5 = insertvalue { ptr, i32 } poison, ptr %2, 0
+  %6 = insertvalue { ptr, i32 } %5, i32 %4, 1
+  resume { ptr, i32 } %6
+
+cleanup:                                          ; preds = %start
+  %7 = landingpad { ptr, i32 }
+          cleanup
+  %8 = extractvalue { ptr, i32 } %7, 0
+  %9 = extractvalue { ptr, i32 } %7, 1
+  %10 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 0
+  store ptr %8, ptr %10, align 8
+  %11 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 1
+  store i32 %9, ptr %11, align 8
+  br label %bb3
+
+bb1:                                              ; preds = %start
+  ret i32 %_0
+}
+
+; Function Attrs: inlinehint uwtable
+define internal void @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17hd44a932dcf55a427E"(ptr align 8 %_1) unnamed_addr #2 {
+start:
+  ret void
+}
+
+; Function Attrs: inlinehint uwtable
+define internal i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17h026d1b3fd579707fE"() unnamed_addr #2 {
+start:
+  ret i8 0
+}
+
+; Function Attrs: uwtable
+define dso_local i32 @tmp() unnamed_addr #1 {
+start:
+  ret i32 0
+}
+
+; Function Attrs: uwtable
+define dso_local void @log(i32 %i) unnamed_addr #1 {
+start:
+  ret void
+}
+
+; Function Attrs: uwtable
+define dso_local void @app() unnamed_addr #1 {
+start:
+  %x = call i32 @tmp()
+  call void @Fresh(i32 %x)
+  call void @log(i32 %x)
+  ret void
+}
+
+; Function Attrs: uwtable
+define internal void @_ZN7example4main17ha3370acdcff48c7aE() unnamed_addr #1 {
+start:
+  call void @app()
+  ret void
+}
+
+; Function Attrs: uwtable
+define internal void @Fresh(i32 %_var) unnamed_addr #1 {
+start:
+  ret void
+}
+
+; Function Attrs: uwtable
+define dso_local void @atomic_start() unnamed_addr #1 {
+start:
+  %local = load i16, ptr @atomic_depth, align 2, !noundef !4
+  call void @start_atomic()
+  ret void
+}
+
+; Function Attrs: uwtable
+define dso_local void @atomic_end() unnamed_addr #1 {
+start:
+  call void @end_atomic()
+  ret void
+}
+
+; Function Attrs: uwtable
+declare i64 @_ZN3std2rt19lang_start_internal17hadaf077a6dd0140bE(ptr align 1, ptr align 8, i64, ptr, i8) unnamed_addr #1
+
+; Function Attrs: uwtable
+declare i32 @rust_eh_personality(i32, i32, i64, ptr, ptr) unnamed_addr #1
+
+; Function Attrs: uwtable
+declare void @start_atomic() unnamed_addr #1
+
+; Function Attrs: uwtable
+declare void @end_atomic() unnamed_addr #1
+
+define i32 @main(i32 %0, ptr %1) unnamed_addr #3 {
+top:
+  %2 = sext i32 %0 to i64
+  %3 = call i64 @_ZN3std2rt10lang_start17ha3b54fab1f2518b9E(ptr @_ZN7example4main17ha3370acdcff48c7aE, i64 %2, ptr %1, i8 0)
+  %4 = trunc i64 %3 to i32
+  ret i32 %4
+}
+
+attributes #0 = { noinline uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" }
+attributes #1 = { uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" }
+attributes #2 = { inlinehint uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" }
+attributes #3 = { "frame-pointer"="non-leaf" "target-cpu"="apple-m1" }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 8, !"PIC Level", i32 2}
+!1 = !{i32 7, !"PIE Level", i32 2}
+!2 = !{!"rustc version 1.73.0 (cc66ad468 2023-10-03)"}
+!3 = !{i32 1115277}
+!4 = !{}
diff --git a/benchmarks/ctests/example.rs b/benchmarks/ctests/example.rs
index 68583bd..bb6f8a1 100644
--- a/benchmarks/ctests/example.rs
+++ b/benchmarks/ctests/example.rs
@@ -1,15 +1,14 @@
-fn Fresh<T>(_var: T) -> () {}
-
-fn Consistent<T>(_var: T, _id: u16) -> () {}
-
-#[no_mangle]
-pub static IO_NAME: fn() -> i32 = tmp;
+include!("../intermittent.rs");
 
 #[no_mangle]
 fn tmp() -> i32 {
     0
 }
 
+#[no_mangle]
+pub static IO_NAME: fn() -> i32 = tmp;
+
+#[no_mangle]
 fn log(i: i32) -> () {}
 
 #[no_mangle]
diff --git a/benchmarks/ctests/example09.c b/benchmarks/ctests/example09.c
new file mode 100644
index 0000000..ace57fa
--- /dev/null
+++ b/benchmarks/ctests/example09.c
@@ -0,0 +1,26 @@
+#include <stdio.h>
+
+void Fresh(int x) {}
+
+void atomic_start() {}
+void atomic_end() {}
+
+int input() { return 0; }
+int (*IO_NAME)() = input;
+
+void log(int x) {
+  printf("%d\n", x);
+}
+
+void app() {
+  int x = input();
+  for (int i = 0; i < 10; i++) {
+    log(x);
+    log(i);
+  }
+  Fresh(x);
+}
+
+int main() {
+  app();
+}
\ No newline at end of file
diff --git a/benchmarks/ctests/example09.ll b/benchmarks/ctests/example09.ll
new file mode 100644
index 0000000..5ff5b64
--- /dev/null
+++ b/benchmarks/ctests/example09.ll
@@ -0,0 +1,111 @@
+; ModuleID = '../../benchmarks/ctests/example09.c'
+source_filename = "../../benchmarks/ctests/example09.c"
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-macosx12.0.0"
+
+@IO_NAME = global ptr @input, align 8
+@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @atomic_start() #0 {
+entry:
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @atomic_end() #0 {
+entry:
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @input() #0 {
+entry:
+  ret i32 0
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @log(i32 noundef %x) #0 {
+entry:
+  %x.addr = alloca i32, align 4
+  store i32 %x, ptr %x.addr, align 4
+  %0 = load i32, ptr %x.addr, align 4
+  %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %0)
+  ret void
+}
+
+declare i32 @printf(ptr noundef, ...) #1
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @app() #0 {
+entry:
+  %x = alloca i32, align 4
+  %0 = alloca i32, align 4
+  %i = alloca i32, align 4
+  call void @atomic_start()
+  %call = call i32 @input()
+  store i32 %call, ptr %x, align 4
+  store i32 0, ptr %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %entry, %for.inc, <null operand!>
+  %1 = load i32, ptr %i, align 4
+  %cmp = icmp slt i32 %1, 10
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %2 = load i32, ptr %x, align 4
+  call void @log(i32 noundef %2)
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body, <null operand!>
+  %3 = load i32, ptr %i, align 4
+  %inc = add nsw i32 %3, 1
+  store i32 %inc, ptr %i, align 4
+  br label %for.cond, !llvm.loop !5
+
+for.end:                                          ; preds = %for.cond
+  call void @atomic_end()
+  store i32 0, ptr %0, align 4
+  br label %for.cond1
+
+for.cond1:                                        ; preds = %for.inc3, %for.end
+  %4 = load i32, ptr %0, align 4
+  %5 = icmp slt i32 %4, 10
+  br i1 %5, label %for.body2, label %for.end4
+
+for.body2:                                        ; preds = %for.cond1
+  %6 = load i32, ptr %i, align 4
+  call void @log(i32 noundef %6)
+  br label %for.inc3
+
+for.inc3:                                         ; preds = %for.body2
+  %7 = load i32, ptr %0, align 4
+  %8 = add nsw i32 %7, 1
+  store i32 %8, ptr %0, align 4
+  br label %for.cond1, !llvm.loop !5
+
+for.end4:                                         ; preds = %for.cond1
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @main() #0 {
+entry:
+  call void @app()
+  ret i32 0
+}
+
+attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
+attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+!llvm.ident = !{!4}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 8, !"PIC Level", i32 2}
+!2 = !{i32 7, !"uwtable", i32 1}
+!3 = !{i32 7, !"frame-pointer", i32 1}
+!4 = !{!"Homebrew clang version 17.0.2"}
+!5 = distinct !{!5, !6}
+!6 = !{!"llvm.loop.mustprogress"}
diff --git a/benchmarks/ctests/example09.orig.ll b/benchmarks/ctests/example09.orig.ll
new file mode 100644
index 0000000..03d06bb
--- /dev/null
+++ b/benchmarks/ctests/example09.orig.ll
@@ -0,0 +1,100 @@
+; ModuleID = '../../benchmarks/ctests/example09.c'
+source_filename = "../../benchmarks/ctests/example09.c"
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-macosx12.0.0"
+
+@IO_NAME = global ptr @input, align 8
+@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @Fresh(i32 noundef %x) #0 {
+entry:
+  %x.addr = alloca i32, align 4
+  store i32 %x, ptr %x.addr, align 4
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @atomic_start() #0 {
+entry:
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @atomic_end() #0 {
+entry:
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @input() #0 {
+entry:
+  ret i32 0
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @log(i32 noundef %x) #0 {
+entry:
+  %x.addr = alloca i32, align 4
+  store i32 %x, ptr %x.addr, align 4
+  %0 = load i32, ptr %x.addr, align 4
+  %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %0)
+  ret void
+}
+
+declare i32 @printf(ptr noundef, ...) #1
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @app() #0 {
+entry:
+  %x = alloca i32, align 4
+  %i = alloca i32, align 4
+  %call = call i32 @input()
+  store i32 %call, ptr %x, align 4
+  store i32 0, ptr %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32, ptr %i, align 4
+  %cmp = icmp slt i32 %0, 10
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32, ptr %x, align 4
+  call void @log(i32 noundef %1)
+  %2 = load i32, ptr %i, align 4
+  call void @log(i32 noundef %2)
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %3 = load i32, ptr %i, align 4
+  %inc = add nsw i32 %3, 1
+  store i32 %inc, ptr %i, align 4
+  br label %for.cond, !llvm.loop !5
+
+for.end:                                          ; preds = %for.cond
+  %4 = load i32, ptr %x, align 4
+  call void @Fresh(i32 noundef %4)
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @main() #0 {
+entry:
+  call void @app()
+  ret i32 0
+}
+
+attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
+attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+!llvm.ident = !{!4}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 8, !"PIC Level", i32 2}
+!2 = !{i32 7, !"uwtable", i32 1}
+!3 = !{i32 7, !"frame-pointer", i32 1}
+!4 = !{!"Homebrew clang version 17.0.2"}
+!5 = distinct !{!5, !6}
+!6 = !{!"llvm.loop.mustprogress"}
diff --git a/benchmarks/ctests/example10.c b/benchmarks/ctests/example10.c
new file mode 100644
index 0000000..4e57ff7
--- /dev/null
+++ b/benchmarks/ctests/example10.c
@@ -0,0 +1,25 @@
+#include <stdio.h>
+
+void Fresh(int x) {}
+
+void atomic_start() {}
+void atomic_end() {}
+
+int input() { return 0; }
+int (*IO_NAME)() = input;
+
+void log(int x) {
+  printf("%d\n", x);
+}
+
+void app() {
+  int x = input();
+  for (int i = x; i < 10; i++) {
+    log(i + 2);
+  }
+  Fresh(x);
+}
+
+int main() {
+  app();
+}
\ No newline at end of file
diff --git a/benchmarks/ctests/example10.ll b/benchmarks/ctests/example10.ll
new file mode 100644
index 0000000..a2df8f1
--- /dev/null
+++ b/benchmarks/ctests/example10.ll
@@ -0,0 +1,92 @@
+; ModuleID = '../../benchmarks/ctests/example10.c'
+source_filename = "../../benchmarks/ctests/example10.c"
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-macosx12.0.0"
+
+@IO_NAME = global ptr @input, align 8
+@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @atomic_start() #0 {
+entry:
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @atomic_end() #0 {
+entry:
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @input() #0 {
+entry:
+  ret i32 0
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @log(i32 noundef %x) #0 {
+entry:
+  %x.addr = alloca i32, align 4
+  store i32 %x, ptr %x.addr, align 4
+  %0 = load i32, ptr %x.addr, align 4
+  %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %0)
+  ret void
+}
+
+declare i32 @printf(ptr noundef, ...) #1
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @app() #0 {
+entry:
+  %x = alloca i32, align 4
+  %i = alloca i32, align 4
+  call void @atomic_start()
+  %call = call i32 @input()
+  store i32 %call, ptr %x, align 4
+  %0 = load i32, ptr %x, align 4
+  store i32 %0, ptr %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry, <null operand!>, <null operand!>
+  %1 = load i32, ptr %i, align 4
+  %2 = icmp slt i32 %1, 10
+  br i1 %2, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond, <null operand!>
+  %3 = load i32, ptr %i, align 4
+  %4 = add nsw i32 %3, 2
+  call void @log(i32 noundef %4)
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body, <null operand!>
+  %5 = load i32, ptr %i, align 4
+  %6 = add nsw i32 %5, 1
+  store i32 %6, ptr %i, align 4
+  br label %for.cond, !llvm.loop !5
+
+for.end:                                          ; preds = %for.cond, <null operand!>
+  call void @atomic_end()
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @main() #0 {
+entry:
+  call void @app()
+  ret i32 0
+}
+
+attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
+attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+!llvm.ident = !{!4}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 8, !"PIC Level", i32 2}
+!2 = !{i32 7, !"uwtable", i32 1}
+!3 = !{i32 7, !"frame-pointer", i32 1}
+!4 = !{!"Homebrew clang version 17.0.2"}
+!5 = distinct !{!5, !6}
+!6 = !{!"llvm.loop.mustprogress"}
diff --git a/benchmarks/ctests/example10.orig.ll b/benchmarks/ctests/example10.orig.ll
new file mode 100644
index 0000000..bbe99ff
--- /dev/null
+++ b/benchmarks/ctests/example10.orig.ll
@@ -0,0 +1,100 @@
+; ModuleID = '../../benchmarks/ctests/example10.c'
+source_filename = "../../benchmarks/ctests/example10.c"
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-macosx12.0.0"
+
+@IO_NAME = global ptr @input, align 8
+@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @Fresh(i32 noundef %x) #0 {
+entry:
+  %x.addr = alloca i32, align 4
+  store i32 %x, ptr %x.addr, align 4
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @atomic_start() #0 {
+entry:
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @atomic_end() #0 {
+entry:
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @input() #0 {
+entry:
+  ret i32 0
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @log(i32 noundef %x) #0 {
+entry:
+  %x.addr = alloca i32, align 4
+  store i32 %x, ptr %x.addr, align 4
+  %0 = load i32, ptr %x.addr, align 4
+  %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %0)
+  ret void
+}
+
+declare i32 @printf(ptr noundef, ...) #1
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define void @app() #0 {
+entry:
+  %x = alloca i32, align 4
+  %i = alloca i32, align 4
+  %call = call i32 @input()
+  store i32 %call, ptr %x, align 4
+  %0 = load i32, ptr %x, align 4
+  store i32 %0, ptr %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %1 = load i32, ptr %i, align 4
+  %cmp = icmp slt i32 %1, 10
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %2 = load i32, ptr %i, align 4
+  %add = add nsw i32 %2, 2
+  call void @log(i32 noundef %add)
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %3 = load i32, ptr %i, align 4
+  %inc = add nsw i32 %3, 1
+  store i32 %inc, ptr %i, align 4
+  br label %for.cond, !llvm.loop !5
+
+for.end:                                          ; preds = %for.cond
+  %4 = load i32, ptr %x, align 4
+  call void @Fresh(i32 noundef %4)
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @main() #0 {
+entry:
+  call void @app()
+  ret i32 0
+}
+
+attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
+attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+!llvm.ident = !{!4}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 8, !"PIC Level", i32 2}
+!2 = !{i32 7, !"uwtable", i32 1}
+!3 = !{i32 7, !"frame-pointer", i32 1}
+!4 = !{!"Homebrew clang version 17.0.2"}
+!5 = distinct !{!5, !6}
+!6 = !{!"llvm.loop.mustprogress"}
diff --git a/benchmarks/ctests/example11.bc b/benchmarks/ctests/example11.bc
new file mode 100644
index 0000000000000000000000000000000000000000..076c5ac57cc33f1dd31c34eaedd8b91b7efcc204
GIT binary patch
literal 6352
zcmcgv4R8}zet&DNw6ZO))>s5Pk;KaOafX7s`miivd#VpiY+?@KbM7diBd@*?i9duS
z8^eWUZ5c~ROllZs&P}=2PMA4JFRcTFftJizlHq)ulXG&JmT(@%J`5o#<QxJV?f9<!
zKgk9Ya=EnA>yCE!z2Cm~`+vOu|NovYjV#MXa4Fy(&PV9K;Ii3AE|md51Ll%(O6yOv
ze$trzCPVF-k_4tC!3oJthx`kRyqTd|0#pdV2?=zkS^Bzzwkg=}!aR)(q0=ISvNJR&
z1Nx7^O?4I>u#S<n5=Lh}NOa4-C6*Sb$(LM$qB_HNFxz&R(J4&Q{3o(>^|6}3sHSM$
zl4vhuJ(%GclfI>3?dxOg&j?+8cmAr9RejoLhqoPI&yE4_3oJsQIx6&$%rRS6ZEih0
z4(v=gYO*p{RG$;u>Q_ihUoEmMDZG#&MMBPqk}zaANYPcPWk?}qnXA%~L1|V$+izJK
zKeA-N#Ue&}*0Di@&JQZ`rJRr}yoyagn?cAXct3WHy^H-s1iCmz8tRJ%{L!Fa)X@<Q
z2dF(I)NV96t(qLlxiF+m#FeN^ov^Ck8&X~y$~i}<KP9A8hd{NLM2-Ha2^q&aXEB$Z
zk!0}nO@_YVpqr8WH+}SH7WoxcE>xU}RGc6x`t7Cxk!8Ye8cUk{MO7!_rV)D;nnv>3
z5&Fu6<OV~HNY>{69+;j-F;+$<lrtLT#UbTIq`st4O?Il!Th(Y+2er#D0?`4E5(cAz
zzG$d~+T)Mx6ry`jf>2L&s?HDPT-4A)jyRVjjt!M`u~MU=haty$%1tS9cHF8?4B=r@
z)2a{Rs%t~a86ptf6^I%eqd_SZLef1_YPTsG9E=+Csg5HQtDGX#;EIcbs$U~i3qGX4
zhk0F%HnxtDxNKZqes^MkQRd+%gZ-R044SnMi`M!h0fFiOUVo%r8r|6u-8~XD#YE)-
z{31C#cxr;)GfOx3(d`y$Pa_qckbJ^WL4n$ndNxndA$A@=o<cUrrMU;KJ-vzo!eARX
z)MYJL-je+2%9inck?FCw|FXcC{nw$lhYSAg(ZB!q(hDzVT6*G7ND+=-0THNB8Mur7
zW5hwA{QBm93ZN0JED5&-u_JvOR3EYgGzA1R;As&wOE(TCGEfq_DYFOx*;ief*e6C3
zU}OOXW}>H|{WU5Z$$-cLh((MtWJjl}<uu9%A^{*EB=hT!zFVb49Ap(}f(fPE;jE&k
z8c`z)UEl*krm?1NQ?)2H>AuA09#y8)6ktr6>UY1KP0I(FAQUTqczlWwtf>JD%5Pmt
zBN-4|07WF$>f(#L)27@L_saLre@WaM@4qUNN(C6f;(4HLPs5$EYD>y1L<#}TH)yXC
z85ZbaUx5fg^W^@Oat1Ak1B+xwpX=>Ih=b2q90<0!B9`fo2qdfq<8W=#sRtAVxpjTL
znJXSYN80Pfo5?GVxq~r+T#sssyHm5~T?lQ9=%vw(E7x&b&|46LX8_Sl1?A|p(x)J*
zVs=w^vezb1yE~}Rz)sm!B)`GXw**=*v#Aqt6{N3^6Q_nn9vc@`joGVuMOEV+y^!%g
zfOxNo1|anVqV|qxfQ=T6=hD~v=xI*g!pSes(z6a)&y1=rX_OZ<ssyk0aWCMsp0Jxw
zh$_!SD!WA$<FktPM(PKCsvOr6s$9U?8EAw$LABo%1MMy$>VzD~j|K;#y9YW~e>O{h
zHbvhap|5iy*BE5ska9froaB|~HOf&)WnS{HeRlJh-8_(bo=Tdaio`?}{mDNEp>tgo
zABrl5ld_-ghz7PqgMmockCQE&9}N#i3*P&#{4*rK>7Zx&=*tVmUC)-NCgZrCeV|c7
zQJI`l&P=O5CDgwB&8A`d+C!0waYymIA&pU}Hys0;>u&YQu?N&otIzYwYZ}$1xav7V
zjTP3YW_d}&zKCf&Sut#{0*A~cDrf8!G3X|H&t`$8(OsaA6qnWDK<Bp041FC6o`e2^
zD~f+$RnJT>yD+V~1Pas?R18O|PDCE-PMUh{=J90ZsknJ82{u-a*PPfXr9i8l;J_`>
zf?u7ZuQ$qXOi5;0`4tXV**gxpg`qEVG<3h1l6*2n&p7D2T+y$u^U6f0>caH0>l)Qn
zjcQb*f)%``S?*(hRB^^$bxLIJkC?}|oPpH`W1;fxslm0d;O8IHzv1N5Q}ibm`6q)Q
zB}s@vM<}qxIRK%8Jsh5ZrR?msnx~TBx+++yam(l+R94lLRedS0^cA}~dK#~<97wKk
zc1@%$p9<APorBS51(^F*KT+A6tn5#kChTh=oW{XpvKq}&8?Odc-;1m2I~6|Vc20hi
zqp#1>m(gw(p;Zv~L3IslkI4!$gSr~FsI&LLf%n%9G9{i$t<LsxpXPNFGlXi7Nsyz~
z&enO`y-kkV>U1O677DxBc5g7WqqT+AmX_;FIkv>@HX3=4!DwQ2oKC++%dO$`D?v_?
z^n6*@U}KSVgE46#&&nbig`P~xgkEw*Ns-a`FUB99EI+pPq;8r1<nZy+o5{;H`}V*4
zkMDkK|4+vExAY_8DCFrH;K|MkmO%Vdd$t4yz~|Uq=80t2Rpq&%oNKTh#qrXl<LVK^
z<wp8rhkTlq&)}eyf9#-d4GW#rZrJo7n>wNp2{@Lv1P&!lu&*ADRP`q_o`J+vPEIQ)
zS>+6mpVvCo30^rdq#R2J03Rphw^;eKLk=||JC<Gu@h?o^_6v|)uqQ_<AnKqdjEO2h
zW%NGOC<(l&@yf%IibIfeqN>42B_v>95|=@qg>wYF^GGW$XQ{E|xo7?k)y(nB@xDE+
zn$ax(&s4?y9Eu>Wfq7*0ub9=~SBV&PH3mG17(obZer0Emh=8LOx|6G*95s2&Mrae-
z^qi-9reyf+g)g2>SLFqnnEdbtsDrtJPI8#b>nw3wvXYYS05cM^27%eq-zZscl|9DT
zEU1`0W;HPg9~Rda&YjuESUe3Yf;F+vSvmi0t9LmmV)wryeZGYynSunN_<&rO2SXR>
z@!Au(k1w7DTi~5X5@Mk@UxB1u@o909gaoTbwiO(?z$&HKcap-nQ~Q`DZz)z3&YjpR
zeSR<6XCU4u?{j7~*8Vfz!{FjsgqFhlVcYsMBtsSvn|9i8ATF7<z&J77j}*?G+%Nr$
z=H&_Ja*Jl8#Q+Yu=eHLh<StEduS>0!2-=4?G?K!nerRF;y^?|bXI<IIWPvkrr^!N6
zL|uqwW3t2&36U$@TC1RJorTA9#?ZKtv~k-R@(YQ5DSADV;9h0Pcj(^B;OMvoy+gk%
z%1O13lkrllY$nJk=SXXFaiaBaUzq*n_X_83XDSmmlyiIA2t)pM!P{7OtlLvPuRU>6
z_V^E>nYwruAvv_Z+BcjFjsQ~0F9xwF*yz)KS>$d&(*8}I+ejR3X*qW^bGH{{FO0#u
zE|#NM4{_ctB5?fP3(k4IFONO^HDe!=oy#t!Lp%#|fp@WN9NX!rnrKKb|5QUFX2m&U
zdXjTFu-Pvw{1DY?dn8M?J;aF*D0yUjKGP1SZ2u}-9@5V`x0F#WL{-{<_{;(^-&Wn5
z59hb8coIZof8W`>`en5GzUX@~C>eYpXCBn$#XqP^PD)O?uHYQUYhY#zv=+pWk(qz!
zvW{6&S&*`Wt9uW}48Dd~h-G-cPX5e6ux}uZtvkkOY@^Z_9<{_|n!<H4Mr*65nL>B>
z=oxlhPb_!IO@S$})o*1AJ>A_OvYu|wBxfrNWQmSImfCWK^8_g%cC+}$4yZh`lK%Kg
z@HI)b1+o;eB`C4ZsyUQEg#>bB{unPpe_|!}$J;0i)V62^8+05wgD{y}&A~>u5<EcW
z+zzBZTZIH#C$N^yxq~d64jcN#7DTWnCLgV0#9~S2BS<RFMmzC0pwWTa-J*}kn#N6n
zctGr**eXz?;+VxLWbTt3neq$Se<K;QJT2tBDY?cr3G#7?baS(idqv{^yalGCm$EKC
zx$D^Ry?cNEPT%h~KDxC1*yBm#-LsA}Rswn9RQ@`QSt9CZab3awEP@ACzYcD^23z39
z7k)2;TfokT%M0URn?rly&B7b6F}@bQ<2D|TuOGn}@I`9h!2d(>|3-X3z5)23LH}vE
z@&1l+!T`MB%v$L8!(B6v<MBM;PeFeY?yZaPGl1jG%LoduU4;L39uIs8*MYGeFzyY&
zcP$$K2H@X^z6Nak-XffUq7FZ_3vleGl>VS61t5q(&jJpnq;LW7?0Nk8MK}Un4*0iW
zTo>Rg7I?reBjE5uM;7w~UI`%1i8leqf1noR)&h?Av(tcQEy5cB2QerKIL-$=UJ5`A
za0T@50FG<T0)A+oKNFN+;K$>S&hsw?e1RY1CjiIxF9#gky`ax&!0|YI;W$X)5FAOs
zaUb8f&ZKad_;bLqUAVU2lZy%A`YQlV!@59Bo>D=-J<PkByro?2Cg*po8>{(nYx54b
z)8h?<{m!l5(TBnwEf?Aya=Kfan_FAVF5dl2ILN!bI+N3hPkm08cl(YOXUN|gg!aN$
zZMnak)0gQ@THaUY)*E~tbM-u(E*REwO?=CCXDG}=yWDT)%|>^b%Tva2Wj?*u^#?|R
z#NxJcS8?j$ZfXs+1-;PrV8BxhJYH?N(PQLvT7${sahVO6$KY-adiAY=kjdB9;ts><
z)VRK7Q;XYc;M{yuld~0?_gUwi5%TY7et=o)Dsy?u^#-%S=+S#!<zLHezDEo?>2)rh
z&czwsUZ2ZYX5#+`GhIHeoHywVCX<QRl^gHR41|NGGEcBI-~`79nqV=C>l%t-rAkYK
z;nLFkzQu5Houjz89`45*ic3rHoBEIg@c>ZR<ume}m*=%+qnUT{92Q$<a6UlQrsInF
zP%(a`qy~bmZf_`5TH4?ZHt%TRv70fSG3X6ISOBX|=jM%WJ!f!nW*=Uf9W8;ja8v7c
zJ`ex_W^V`I9BA@twT5PXM+;2gT;-frZ!+u52ENQ>_EoPBdPDyC=u8F9eB0aNSrl02
z`^JNF!rovD-vn&DhX-et^G1({(;7Hebt-bberEvwv9!XN9pT+6QfHbKlcrl~bxOHX
z-5NY+P5Q6R8W;>&!+H#y!R_J=UcK4lD>IjsYdMqAY&MxZUQfB(1@?f&WjY@pY&II!
zfUQm5HO;)cHPoTi0w;IR>{1hF@|5YloSt`^%Xpt@jeC1rDd(Iw?f)zV7rE+><=|Hj
z{a+*@5dEpL@aw4mNE&|gsSoAhy{Z4ti3oE41epkV0;%YPTy)}O1R>ub8=a|ioX?ST
yAz$d=+yN52bKp|{O*1mRnLL=14-?NPJ<iHH=fc+IoZq_^G)Qe;sm*TD<G%qbB^7i4

literal 0
HcmV?d00001

diff --git a/benchmarks/ctests/example11.ll b/benchmarks/ctests/example11.ll
new file mode 100644
index 0000000..82eea4a
--- /dev/null
+++ b/benchmarks/ctests/example11.ll
@@ -0,0 +1,179 @@
+; ModuleID = '../../benchmarks/ctests/example11.bc'
+source_filename = "example11.808d53e03ac95af8-cgu.0"
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-macosx12.0.0"
+
+@vtable.0 = private unnamed_addr constant <{ ptr, [16 x i8], ptr, ptr, ptr }> <{ ptr @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17hbf6a0eaa1969aba0E", [16 x i8] c"\08\00\00\00\00\00\00\00\08\00\00\00\00\00\00\00", ptr @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h1b5be734946d3eb7E", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17he176d602148ddb94E", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17he176d602148ddb94E" }>, align 8
+@IO_NAME = constant <{ ptr }> <{ ptr @input }>, align 8
+@atomic_depth = external global i16
+
+; Function Attrs: noinline uwtable
+define internal void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17h7035381af5c34fd9E(ptr %f) unnamed_addr #0 {
+start:
+  call void @_ZN4core3ops8function6FnOnce9call_once17h3bf07a824888a276E(ptr %f)
+  call void asm sideeffect "", "~{memory}"(), !srcloc !3
+  ret void
+}
+
+; Function Attrs: uwtable
+define hidden i64 @_ZN3std2rt10lang_start17h9a96c5bd5005f31bE(ptr %main, i64 %argc, ptr %argv, i8 %sigpipe) unnamed_addr #1 {
+start:
+  %_8 = alloca ptr, align 8
+  %_5 = alloca i64, align 8
+  store ptr %main, ptr %_8, align 8
+  %0 = call i64 @_ZN3std2rt19lang_start_internal17hadaf077a6dd0140bE(ptr align 1 %_8, ptr align 8 @vtable.0, i64 %argc, ptr %argv, i8 %sigpipe)
+  store i64 %0, ptr %_5, align 8
+  %v = load i64, ptr %_5, align 8, !noundef !4
+  ret i64 %v
+}
+
+; Function Attrs: inlinehint uwtable
+define internal i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17he176d602148ddb94E"(ptr align 8 %_1) unnamed_addr #2 {
+start:
+  %_4 = load ptr, ptr %_1, align 8, !nonnull !4, !noundef !4
+  call void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17h7035381af5c34fd9E(ptr %_4)
+  %self = call i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17h22ca6c304b09fb94E"()
+  %_0 = zext i8 %self to i32
+  ret i32 %_0
+}
+
+; Function Attrs: inlinehint uwtable
+define internal i32 @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h1b5be734946d3eb7E"(ptr %_1) unnamed_addr #2 {
+start:
+  %_2 = alloca {}, align 1
+  %0 = load ptr, ptr %_1, align 8, !nonnull !4, !noundef !4
+  %_0 = call i32 @_ZN4core3ops8function6FnOnce9call_once17h32b22b06cefb658aE(ptr %0)
+  ret i32 %_0
+}
+
+; Function Attrs: inlinehint uwtable
+define internal i32 @_ZN4core3ops8function6FnOnce9call_once17h32b22b06cefb658aE(ptr %0) unnamed_addr #2 personality ptr @rust_eh_personality {
+start:
+  %1 = alloca { ptr, i32 }, align 8
+  %_2 = alloca {}, align 1
+  %_1 = alloca ptr, align 8
+  store ptr %0, ptr %_1, align 8
+  %_0 = invoke i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17he176d602148ddb94E"(ptr align 8 %_1)
+          to label %bb1 unwind label %cleanup
+
+bb3:                                              ; preds = %cleanup
+  %2 = load ptr, ptr %1, align 8, !noundef !4
+  %3 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 1
+  %4 = load i32, ptr %3, align 8, !noundef !4
+  %5 = insertvalue { ptr, i32 } poison, ptr %2, 0
+  %6 = insertvalue { ptr, i32 } %5, i32 %4, 1
+  resume { ptr, i32 } %6
+
+cleanup:                                          ; preds = %start
+  %7 = landingpad { ptr, i32 }
+          cleanup
+  %8 = extractvalue { ptr, i32 } %7, 0
+  %9 = extractvalue { ptr, i32 } %7, 1
+  %10 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 0
+  store ptr %8, ptr %10, align 8
+  %11 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 1
+  store i32 %9, ptr %11, align 8
+  br label %bb3
+
+bb1:                                              ; preds = %start
+  ret i32 %_0
+}
+
+; Function Attrs: inlinehint uwtable
+define internal void @_ZN4core3ops8function6FnOnce9call_once17h3bf07a824888a276E(ptr %_1) unnamed_addr #2 {
+start:
+  %_2 = alloca {}, align 1
+  call void %_1()
+  ret void
+}
+
+; Function Attrs: inlinehint uwtable
+define internal void @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17hbf6a0eaa1969aba0E"(ptr align 8 %_1) unnamed_addr #2 {
+start:
+  ret void
+}
+
+; Function Attrs: inlinehint uwtable
+define internal i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17h22ca6c304b09fb94E"() unnamed_addr #2 {
+start:
+  ret i8 0
+}
+
+; Function Attrs: uwtable
+define dso_local i32 @input() unnamed_addr #1 {
+start:
+  ret i32 0
+}
+
+; Function Attrs: uwtable
+define dso_local void @log(i32 %i) unnamed_addr #1 {
+start:
+  ret void
+}
+
+; Function Attrs: uwtable
+define dso_local void @app() unnamed_addr #1 {
+start:
+  call void @atomic_start()
+  %x = call i32 @input()
+  call void @log(i32 %x)
+  call void @atomic_end()
+  call void @log(i32 1)
+  ret void
+}
+
+; Function Attrs: uwtable
+define internal void @_ZN9example114main17h0b701389294a589fE() unnamed_addr #1 {
+start:
+  call void @app()
+  ret void
+}
+
+; Function Attrs: uwtable
+define dso_local void @atomic_start() unnamed_addr #1 {
+start:
+  %local = load i16, ptr @atomic_depth, align 2, !noundef !4
+  call void @start_atomic()
+  ret void
+}
+
+; Function Attrs: uwtable
+define dso_local void @atomic_end() unnamed_addr #1 {
+start:
+  call void @end_atomic()
+  ret void
+}
+
+; Function Attrs: uwtable
+declare i64 @_ZN3std2rt19lang_start_internal17hadaf077a6dd0140bE(ptr align 1, ptr align 8, i64, ptr, i8) unnamed_addr #1
+
+; Function Attrs: uwtable
+declare i32 @rust_eh_personality(i32, i32, i64, ptr, ptr) unnamed_addr #1
+
+; Function Attrs: uwtable
+declare void @start_atomic() unnamed_addr #1
+
+; Function Attrs: uwtable
+declare void @end_atomic() unnamed_addr #1
+
+define i32 @main(i32 %0, ptr %1) unnamed_addr #3 {
+top:
+  %2 = sext i32 %0 to i64
+  %3 = call i64 @_ZN3std2rt10lang_start17h9a96c5bd5005f31bE(ptr @_ZN9example114main17h0b701389294a589fE, i64 %2, ptr %1, i8 0)
+  %4 = trunc i64 %3 to i32
+  ret i32 %4
+}
+
+attributes #0 = { noinline uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" }
+attributes #1 = { uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" }
+attributes #2 = { inlinehint uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" }
+attributes #3 = { "frame-pointer"="non-leaf" "target-cpu"="apple-m1" }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 8, !"PIC Level", i32 2}
+!1 = !{i32 7, !"PIE Level", i32 2}
+!2 = !{!"rustc version 1.73.0 (cc66ad468 2023-10-03)"}
+!3 = !{i32 1115326}
+!4 = !{}
diff --git a/benchmarks/ctests/example11.orig.ll b/benchmarks/ctests/example11.orig.ll
new file mode 100644
index 0000000..49a1d31
--- /dev/null
+++ b/benchmarks/ctests/example11.orig.ll
@@ -0,0 +1,184 @@
+; ModuleID = '../../benchmarks/ctests/example11.bc'
+source_filename = "example11.808d53e03ac95af8-cgu.0"
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-macosx12.0.0"
+
+@vtable.0 = private unnamed_addr constant <{ ptr, [16 x i8], ptr, ptr, ptr }> <{ ptr @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17hbf6a0eaa1969aba0E", [16 x i8] c"\08\00\00\00\00\00\00\00\08\00\00\00\00\00\00\00", ptr @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h1b5be734946d3eb7E", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17he176d602148ddb94E", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17he176d602148ddb94E" }>, align 8
+@IO_NAME = constant <{ ptr }> <{ ptr @input }>, align 8
+@atomic_depth = external global i16
+
+; Function Attrs: noinline uwtable
+define internal void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17h7035381af5c34fd9E(ptr %f) unnamed_addr #0 {
+start:
+  call void @_ZN4core3ops8function6FnOnce9call_once17h3bf07a824888a276E(ptr %f)
+  call void asm sideeffect "", "~{memory}"(), !srcloc !3
+  ret void
+}
+
+; Function Attrs: uwtable
+define hidden i64 @_ZN3std2rt10lang_start17h9a96c5bd5005f31bE(ptr %main, i64 %argc, ptr %argv, i8 %sigpipe) unnamed_addr #1 {
+start:
+  %_8 = alloca ptr, align 8
+  %_5 = alloca i64, align 8
+  store ptr %main, ptr %_8, align 8
+  %0 = call i64 @_ZN3std2rt19lang_start_internal17hadaf077a6dd0140bE(ptr align 1 %_8, ptr align 8 @vtable.0, i64 %argc, ptr %argv, i8 %sigpipe)
+  store i64 %0, ptr %_5, align 8
+  %v = load i64, ptr %_5, align 8, !noundef !4
+  ret i64 %v
+}
+
+; Function Attrs: inlinehint uwtable
+define internal i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17he176d602148ddb94E"(ptr align 8 %_1) unnamed_addr #2 {
+start:
+  %_4 = load ptr, ptr %_1, align 8, !nonnull !4, !noundef !4
+  call void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17h7035381af5c34fd9E(ptr %_4)
+  %self = call i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17h22ca6c304b09fb94E"()
+  %_0 = zext i8 %self to i32
+  ret i32 %_0
+}
+
+; Function Attrs: inlinehint uwtable
+define internal i32 @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h1b5be734946d3eb7E"(ptr %_1) unnamed_addr #2 {
+start:
+  %_2 = alloca {}, align 1
+  %0 = load ptr, ptr %_1, align 8, !nonnull !4, !noundef !4
+  %_0 = call i32 @_ZN4core3ops8function6FnOnce9call_once17h32b22b06cefb658aE(ptr %0)
+  ret i32 %_0
+}
+
+; Function Attrs: inlinehint uwtable
+define internal i32 @_ZN4core3ops8function6FnOnce9call_once17h32b22b06cefb658aE(ptr %0) unnamed_addr #2 personality ptr @rust_eh_personality {
+start:
+  %1 = alloca { ptr, i32 }, align 8
+  %_2 = alloca {}, align 1
+  %_1 = alloca ptr, align 8
+  store ptr %0, ptr %_1, align 8
+  %_0 = invoke i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17he176d602148ddb94E"(ptr align 8 %_1)
+          to label %bb1 unwind label %cleanup
+
+bb3:                                              ; preds = %cleanup
+  %2 = load ptr, ptr %1, align 8, !noundef !4
+  %3 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 1
+  %4 = load i32, ptr %3, align 8, !noundef !4
+  %5 = insertvalue { ptr, i32 } poison, ptr %2, 0
+  %6 = insertvalue { ptr, i32 } %5, i32 %4, 1
+  resume { ptr, i32 } %6
+
+cleanup:                                          ; preds = %start
+  %7 = landingpad { ptr, i32 }
+          cleanup
+  %8 = extractvalue { ptr, i32 } %7, 0
+  %9 = extractvalue { ptr, i32 } %7, 1
+  %10 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 0
+  store ptr %8, ptr %10, align 8
+  %11 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 1
+  store i32 %9, ptr %11, align 8
+  br label %bb3
+
+bb1:                                              ; preds = %start
+  ret i32 %_0
+}
+
+; Function Attrs: inlinehint uwtable
+define internal void @_ZN4core3ops8function6FnOnce9call_once17h3bf07a824888a276E(ptr %_1) unnamed_addr #2 {
+start:
+  %_2 = alloca {}, align 1
+  call void %_1()
+  ret void
+}
+
+; Function Attrs: inlinehint uwtable
+define internal void @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17hbf6a0eaa1969aba0E"(ptr align 8 %_1) unnamed_addr #2 {
+start:
+  ret void
+}
+
+; Function Attrs: inlinehint uwtable
+define internal i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17h22ca6c304b09fb94E"() unnamed_addr #2 {
+start:
+  ret i8 0
+}
+
+; Function Attrs: uwtable
+define dso_local i32 @input() unnamed_addr #1 {
+start:
+  ret i32 0
+}
+
+; Function Attrs: uwtable
+define dso_local void @log(i32 %i) unnamed_addr #1 {
+start:
+  ret void
+}
+
+; Function Attrs: uwtable
+define dso_local void @app() unnamed_addr #1 {
+start:
+  %x = call i32 @input()
+  call void @log(i32 1)
+  call void @log(i32 %x)
+  call void @Fresh(i32 %x)
+  ret void
+}
+
+; Function Attrs: uwtable
+define internal void @_ZN9example114main17h0b701389294a589fE() unnamed_addr #1 {
+start:
+  call void @app()
+  ret void
+}
+
+; Function Attrs: uwtable
+define internal void @Fresh(i32 %_var) unnamed_addr #1 {
+start:
+  ret void
+}
+
+; Function Attrs: uwtable
+define dso_local void @atomic_start() unnamed_addr #1 {
+start:
+  %local = load i16, ptr @atomic_depth, align 2, !noundef !4
+  call void @start_atomic()
+  ret void
+}
+
+; Function Attrs: uwtable
+define dso_local void @atomic_end() unnamed_addr #1 {
+start:
+  call void @end_atomic()
+  ret void
+}
+
+; Function Attrs: uwtable
+declare i64 @_ZN3std2rt19lang_start_internal17hadaf077a6dd0140bE(ptr align 1, ptr align 8, i64, ptr, i8) unnamed_addr #1
+
+; Function Attrs: uwtable
+declare i32 @rust_eh_personality(i32, i32, i64, ptr, ptr) unnamed_addr #1
+
+; Function Attrs: uwtable
+declare void @start_atomic() unnamed_addr #1
+
+; Function Attrs: uwtable
+declare void @end_atomic() unnamed_addr #1
+
+define i32 @main(i32 %0, ptr %1) unnamed_addr #3 {
+top:
+  %2 = sext i32 %0 to i64
+  %3 = call i64 @_ZN3std2rt10lang_start17h9a96c5bd5005f31bE(ptr @_ZN9example114main17h0b701389294a589fE, i64 %2, ptr %1, i8 0)
+  %4 = trunc i64 %3 to i32
+  ret i32 %4
+}
+
+attributes #0 = { noinline uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" }
+attributes #1 = { uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" }
+attributes #2 = { inlinehint uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" }
+attributes #3 = { "frame-pointer"="non-leaf" "target-cpu"="apple-m1" }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 8, !"PIC Level", i32 2}
+!1 = !{i32 7, !"PIE Level", i32 2}
+!2 = !{!"rustc version 1.73.0 (cc66ad468 2023-10-03)"}
+!3 = !{i32 1115326}
+!4 = !{}
diff --git a/benchmarks/ctests/example11.rs b/benchmarks/ctests/example11.rs
new file mode 100644
index 0000000..8f355c3
--- /dev/null
+++ b/benchmarks/ctests/example11.rs
@@ -0,0 +1,26 @@
+include!("../intermittent.rs");
+
+#[no_mangle]
+fn input() -> i32 {
+    0
+}
+
+#[no_mangle]
+pub static IO_NAME: fn() -> i32 = input;
+
+#[no_mangle]
+fn log(i: i32) -> () {}
+
+#[no_mangle]
+fn app() -> () {
+    let x = input();
+    let y = 1;
+    let z = y;
+    log(z);
+    log(x);
+    Fresh(x);
+}
+
+fn main() -> () {
+    app()
+}
diff --git a/benchmarks/ctests/example12.bc b/benchmarks/ctests/example12.bc
new file mode 100644
index 0000000000000000000000000000000000000000..61bd73f2e2c1e9323279a9713566f04456214c7f
GIT binary patch
literal 8160
zcmd5=4Rljgp1)~cUtUX^ywU<Kq~(PcEC}6~kLH86JNYQ3WvZl=RTSqWFJH9SkC22?
z7{%8nGzE(mr|uSI$5v3(<LoIPi;UvgwyD^`>J(*N6b{?Uhv=YI1lP0n%<TWZv?VUg
zjJs!@*?aOn?l1TL@Bj06Z*snG&I|-C9j?Lo2>k(?#oBki000?aPARJ}zm@!oW5%y&
z$>vegH%j_P$ZIzF*Cu%#EvffO0sxMX&>drBS8Xy&I&%-qQ%VuqMj$jJL5UKeKN~Jd
zbIxw_5K&IjYU7iluykP}m6=UE=H$~?Bv_uDVcAcs(+yO{Pm|SEZDqbeWlqV=NIPwQ
zGQmAW9ZYAerESb_MCz(=#*(}x9jb?WAKcBnIRtWdFbK_q=K?Q5AF{NRr&hsj_r?KR
zS#r{%6(<rcRf{OY(>bP@3(h1^h)?NLh_nedLVTGr^`uh_eVOX(rjk3&i`~TYJ4Z}O
zmdvNC7TQ*@FB30hP>8UoI;$vNotH=^NF7A?3+OVHfGRT(s)RM?U17e(J`?#`SegoS
zM10i|f3>)&DH8Naw&qE;pmSqc=Xz4k^e6`U6{sb9z?}VVkK#&C$_Y{SXCg|{#7i3U
zBD(5`0qKUC$FVFcO(t-%Yqacho2(AWzwD5GVUk~B<b2`rP~j0#VW-v5B{q#%4MS05
zr?~h?zoE}sjK+|Byiay%g#4VA^pVRmeh-o!MQx0f7*JePDn9B_e1x*kE3?iuXP-1@
zqs>i{&DCO9+Q&-x?uf4=63CNmtqyJEBU{mcD0{Ry>ts*LM@ku=l9)<}4)x@<FpMs}
zlqULHKc-vIhvVcBR#qN14p|LHti~gvqT|-($Hc{*p(6Cfxa^A&64GJJAwy<J4f!WF
z+4T|f0+QcAVR2EP7_69g#Hs)nqH^#h1<$k=hlQ!2NvDOapj({-OvYPbv<Kh$E*kPN
ztrxUx>=iGs4*7UVlLH(bYNR3?n<88KB8E1xo`+8;g#)jR$hMBl>N;eNCdpQZBsfBT
zN=y8_WUFwm8<ho^N&K*YtddiyPnuiX(=$a{OV{2ObLRZ|=!ciOh8N!XM1w}NrRbMC
z-ge(#_rcs}f0uve@g!3#C!Lvqc+i<KA)b;bhERuGIt%g2fBhy|f}|pZOz<TlP=fTO
z)8>?!_?6-@1PPT<b&6QB6tPeu2u30rQSAPYcm2$P91L`ExiE-!lx@1>wO-U`f-V#i
z2xR&?mUyo_M0_)JvBP?i30C~<O1}t+K@bfP67k6T^32yFV$=!{gP{Z@YF#r?k|sma
z#CXLV!?UvyWVm2ROtC%B7)*%8f?#q?F-!3_A9CagawZj9vD2I?5@g?0Y*xseAQ+7m
z<FR71?j)mxAp53b_eqBx3A`DGOb`3JVsblyLc(%z!}>3IMB0@ksK5Xc(|=KBUYg(=
z6NUuGI@Dd_WD1EV&4dZ9YxZSU%%Wk2pbDNVeW_OCSj1o#@^~i9&8XV3zPrLCsDRA`
zW!U}B-}1c(8K47`q0uI1$iyv&C@;n8%i&UIcV%RzR&=x{En0bkuvR5j5|?bp4jmNb
zAfgnN<%aP*XuNO-6~r?Z7VT`INSJ&Oa{M^p8j4p8jw!t9qT)8IAslVD@RBV}l0er+
z>18DUoR(eZWg4j^d!RoHXu``T3d2J4hQ-B0*5Y<?@o-Z+Fw4&=$(FK+4=BbbZfuJ9
zm`LVus_bfqY>buHv+@h$vT>VCLl0)1S1QgZvj(_qFS`TN#|RXvxafGOC@d}<9#3y{
zNPb=|(PPe%=y}WnJ_qoTr19HC5WAU=*rC2MBL1$(magWdxT;5GH~M5(S+R4dM{%)7
zF)ZBAaf*{l#UM}yM}E*@1x7G-3HM`B1F%&aRD1MqV9|+|!qeiy-l+7gnuu?0#P16Q
zt8pO*Ga|w6Nanlu$iG1HYc|=%4%vmN4MoG`Wu5ECy!oC|0pxXVRB>@E>oZZdH>1+f
zYhAuKR5)zQoixM|0bXnBs;s!)A;%ub-jIEgQ(RGIo$t@uCd$SNE3?Kqa?SRTVK`dY
zYb^$cObirVv=+8OH`@MYGFTef4Ej)*zWrUz4_=^USAke<vai{k{`bt;7suwD8Ou5k
z3Y29Q_J)d&gyw~#hIXrQI9hb9-#8Qn8;gd^j%=hPpw&ij;Mz#$*%Pv>4*BP!<TxY0
z#9}u8#wM$$Wfxc(biW=YKOL1_w8_3@b3VSxDF&Le&Wz2us?55q%o<c?K?JWT=X;rd
zEIe*4J|;GHhK$2&k3;mq*sY*vS!A;>lKJ*0vM*Wr*r@DNll;?eSS2bF2VRtbE%q)*
z73|^Q2!ygZY&MQY!F9zDsbSM#HxPZ+s5$$5zrvgAVr64EzH$(9iM1+2n=&MUvWUGq
z@-Po`4|a-*+M`9CQNxIJIi%AtcuZQRoMqv%LDhHrv#OfYy^8g${2D8}Ixf3_wlD}S
zg}nD?moe5hX=ajNUB=`zx9{G4s-&CFa~G-9md87k`wVmsDnCRbTe+2~@HBdAZRIOs
zFaAw|po?kr_yaW!^^7WCugPbbJeNzS<J?-Efl;$+%`z3cjMXfLb#kbax|VKV4z)_(
z(*3YTT4Eb!Gb}1AJB)@I#rAe$QC^Nt_qy&Mj_MCBKdPRiIof;p*Og&$`SxA!{L?!N
zcfB&atG<&Fp_iaumw~()$^Ja31IgC4zAo@RcAIe|+HzTOq9^5w8J8*89m}uT+6xZZ
zCpI~3b{BE-3R_+;-z?bzn=DjSQv@;s=hRx?-lzfg@%^FV&S=6zP@sx)V~TT(;v$Fd
zSDLd2IK@bhVknjfT)#+uoso~(<iHIx+F}7o{H+1pe+G&y3h7V?*$3P(BrXJ%(J3G-
z5}uObqWz)5y-<AO;_grp6k<md^Bu>;&P!>8S>QH2mN@ay3#e>@n~%>4V_6rK^Z!$z
z#;@2IA*K;;o~5<mUosK3IDEl@Hj#*DGAf!|#UdbJR5~+VVk=9d?}B%cDmG_b>!@@t
z^l{^u7)wvBiG}Z91$>yww~+hUwC22OQ*x9H`{=$la|=keJ=IH=nx*q-iwWg2hs*{V
z;e+bmFPJ#Khc>y_H2TZhzGCFuQnM$Q5Hq`8q8_bZNYI%BqV)HO64)x@^>_{+NrlES
zv9oR(Axh-iGtv>&(yt=>W7ZTsKrYcy61)LmpWM4(;@BQ~=E3yF1rtYhQ;$A^wrfSF
zh@18$J1UpMj*1({AcXgGu#ZgkQ6@*o$4H>>RhLBExvem7&DkB}@83I9JMAX;Ha2e-
zJa6?&R~6Y~5;HM}_wwdm>;i<B^DdY;+C%-ME_c8_-=w_Dqy=YA`+Vp3b;3EEMwAF~
z#&+#r#b2w2rzUkzmEiZq9y%F<fIGD&f)rH*a+x;iKou!U<*yf|zhiD*a5!ZM4eJOC
zyPhV#Cats4z9bR*G(!~2r7KeA(fuY=EWhgpgbLmc6aD#PCWK)7MtYVo*X0c~JhOBB
zeTdtQB*lOQrQCS1k0$<ODoR*yF^pZ;`rJK7r7I7>tI#+mw&&j7FRasq7>azq8!v^o
zcEKlcAp#`kTO5Nl0%vJ#ol?+aYVD~pI9cM?IbAA-Jl~$s;uv)dR9jH{BC5YutVD-X
zn(10vM&R5iwUiB3ok{xY{lBft8(jU}Ei$qF@iNm(q2|J35spE825?O5cUJsZy#C>o
ze(ZBAjyjHcsT0=w_Q+EikdE7?5)p7bUkA^r7-L_CSMh6-Z1Ym!72>?hN$wjtYs(X7
zZs_Pf;S15-{YyhM5hY8;q<u^KO*`i1kR;S}U$?mfV)#=Qy^$j6#L;9?lq8<RgGSEs
zC8>!UuIuvBtj%)|r@SNN1J(_ni#|{r-u5L}=`%6IPFN<6Jdpu07kspK=lF#=EFsz^
zw$c}mS|*M?(e>E)?_RJ>y!r&z562(dw~p+A_*)OuW_H~DVU=m{Jxu1%I3`w$X+-~Q
z9PHcq@0J+PWa3>c$S46>X~c@@Itaso?I}|w_D>bLk4-z3=V74^gpN~4u2Z^N6E%#k
z>!@}WRK^K-limf^zmf9hFKpc>+L|%{nOrO}Sq*Qb%n+&p`>#T%1{POCo4E(Z88(c7
zM_04x`ir|oJI7l2gwXvYN0myG5z%i_hgPnPy{!^&80<zSTDo7`<V{MD30#DKhqqdo
zwA4MTe0!u{o!wTa8f^IPdXm)K`|nUozkZ(jL){$%&z{{i{^d%RXx=8W(&wfefc=Z>
zV;#94{JZKv?cycQPw|g^XdDwehJ$Y>+58tmo%|plTP9}FSySv2lZA5ucy)(4HHRc1
zqu7GEGQ2jvN~@MWj+Rbo6~|X(_xHFN|17wv&trd7ADrCrHG}KR$YiQah$cc|=_?Le
zZNvE~*uqx4D~9p7^)LwGO>!Uc8=HuE87VE{Hd<*Jq;||TwMmr=O4?|ZrAkIGaD@kt
zGbOEUsWY$fbf%^10eXQu96rss!|rperNEagevv?CODca-MDWN_nRvhkJDim5>|c!c
zfla<-Fk56%?oCL1j^KTjwxk0rUxrZUz+&u=fA%E<jWk<Tdo9qjr5u2L7Ij7!uLXCJ
zH&8OrmC4Kt-=%hsi;%5IyR^d8Ik*@cM5GpM6b4lC1E_>&_{v$`4E$nocakO~V#??Y
zw1iGflqAnaR3ZdrBYcTDP7P*-#orQTw)^?SR}-s89^@rw6WbU&pVULX$UMZ)c%2+V
zb$rTM@=9+#FW)Dj4sPXBk4W-*9_7=*RN1LEe%3Fk7n^q?^jPxg(y!BIPdqbN`6#nR
z{pxp))t~HsUi|xiD?d61mO>tI!t3yyDefG_^^N^M4u)fkD&PY95ZEd-{KWs?j3&hT
zYM21WFSH7t3_Nj8;5PL=e#hf++W=$0@j`z!Jn>%=2z>b(;CJHh;aW0@i{Sin3Gj00
zcftkHm>!=G(RdL0=ipiwhwlU&_w}Ig19A8zzzd<j8}Q|EINtOz{`P;v*8$!Hcp})i
zId1&>fIkKOhXKbrP3d0^M&Ad08XR|T9R4qWyP)p{+!=>2fDPvn=nnz@a2)<D;Gilh
z0pEk^G5O$!=K;rgSPnOsBH%pWFHPbVaX13}SAgFE<5mN{Xi5(F`ShgxO@L3!2mB+z
zagA;T9M^+j7s&YpaD0wy1w1(pzXmvbjXD6=OyU#(i(pVX^x>ZbA&0RXj4uZq%l{xw
zKE_u9j^%$GCm-WgfMflx0uG;PeeMGsK0?EJCg=&t#{r1@_<|6{aF|#FI9?ZX-IQGv
z76|QU0Q5i{A?LqY3jM|)=dAVQv*oMpcbo58!37)YYFu`=#}}-&KXA7u5Ok~9z?OjB
z)lgU0P;Yc{u7`qt&gD@X>~{RC$L{p3uc@~OsvG?9KJ~3iUu|^h^*X)I#pw)s&Rw8h
zF)2>%531N&u715e5ai%pU(IUuTD9Bdbr*OIdY9VxM@GVmxtsLPT;b-bZ3t}gd*I!T
z0e3FQ(P)f%73*}n3?2`sGU%}!t=^tnu_m`hqt4x=W^=iK@NhNwJ^A@HL61K_-w&!l
z=boU)m%DNe$niG#H*<cseN(-w+T(i2<Hn+$9-U4N+G)Hlr<2nPqOlZBgD+t4!q6c6
z@Y9vnuda7_w5*G(t+hA6>rI6x{T!&Ssr#PNYEEq^&}p>>y;0-Vx}1MfvT<54=)}2=
zde&g@78DqbDwo>z!)Lm5oY4)A=2(_<=`^03Gkrn7p}_5L@Yx}zzFLSg_8uew_$ip5
ze=}?t&aJTJ=2pQ4Zp+WVY3ePpiTSl!tB0^SRXQi9@jy=8bfOkVqCg0QidDq|R_Cj|
zB~*}jHNikEaFD1FymFX`--MwMxVR}0nCakF;TU6;%~%zDY3n^rIQKLvgU;*qx>Oo9
ztKl>{?1}=dpu)6yafu^%!sl;rc>*APjmKYCQ_tbhW0^X?#|KpaQe8TiQ(K^RY4vKO
zOXV%VQgs?ZYPq1|ZSru+I9Ee$t;Yq<F0T(ZOxp+IjWqz+v8!MOqsyRC=~Wt!$EooY
zc)WN8gHc!^mfW{Be~n;%oISTG1i@ER>z3m%;I#ujkIO#kGnLb*(->SHX925qy1=@c
zdf%pCZNqxb=L0p2o+b{ac~oj`9amG2om^0$F{;%CPADa<M{8J7>h}byC#gnYfysAI
zy&D8z(!nfYyqRv0j$n5^R}0cOH|J&bdQRtdvnnm?Tp`eqr`qm=zh4?)Oigf$Ak-d{
ziiO7BV(R3x`RZkO&a&A5G|ONxP$1*hvQU4V)}t}Hy#>Ys;1Yw*Xfzny9=G1*^r%!|
zZh_j%`RjDrWngQqXIUNRY6vu`R3MX`HalPKap^T)x61{z;Z_%Dmbun%%4hA9rv0Ds
zX`HM69HHKR=>LLILC}vzsaW=p#HoKu^q(QsA3XI|R-K;u7pB#zb^l-SDk$}15G#oN
zv6&T?_^VMX5Fc=>9mv&=*%egz0rYAY_;r%>V#lnh4kkko@;{~6*irBM9DA$aNfyU6
XTVbC%SlTB~mQxxCM@!)-I(_>u&eATE

literal 0
HcmV?d00001

diff --git a/benchmarks/ctests/example12.ll b/benchmarks/ctests/example12.ll
new file mode 100644
index 0000000..7438e4d
--- /dev/null
+++ b/benchmarks/ctests/example12.ll
@@ -0,0 +1,274 @@
+; ModuleID = '../../benchmarks/ctests/example12.bc'
+source_filename = "example12.2ec73fdcc3bed253-cgu.0"
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-macosx12.0.0"
+
+@vtable.0 = private unnamed_addr constant <{ ptr, [16 x i8], ptr, ptr, ptr }> <{ ptr @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17h47ea1b16ba3e87a4E", [16 x i8] c"\08\00\00\00\00\00\00\00\08\00\00\00\00\00\00\00", ptr @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h2a2856448793d4cbE", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h339710bdc8eea187E", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h339710bdc8eea187E" }>, align 8
+@IO_NAME = constant <{ ptr }> <{ ptr @input }>, align 8
+@atomic_depth = external global i16
+
+; Function Attrs: noinline uwtable
+define internal void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17h9c77676ca687ad52E(ptr %f) unnamed_addr #0 {
+start:
+  call void @_ZN4core3ops8function6FnOnce9call_once17hc6a9dc29a00ac63eE(ptr %f)
+  call void asm sideeffect "", "~{memory}"(), !srcloc !3
+  ret void
+}
+
+; Function Attrs: uwtable
+define hidden i64 @_ZN3std2rt10lang_start17h04742dcfd5f87c29E(ptr %main, i64 %argc, ptr %argv, i8 %sigpipe) unnamed_addr #1 {
+start:
+  %_8 = alloca ptr, align 8
+  %_5 = alloca i64, align 8
+  store ptr %main, ptr %_8, align 8
+  %0 = call i64 @_ZN3std2rt19lang_start_internal17hadaf077a6dd0140bE(ptr align 1 %_8, ptr align 8 @vtable.0, i64 %argc, ptr %argv, i8 %sigpipe)
+  store i64 %0, ptr %_5, align 8
+  %v = load i64, ptr %_5, align 8, !noundef !4
+  ret i64 %v
+}
+
+; Function Attrs: inlinehint uwtable
+define internal i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h339710bdc8eea187E"(ptr align 8 %_1) unnamed_addr #2 {
+start:
+  %_4 = load ptr, ptr %_1, align 8, !nonnull !4, !noundef !4
+  call void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17h9c77676ca687ad52E(ptr %_4)
+  %self = call i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17hc6cb452c4729c1f5E"()
+  %_0 = zext i8 %self to i32
+  ret i32 %_0
+}
+
+; Function Attrs: inlinehint uwtable
+define internal i32 @"_ZN47_$LT$i32$u20$as$u20$core..iter..range..Step$GT$17forward_unchecked17h3be66287c3fcbba4E"(i32 %start1, i64 %n) unnamed_addr #2 {
+start:
+  %rhs = trunc i64 %n to i32
+  %_0 = add nsw i32 %start1, %rhs
+  ret i32 %_0
+}
+
+; Function Attrs: inlinehint uwtable
+define internal i32 @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h2a2856448793d4cbE"(ptr %_1) unnamed_addr #2 {
+start:
+  %_2 = alloca {}, align 1
+  %0 = load ptr, ptr %_1, align 8, !nonnull !4, !noundef !4
+  %_0 = call i32 @_ZN4core3ops8function6FnOnce9call_once17had97088f55991c2cE(ptr %0)
+  ret i32 %_0
+}
+
+; Function Attrs: inlinehint uwtable
+define internal i32 @_ZN4core3ops8function6FnOnce9call_once17had97088f55991c2cE(ptr %0) unnamed_addr #2 personality ptr @rust_eh_personality {
+start:
+  %1 = alloca { ptr, i32 }, align 8
+  %_2 = alloca {}, align 1
+  %_1 = alloca ptr, align 8
+  store ptr %0, ptr %_1, align 8
+  %_0 = invoke i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h339710bdc8eea187E"(ptr align 8 %_1)
+          to label %bb1 unwind label %cleanup
+
+bb3:                                              ; preds = %cleanup
+  %2 = load ptr, ptr %1, align 8, !noundef !4
+  %3 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 1
+  %4 = load i32, ptr %3, align 8, !noundef !4
+  %5 = insertvalue { ptr, i32 } poison, ptr %2, 0
+  %6 = insertvalue { ptr, i32 } %5, i32 %4, 1
+  resume { ptr, i32 } %6
+
+cleanup:                                          ; preds = %start
+  %7 = landingpad { ptr, i32 }
+          cleanup
+  %8 = extractvalue { ptr, i32 } %7, 0
+  %9 = extractvalue { ptr, i32 } %7, 1
+  %10 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 0
+  store ptr %8, ptr %10, align 8
+  %11 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 1
+  store i32 %9, ptr %11, align 8
+  br label %bb3
+
+bb1:                                              ; preds = %start
+  ret i32 %_0
+}
+
+; Function Attrs: inlinehint uwtable
+define internal void @_ZN4core3ops8function6FnOnce9call_once17hc6a9dc29a00ac63eE(ptr %_1) unnamed_addr #2 {
+start:
+  %_2 = alloca {}, align 1
+  call void %_1()
+  ret void
+}
+
+; Function Attrs: inlinehint uwtable
+define internal void @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17h47ea1b16ba3e87a4E"(ptr align 8 %_1) unnamed_addr #2 {
+start:
+  ret void
+}
+
+; Function Attrs: inlinehint uwtable
+define internal { i32, i32 } @"_ZN4core4iter5range101_$LT$impl$u20$core..iter..traits..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h3186fffc13203a36E"(ptr align 4 %self) unnamed_addr #2 {
+start:
+  %0 = call { i32, i32 } @"_ZN89_$LT$core..ops..range..Range$LT$T$GT$$u20$as$u20$core..iter..range..RangeIteratorImpl$GT$9spec_next17h1b9638ceb504bcf5E"(ptr align 4 %self)
+  %_0.0 = extractvalue { i32, i32 } %0, 0
+  %_0.1 = extractvalue { i32, i32 } %0, 1
+  %1 = insertvalue { i32, i32 } poison, i32 %_0.0, 0
+  %2 = insertvalue { i32, i32 } %1, i32 %_0.1, 1
+  ret { i32, i32 } %2
+}
+
+; Function Attrs: inlinehint uwtable
+define internal i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17hc6cb452c4729c1f5E"() unnamed_addr #2 {
+start:
+  ret i8 0
+}
+
+; Function Attrs: inlinehint uwtable
+define internal { i32, i32 } @"_ZN63_$LT$I$u20$as$u20$core..iter..traits..collect..IntoIterator$GT$9into_iter17h9c831713eeb3e5efE"(i32 %self.0, i32 %self.1) unnamed_addr #2 {
+start:
+  %0 = insertvalue { i32, i32 } poison, i32 %self.0, 0
+  %1 = insertvalue { i32, i32 } %0, i32 %self.1, 1
+  ret { i32, i32 } %1
+}
+
+; Function Attrs: inlinehint uwtable
+define internal { i32, i32 } @"_ZN89_$LT$core..ops..range..Range$LT$T$GT$$u20$as$u20$core..iter..range..RangeIteratorImpl$GT$9spec_next17h1b9638ceb504bcf5E"(ptr align 4 %self) unnamed_addr #2 {
+start:
+  %_0 = alloca { i32, i32 }, align 4
+  %_4 = getelementptr inbounds { i32, i32 }, ptr %self, i32 0, i32 1
+  %_3.i = load i32, ptr %self, align 4, !noundef !4
+  %_4.i = load i32, ptr %_4, align 4, !noundef !4
+  %_0.i = icmp slt i32 %_3.i, %_4.i
+  br i1 %_0.i, label %bb2, label %bb4
+
+bb4:                                              ; preds = %start
+  store i32 0, ptr %_0, align 4
+  br label %bb5
+
+bb2:                                              ; preds = %start
+  %old = load i32, ptr %self, align 4, !noundef !4
+  %_6 = call i32 @"_ZN47_$LT$i32$u20$as$u20$core..iter..range..Step$GT$17forward_unchecked17h3be66287c3fcbba4E"(i32 %old, i64 1)
+  store i32 %_6, ptr %self, align 4
+  %0 = getelementptr inbounds { i32, i32 }, ptr %_0, i32 0, i32 1
+  store i32 %old, ptr %0, align 4
+  store i32 1, ptr %_0, align 4
+  br label %bb5
+
+bb5:                                              ; preds = %bb2, %bb4
+  %1 = getelementptr inbounds { i32, i32 }, ptr %_0, i32 0, i32 0
+  %2 = load i32, ptr %1, align 4, !range !5, !noundef !4
+  %3 = getelementptr inbounds { i32, i32 }, ptr %_0, i32 0, i32 1
+  %4 = load i32, ptr %3, align 4
+  %5 = insertvalue { i32, i32 } poison, i32 %2, 0
+  %6 = insertvalue { i32, i32 } %5, i32 %4, 1
+  ret { i32, i32 } %6
+}
+
+; Function Attrs: uwtable
+define dso_local i32 @input() unnamed_addr #1 {
+start:
+  ret i32 0
+}
+
+; Function Attrs: uwtable
+define dso_local void @log(i32 %i) unnamed_addr #1 {
+start:
+  ret void
+}
+
+; Function Attrs: uwtable
+define dso_local void @app() unnamed_addr #1 {
+start:
+  %_5 = alloca { i32, i32 }, align 4
+  %iter = alloca { i32, i32 }, align 4
+  %_3 = alloca { i32, i32 }, align 4
+  call void @atomic_start()
+  %x = call i32 @input()
+  store i32 0, ptr %_3, align 4
+  %0 = getelementptr inbounds { i32, i32 }, ptr %_3, i32 0, i32 1
+  store i32 10, ptr %0, align 4
+  %1 = getelementptr inbounds { i32, i32 }, ptr %_3, i32 0, i32 0
+  %2 = load i32, ptr %1, align 4, !noundef !4
+  %3 = getelementptr inbounds { i32, i32 }, ptr %_3, i32 0, i32 1
+  %4 = load i32, ptr %3, align 4, !noundef !4
+  %5 = call { i32, i32 } @"_ZN63_$LT$I$u20$as$u20$core..iter..traits..collect..IntoIterator$GT$9into_iter17h9c831713eeb3e5efE"(i32 %2, i32 %4)
+  %_2.0 = extractvalue { i32, i32 } %5, 0
+  %_2.1 = extractvalue { i32, i32 } %5, 1
+  %6 = getelementptr inbounds { i32, i32 }, ptr %iter, i32 0, i32 0
+  store i32 %_2.0, ptr %6, align 4
+  %7 = getelementptr inbounds { i32, i32 }, ptr %iter, i32 0, i32 1
+  store i32 %_2.1, ptr %7, align 4
+  br label %bb3
+
+bb3:                                              ; preds = %bb5, %start
+  %8 = call { i32, i32 } @"_ZN4core4iter5range101_$LT$impl$u20$core..iter..traits..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h3186fffc13203a36E"(ptr align 4 %iter)
+  store { i32, i32 } %8, ptr %_5, align 4
+  %9 = load i32, ptr %_5, align 4, !range !5, !noundef !4
+  %_7 = zext i32 %9 to i64
+  %10 = icmp eq i64 %_7, 0
+  br i1 %10, label %bb7, label %bb5
+
+bb7:                                              ; preds = %bb3
+  call void @atomic_end()
+  ret void
+
+bb5:                                              ; preds = %bb3
+  call void @log(i32 1)
+  call void @log(i32 %x)
+  br label %bb3
+
+bb6:                                              ; No predecessors!
+  unreachable
+}
+
+; Function Attrs: uwtable
+define internal void @_ZN9example124main17h35539225bd174e48E() unnamed_addr #1 {
+start:
+  call void @app()
+  ret void
+}
+
+; Function Attrs: uwtable
+define dso_local void @atomic_start() unnamed_addr #1 {
+start:
+  %local = load i16, ptr @atomic_depth, align 2, !noundef !4
+  call void @start_atomic()
+  ret void
+}
+
+; Function Attrs: uwtable
+define dso_local void @atomic_end() unnamed_addr #1 {
+start:
+  call void @end_atomic()
+  ret void
+}
+
+; Function Attrs: uwtable
+declare i64 @_ZN3std2rt19lang_start_internal17hadaf077a6dd0140bE(ptr align 1, ptr align 8, i64, ptr, i8) unnamed_addr #1
+
+; Function Attrs: uwtable
+declare i32 @rust_eh_personality(i32, i32, i64, ptr, ptr) unnamed_addr #1
+
+; Function Attrs: uwtable
+declare void @start_atomic() unnamed_addr #1
+
+; Function Attrs: uwtable
+declare void @end_atomic() unnamed_addr #1
+
+define i32 @main(i32 %0, ptr %1) unnamed_addr #3 {
+top:
+  %2 = sext i32 %0 to i64
+  %3 = call i64 @_ZN3std2rt10lang_start17h04742dcfd5f87c29E(ptr @_ZN9example124main17h35539225bd174e48E, i64 %2, ptr %1, i8 0)
+  %4 = trunc i64 %3 to i32
+  ret i32 %4
+}
+
+attributes #0 = { noinline uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" }
+attributes #1 = { uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" }
+attributes #2 = { inlinehint uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" }
+attributes #3 = { "frame-pointer"="non-leaf" "target-cpu"="apple-m1" }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 8, !"PIC Level", i32 2}
+!1 = !{i32 7, !"PIE Level", i32 2}
+!2 = !{!"rustc version 1.73.0 (cc66ad468 2023-10-03)"}
+!3 = !{i32 1453212}
+!4 = !{}
+!5 = !{i32 0, i32 2}
diff --git a/benchmarks/ctests/example12.orig.ll b/benchmarks/ctests/example12.orig.ll
new file mode 100644
index 0000000..a4c7d70
--- /dev/null
+++ b/benchmarks/ctests/example12.orig.ll
@@ -0,0 +1,279 @@
+; ModuleID = '../../benchmarks/ctests/example12.bc'
+source_filename = "example12.2ec73fdcc3bed253-cgu.0"
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-macosx12.0.0"
+
+@vtable.0 = private unnamed_addr constant <{ ptr, [16 x i8], ptr, ptr, ptr }> <{ ptr @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17h47ea1b16ba3e87a4E", [16 x i8] c"\08\00\00\00\00\00\00\00\08\00\00\00\00\00\00\00", ptr @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h2a2856448793d4cbE", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h339710bdc8eea187E", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h339710bdc8eea187E" }>, align 8
+@IO_NAME = constant <{ ptr }> <{ ptr @input }>, align 8
+@atomic_depth = external global i16
+
+; Function Attrs: noinline uwtable
+define internal void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17h9c77676ca687ad52E(ptr %f) unnamed_addr #0 {
+start:
+  call void @_ZN4core3ops8function6FnOnce9call_once17hc6a9dc29a00ac63eE(ptr %f)
+  call void asm sideeffect "", "~{memory}"(), !srcloc !3
+  ret void
+}
+
+; Function Attrs: uwtable
+define hidden i64 @_ZN3std2rt10lang_start17h04742dcfd5f87c29E(ptr %main, i64 %argc, ptr %argv, i8 %sigpipe) unnamed_addr #1 {
+start:
+  %_8 = alloca ptr, align 8
+  %_5 = alloca i64, align 8
+  store ptr %main, ptr %_8, align 8
+  %0 = call i64 @_ZN3std2rt19lang_start_internal17hadaf077a6dd0140bE(ptr align 1 %_8, ptr align 8 @vtable.0, i64 %argc, ptr %argv, i8 %sigpipe)
+  store i64 %0, ptr %_5, align 8
+  %v = load i64, ptr %_5, align 8, !noundef !4
+  ret i64 %v
+}
+
+; Function Attrs: inlinehint uwtable
+define internal i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h339710bdc8eea187E"(ptr align 8 %_1) unnamed_addr #2 {
+start:
+  %_4 = load ptr, ptr %_1, align 8, !nonnull !4, !noundef !4
+  call void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17h9c77676ca687ad52E(ptr %_4)
+  %self = call i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17hc6cb452c4729c1f5E"()
+  %_0 = zext i8 %self to i32
+  ret i32 %_0
+}
+
+; Function Attrs: inlinehint uwtable
+define internal i32 @"_ZN47_$LT$i32$u20$as$u20$core..iter..range..Step$GT$17forward_unchecked17h3be66287c3fcbba4E"(i32 %start1, i64 %n) unnamed_addr #2 {
+start:
+  %rhs = trunc i64 %n to i32
+  %_0 = add nsw i32 %start1, %rhs
+  ret i32 %_0
+}
+
+; Function Attrs: inlinehint uwtable
+define internal i32 @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h2a2856448793d4cbE"(ptr %_1) unnamed_addr #2 {
+start:
+  %_2 = alloca {}, align 1
+  %0 = load ptr, ptr %_1, align 8, !nonnull !4, !noundef !4
+  %_0 = call i32 @_ZN4core3ops8function6FnOnce9call_once17had97088f55991c2cE(ptr %0)
+  ret i32 %_0
+}
+
+; Function Attrs: inlinehint uwtable
+define internal i32 @_ZN4core3ops8function6FnOnce9call_once17had97088f55991c2cE(ptr %0) unnamed_addr #2 personality ptr @rust_eh_personality {
+start:
+  %1 = alloca { ptr, i32 }, align 8
+  %_2 = alloca {}, align 1
+  %_1 = alloca ptr, align 8
+  store ptr %0, ptr %_1, align 8
+  %_0 = invoke i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h339710bdc8eea187E"(ptr align 8 %_1)
+          to label %bb1 unwind label %cleanup
+
+bb3:                                              ; preds = %cleanup
+  %2 = load ptr, ptr %1, align 8, !noundef !4
+  %3 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 1
+  %4 = load i32, ptr %3, align 8, !noundef !4
+  %5 = insertvalue { ptr, i32 } poison, ptr %2, 0
+  %6 = insertvalue { ptr, i32 } %5, i32 %4, 1
+  resume { ptr, i32 } %6
+
+cleanup:                                          ; preds = %start
+  %7 = landingpad { ptr, i32 }
+          cleanup
+  %8 = extractvalue { ptr, i32 } %7, 0
+  %9 = extractvalue { ptr, i32 } %7, 1
+  %10 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 0
+  store ptr %8, ptr %10, align 8
+  %11 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 1
+  store i32 %9, ptr %11, align 8
+  br label %bb3
+
+bb1:                                              ; preds = %start
+  ret i32 %_0
+}
+
+; Function Attrs: inlinehint uwtable
+define internal void @_ZN4core3ops8function6FnOnce9call_once17hc6a9dc29a00ac63eE(ptr %_1) unnamed_addr #2 {
+start:
+  %_2 = alloca {}, align 1
+  call void %_1()
+  ret void
+}
+
+; Function Attrs: inlinehint uwtable
+define internal void @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17h47ea1b16ba3e87a4E"(ptr align 8 %_1) unnamed_addr #2 {
+start:
+  ret void
+}
+
+; Function Attrs: inlinehint uwtable
+define internal { i32, i32 } @"_ZN4core4iter5range101_$LT$impl$u20$core..iter..traits..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h3186fffc13203a36E"(ptr align 4 %self) unnamed_addr #2 {
+start:
+  %0 = call { i32, i32 } @"_ZN89_$LT$core..ops..range..Range$LT$T$GT$$u20$as$u20$core..iter..range..RangeIteratorImpl$GT$9spec_next17h1b9638ceb504bcf5E"(ptr align 4 %self)
+  %_0.0 = extractvalue { i32, i32 } %0, 0
+  %_0.1 = extractvalue { i32, i32 } %0, 1
+  %1 = insertvalue { i32, i32 } poison, i32 %_0.0, 0
+  %2 = insertvalue { i32, i32 } %1, i32 %_0.1, 1
+  ret { i32, i32 } %2
+}
+
+; Function Attrs: inlinehint uwtable
+define internal i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17hc6cb452c4729c1f5E"() unnamed_addr #2 {
+start:
+  ret i8 0
+}
+
+; Function Attrs: inlinehint uwtable
+define internal { i32, i32 } @"_ZN63_$LT$I$u20$as$u20$core..iter..traits..collect..IntoIterator$GT$9into_iter17h9c831713eeb3e5efE"(i32 %self.0, i32 %self.1) unnamed_addr #2 {
+start:
+  %0 = insertvalue { i32, i32 } poison, i32 %self.0, 0
+  %1 = insertvalue { i32, i32 } %0, i32 %self.1, 1
+  ret { i32, i32 } %1
+}
+
+; Function Attrs: inlinehint uwtable
+define internal { i32, i32 } @"_ZN89_$LT$core..ops..range..Range$LT$T$GT$$u20$as$u20$core..iter..range..RangeIteratorImpl$GT$9spec_next17h1b9638ceb504bcf5E"(ptr align 4 %self) unnamed_addr #2 {
+start:
+  %_0 = alloca { i32, i32 }, align 4
+  %_4 = getelementptr inbounds { i32, i32 }, ptr %self, i32 0, i32 1
+  %_3.i = load i32, ptr %self, align 4, !noundef !4
+  %_4.i = load i32, ptr %_4, align 4, !noundef !4
+  %_0.i = icmp slt i32 %_3.i, %_4.i
+  br i1 %_0.i, label %bb2, label %bb4
+
+bb4:                                              ; preds = %start
+  store i32 0, ptr %_0, align 4
+  br label %bb5
+
+bb2:                                              ; preds = %start
+  %old = load i32, ptr %self, align 4, !noundef !4
+  %_6 = call i32 @"_ZN47_$LT$i32$u20$as$u20$core..iter..range..Step$GT$17forward_unchecked17h3be66287c3fcbba4E"(i32 %old, i64 1)
+  store i32 %_6, ptr %self, align 4
+  %0 = getelementptr inbounds { i32, i32 }, ptr %_0, i32 0, i32 1
+  store i32 %old, ptr %0, align 4
+  store i32 1, ptr %_0, align 4
+  br label %bb5
+
+bb5:                                              ; preds = %bb2, %bb4
+  %1 = getelementptr inbounds { i32, i32 }, ptr %_0, i32 0, i32 0
+  %2 = load i32, ptr %1, align 4, !range !5, !noundef !4
+  %3 = getelementptr inbounds { i32, i32 }, ptr %_0, i32 0, i32 1
+  %4 = load i32, ptr %3, align 4
+  %5 = insertvalue { i32, i32 } poison, i32 %2, 0
+  %6 = insertvalue { i32, i32 } %5, i32 %4, 1
+  ret { i32, i32 } %6
+}
+
+; Function Attrs: uwtable
+define dso_local i32 @input() unnamed_addr #1 {
+start:
+  ret i32 0
+}
+
+; Function Attrs: uwtable
+define dso_local void @log(i32 %i) unnamed_addr #1 {
+start:
+  ret void
+}
+
+; Function Attrs: uwtable
+define dso_local void @app() unnamed_addr #1 {
+start:
+  %_5 = alloca { i32, i32 }, align 4
+  %iter = alloca { i32, i32 }, align 4
+  %_3 = alloca { i32, i32 }, align 4
+  %x = call i32 @input()
+  store i32 0, ptr %_3, align 4
+  %0 = getelementptr inbounds { i32, i32 }, ptr %_3, i32 0, i32 1
+  store i32 10, ptr %0, align 4
+  %1 = getelementptr inbounds { i32, i32 }, ptr %_3, i32 0, i32 0
+  %2 = load i32, ptr %1, align 4, !noundef !4
+  %3 = getelementptr inbounds { i32, i32 }, ptr %_3, i32 0, i32 1
+  %4 = load i32, ptr %3, align 4, !noundef !4
+  %5 = call { i32, i32 } @"_ZN63_$LT$I$u20$as$u20$core..iter..traits..collect..IntoIterator$GT$9into_iter17h9c831713eeb3e5efE"(i32 %2, i32 %4)
+  %_2.0 = extractvalue { i32, i32 } %5, 0
+  %_2.1 = extractvalue { i32, i32 } %5, 1
+  %6 = getelementptr inbounds { i32, i32 }, ptr %iter, i32 0, i32 0
+  store i32 %_2.0, ptr %6, align 4
+  %7 = getelementptr inbounds { i32, i32 }, ptr %iter, i32 0, i32 1
+  store i32 %_2.1, ptr %7, align 4
+  br label %bb3
+
+bb3:                                              ; preds = %bb5, %start
+  %8 = call { i32, i32 } @"_ZN4core4iter5range101_$LT$impl$u20$core..iter..traits..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h3186fffc13203a36E"(ptr align 4 %iter)
+  store { i32, i32 } %8, ptr %_5, align 4
+  %9 = load i32, ptr %_5, align 4, !range !5, !noundef !4
+  %_7 = zext i32 %9 to i64
+  %10 = icmp eq i64 %_7, 0
+  br i1 %10, label %bb7, label %bb5
+
+bb7:                                              ; preds = %bb3
+  call void @Fresh(i32 %x)
+  ret void
+
+bb5:                                              ; preds = %bb3
+  call void @log(i32 1)
+  call void @log(i32 %x)
+  br label %bb3
+
+bb6:                                              ; No predecessors!
+  unreachable
+}
+
+; Function Attrs: uwtable
+define internal void @_ZN9example124main17h35539225bd174e48E() unnamed_addr #1 {
+start:
+  call void @app()
+  ret void
+}
+
+; Function Attrs: uwtable
+define internal void @Fresh(i32 %_var) unnamed_addr #1 {
+start:
+  ret void
+}
+
+; Function Attrs: uwtable
+define dso_local void @atomic_start() unnamed_addr #1 {
+start:
+  %local = load i16, ptr @atomic_depth, align 2, !noundef !4
+  call void @start_atomic()
+  ret void
+}
+
+; Function Attrs: uwtable
+define dso_local void @atomic_end() unnamed_addr #1 {
+start:
+  call void @end_atomic()
+  ret void
+}
+
+; Function Attrs: uwtable
+declare i64 @_ZN3std2rt19lang_start_internal17hadaf077a6dd0140bE(ptr align 1, ptr align 8, i64, ptr, i8) unnamed_addr #1
+
+; Function Attrs: uwtable
+declare i32 @rust_eh_personality(i32, i32, i64, ptr, ptr) unnamed_addr #1
+
+; Function Attrs: uwtable
+declare void @start_atomic() unnamed_addr #1
+
+; Function Attrs: uwtable
+declare void @end_atomic() unnamed_addr #1
+
+define i32 @main(i32 %0, ptr %1) unnamed_addr #3 {
+top:
+  %2 = sext i32 %0 to i64
+  %3 = call i64 @_ZN3std2rt10lang_start17h04742dcfd5f87c29E(ptr @_ZN9example124main17h35539225bd174e48E, i64 %2, ptr %1, i8 0)
+  %4 = trunc i64 %3 to i32
+  ret i32 %4
+}
+
+attributes #0 = { noinline uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" }
+attributes #1 = { uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" }
+attributes #2 = { inlinehint uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" }
+attributes #3 = { "frame-pointer"="non-leaf" "target-cpu"="apple-m1" }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 8, !"PIC Level", i32 2}
+!1 = !{i32 7, !"PIE Level", i32 2}
+!2 = !{!"rustc version 1.73.0 (cc66ad468 2023-10-03)"}
+!3 = !{i32 1453212}
+!4 = !{}
+!5 = !{i32 0, i32 2}
diff --git a/benchmarks/ctests/example12.rs b/benchmarks/ctests/example12.rs
new file mode 100644
index 0000000..05d20c3
--- /dev/null
+++ b/benchmarks/ctests/example12.rs
@@ -0,0 +1,26 @@
+include!("../intermittent.rs");
+
+#[no_mangle]
+fn input() -> i32 {
+    0
+}
+
+#[no_mangle]
+pub static IO_NAME: fn() -> i32 = input;
+
+#[no_mangle]
+fn log(i: i32) -> () {}
+
+#[no_mangle]
+fn app() -> () {
+    let x = input();
+    for _ in 0..10 {
+        log(1);
+        log(x);
+    }
+    Fresh(x);
+}
+
+fn main() -> () {
+    app()
+}
diff --git a/benchmarks/intermittent.rs b/benchmarks/intermittent.rs
index 6a14b83..8803b29 100644
--- a/benchmarks/intermittent.rs
+++ b/benchmarks/intermittent.rs
@@ -1,14 +1,14 @@
 //#![no_std]
 //#![feature(core_panic)]
 //#![feature(const_in_array_repeat_expressions)]
-extern crate panic_msp430;
-extern {
+// extern crate panic_msp430;
+extern "C" {
     fn start_atomic();
     fn end_atomic();
-	//add any externs, as from drivers, here
+    //add any externs, as from drivers, here
     fn printf(format: *const u8, ...);
     //necessary to import as the intrumentation pass needs to see this
-    static mut atomic_depth:u16;
+    static mut atomic_depth: u16;
 }
 
 /*
@@ -19,62 +19,59 @@ pub extern "C" fn _entry() {
 */
 #[allow(dead_code)]
 #[allow(non_snake_case)]
-fn Fresh<T>(_var:T) -> (){}
+#[no_mangle]
+fn Fresh<T>(_var: T) -> () {}
 
 #[allow(dead_code)]
 #[allow(non_snake_case)]
-fn Consistent<T>(_var:T, _id:u16) -> (){}
+#[no_mangle]
+fn Consistent<T>(_var: T, _id: u16) -> () {}
 
 #[allow(dead_code)]
 #[allow(non_snake_case)]
-fn FreshConsistent<T>(_var:T, _id:u16) -> (){}
+fn FreshConsistent<T>(_var: T, _id: u16) -> () {}
 
 //#[inline(always)]
 #[no_mangle]
-fn atomic_start() -> (){
+fn atomic_start() -> () {
     unsafe {
-	// variable must be visible to the omega pass
-	let local = atomic_depth;
-	start_atomic();
+        // variable must be visible to the omega pass
+        let local = atomic_depth;
+        start_atomic();
     }
 }
 #[no_mangle]
-fn atomic_end() -> (){
+fn atomic_end() -> () {
     unsafe {
-	end_atomic();
-	
+        end_atomic();
     }
 }
 
 #[macro_export]
 macro_rules! nv {
     ($name:ident : $ty:ty = $expr:expr) => {
-	unsafe {
-	    #[link_section = ".nv_vars"]
-	    static mut $name: Option<$ty> = None;
-
-	    let used = $name.is_some();
-	    if used {
-		None
-	    } else {
-		$name = Some($expr);
-		$name.as_mut()
+        unsafe {
+            #[link_section = ".nv_vars"]
+            static mut $name: Option<$ty> = None;
 
-	    }
-	}
+            let used = $name.is_some();
+            if used {
+                None
+            } else {
+                $name = Some($expr);
+                $name.as_mut()
+            }
+        }
     };
 }
 
-
 #[macro_export]
 macro_rules! big_nv {
     ($name:ident : $ty:ty = $expr:expr) => {
-	unsafe {
-	    #[link_section = ".nv_vars"]
-	    static mut $name:$ty = $expr;
-		& mut $name
-
-	    }
+        unsafe {
+            #[link_section = ".nv_vars"]
+            static mut $name: $ty = $expr;
+            &mut $name
+        }
     };
 }
-
diff --git a/ocelot/AtomicRegionInference/Makefile b/ocelot/AtomicRegionInference/Makefile
index 53d3c55..00d4b7e 100644
--- a/ocelot/AtomicRegionInference/Makefile
+++ b/ocelot/AtomicRegionInference/Makefile
@@ -1,4 +1,4 @@
-.PHONY: clean_tests clean eg1 eg2 eg3 eg4 eg5 eg6 eg7 eg8 run_eg1 run_eg2 run_eg3 run_eg4 run_eg5 run_eg6 run_eg7 run_eg8
+.PHONY: clean_tests clean eg1 eg2 eg3 eg4 eg5 eg6 eg7 eg8 eg9 eg10 egr run_eg1 run_eg2 run_eg3 run_eg4 run_eg5 run_eg6 run_eg7 run_eg8 run_eg9 run_eg10
 
 all:
 	make eg1
@@ -9,7 +9,11 @@ all:
 	make eg6
 	make eg7
 	make eg8
-
+	make eg9
+	make eg19
+	make egr
+	make eg11
+	
 eg1:
 	TEST=example01 make test
 eg2:
@@ -26,6 +30,16 @@ eg7:
 	TEST=example07 make test
 eg8:
 	TEST=example08 make test
+eg9:
+	TEST=example09 make test
+eg10:
+	TEST=example10 make test
+egr:
+	TEST=example make testr
+eg11:
+	TEST=example11 make testr
+eg12:
+	TEST=example12 make testr
 
 run_eg1:
 	TEST=example01 make run && ../../benchmarks/ctests/example01.out
@@ -43,6 +57,10 @@ run_eg7:
 	TEST=example07 make run && ../../benchmarks/ctests/example07.out
 run_eg8:
 	TEST=example08 make run && ../../benchmarks/ctests/example08.out
+run_eg9:
+	TEST=example09 make run && ../../benchmarks/ctests/example09.out
+run_eg10:
+	TEST=example10 make run && ../../benchmarks/ctests/example10.out
 
 test:
 	$(MAKE) -C build
@@ -56,6 +74,19 @@ test:
 		../../benchmarks/ctests/$(TEST).c\
 		-o ../../benchmarks/ctests/$(TEST).ll
 
+testr:
+	$(MAKE) -C build
+	rustc ../../benchmarks/ctests/$(TEST).rs --emit llvm-bc -o ../../benchmarks/ctests/$(TEST).bc
+	clang -S -emit-llvm\
+		-fno-discard-value-names\
+		../../benchmarks/ctests/$(TEST).bc\
+		-o ../../benchmarks/ctests/$(TEST).orig.ll
+	clang -S -emit-llvm\
+		-fpass-plugin=build/src/InferAtomsPass.dylib\
+		-fno-discard-value-names\
+		../../benchmarks/ctests/$(TEST).bc\
+		-o ../../benchmarks/ctests/$(TEST).ll
+
 run:
 	$(MAKE) -C build
 	clang -fpass-plugin=build/src/InferAtomsPass.dylib\
diff --git a/ocelot/AtomicRegionInference/src/InferAtoms.cpp b/ocelot/AtomicRegionInference/src/InferAtoms.cpp
index 428adab..445b578 100644
--- a/ocelot/AtomicRegionInference/src/InferAtoms.cpp
+++ b/ocelot/AtomicRegionInference/src/InferAtoms.cpp
@@ -369,11 +369,14 @@ void InferAtomsPass::removeAnnotations(inst_vec& toDelete) {
               //* Remove args and their uses as well
               for (auto& arg : ci->args()) {
                 if (auto* argInst = dyn_cast<Instruction>(arg)) {
+                  auto argUsers = argInst->users();
+                  if (std::distance(argUsers.begin(), argUsers.end()) == 0) {
 #if DEBUG
-                  errs() << "Remove call arg: " << *argInst << "\n";
+                    errs() << "No other users, remove call arg: " << *argInst << "\n";
 #endif
-                  argInst->eraseFromParent();
-                  argInst->replaceAllUsesWith(UndefValue::get(argInst->getType()));
+                    argInst->eraseFromParent();
+                    argInst->replaceAllUsesWith(UndefValue::get(argInst->getType()));
+                  }
                 }
               }
             }
diff --git a/ocelot/AtomicRegionInference/src/TaintTracker.cpp b/ocelot/AtomicRegionInference/src/TaintTracker.cpp
index bea58e7..1ffffee 100644
--- a/ocelot/AtomicRegionInference/src/TaintTracker.cpp
+++ b/ocelot/AtomicRegionInference/src/TaintTracker.cpp
@@ -619,10 +619,20 @@ std::set<CallInst*> findInputInsts(Module* M) {
   // Find IO_NAME annotations
   for (auto& gv : M->globals()) {
     if (gv.getName().starts_with("IO_NAME")) {
-      if (auto* ioFun = dyn_cast<Function>(gv.getInitializer())) {
+      Function* ioFun;
+
+      auto* init = gv.getInitializer();
+      if (isa<Function>(init)) {
+        ioFun = dyn_cast<Function>(init);
+      } else {
+        ioFun = dyn_cast<Function>(init->getOperand(0));
+      }
+
+      if (ioFun != nullptr) {
 #if DEBUG
         errs() << "Found IO fun: " << ioFun->getName() << "\n";
 #endif
+
         // Now, search for calls to those functions
         for (auto& F : *M) {
           for (auto& B : F) {

From fd5d5b7ca4213f8faad063ad6d299e3883c772c1 Mon Sep 17 00:00:00 2001
From: Robert Zhang <jiaxuanzhang066@gmail.com>
Date: Wed, 13 Mar 2024 13:52:10 -0500
Subject: [PATCH 16/18] [InferAtomsPass] Rename unit test folder to "tests"

---
 .gitignore                                    |   3 +-
 benchmarks/ctests/example.bc                  | Bin 6304 -> 0 bytes
 benchmarks/ctests/example11.bc                | Bin 6352 -> 0 bytes
 benchmarks/ctests/example12.bc                | Bin 8160 -> 0 bytes
 benchmarks/{ctests => tests}/example.ll       |   2 +-
 benchmarks/{ctests => tests}/example.orig.ll  |   2 +-
 benchmarks/{ctests => tests}/example.rs       |   0
 benchmarks/{ctests => tests}/example01.c      |   0
 benchmarks/{ctests => tests}/example01.ll     |   4 +-
 .../{ctests => tests}/example01.orig.ll       |   4 +-
 benchmarks/{ctests => tests}/example02.c      |   0
 benchmarks/{ctests => tests}/example02.ll     |   4 +-
 .../{ctests => tests}/example02.orig.ll       |   4 +-
 benchmarks/{ctests => tests}/example03.c      |   0
 benchmarks/{ctests => tests}/example03.ll     |   4 +-
 .../{ctests => tests}/example03.orig.ll       |   4 +-
 benchmarks/{ctests => tests}/example04.c      |   0
 benchmarks/{ctests => tests}/example04.ll     |   4 +-
 .../{ctests => tests}/example04.orig.ll       |   4 +-
 benchmarks/{ctests => tests}/example05.c      |   0
 benchmarks/{ctests => tests}/example05.ll     |   4 +-
 .../{ctests => tests}/example05.orig.ll       |   4 +-
 benchmarks/{ctests => tests}/example06.c      |   0
 benchmarks/{ctests => tests}/example06.ll     |   4 +-
 .../{ctests => tests}/example06.orig.ll       |   4 +-
 benchmarks/{ctests => tests}/example07.c      |   0
 benchmarks/{ctests => tests}/example07.ll     |   4 +-
 .../{ctests => tests}/example07.orig.ll       |   4 +-
 benchmarks/{ctests => tests}/example08.c      |   0
 benchmarks/{ctests => tests}/example08.ll     |   4 +-
 .../{ctests => tests}/example08.orig.ll       |   4 +-
 benchmarks/{ctests => tests}/example09.c      |   0
 benchmarks/{ctests => tests}/example09.ll     |   4 +-
 .../{ctests => tests}/example09.orig.ll       |   4 +-
 benchmarks/{ctests => tests}/example10.c      |   0
 benchmarks/{ctests => tests}/example10.ll     |   4 +-
 .../{ctests => tests}/example10.orig.ll       |   4 +-
 benchmarks/{ctests => tests}/example11.ll     |   2 +-
 .../{ctests => tests}/example11.orig.ll       |   2 +-
 benchmarks/{ctests => tests}/example11.rs     |   0
 benchmarks/{ctests => tests}/example12.ll     |   0
 .../{ctests => tests}/example12.orig.ll       |   0
 benchmarks/{ctests => tests}/example12.rs     |   0
 ocelot/AtomicRegionInference/Makefile         |  46 +++++++++---------
 .../src/InferFreshCons.cpp                    |   1 -
 45 files changed, 69 insertions(+), 69 deletions(-)
 delete mode 100644 benchmarks/ctests/example.bc
 delete mode 100644 benchmarks/ctests/example11.bc
 delete mode 100644 benchmarks/ctests/example12.bc
 rename benchmarks/{ctests => tests}/example.ll (99%)
 rename benchmarks/{ctests => tests}/example.orig.ll (99%)
 rename benchmarks/{ctests => tests}/example.rs (100%)
 rename benchmarks/{ctests => tests}/example01.c (100%)
 rename benchmarks/{ctests => tests}/example01.ll (95%)
 rename benchmarks/{ctests => tests}/example01.orig.ll (96%)
 rename benchmarks/{ctests => tests}/example02.c (100%)
 rename benchmarks/{ctests => tests}/example02.ll (96%)
 rename benchmarks/{ctests => tests}/example02.orig.ll (96%)
 rename benchmarks/{ctests => tests}/example03.c (100%)
 rename benchmarks/{ctests => tests}/example03.ll (95%)
 rename benchmarks/{ctests => tests}/example03.orig.ll (96%)
 rename benchmarks/{ctests => tests}/example04.c (100%)
 rename benchmarks/{ctests => tests}/example04.ll (95%)
 rename benchmarks/{ctests => tests}/example04.orig.ll (96%)
 rename benchmarks/{ctests => tests}/example05.c (100%)
 rename benchmarks/{ctests => tests}/example05.ll (97%)
 rename benchmarks/{ctests => tests}/example05.orig.ll (96%)
 rename benchmarks/{ctests => tests}/example06.c (100%)
 rename benchmarks/{ctests => tests}/example06.ll (95%)
 rename benchmarks/{ctests => tests}/example06.orig.ll (96%)
 rename benchmarks/{ctests => tests}/example07.c (100%)
 rename benchmarks/{ctests => tests}/example07.ll (97%)
 rename benchmarks/{ctests => tests}/example07.orig.ll (96%)
 rename benchmarks/{ctests => tests}/example08.c (100%)
 rename benchmarks/{ctests => tests}/example08.ll (96%)
 rename benchmarks/{ctests => tests}/example08.orig.ll (96%)
 rename benchmarks/{ctests => tests}/example09.c (100%)
 rename benchmarks/{ctests => tests}/example09.ll (97%)
 rename benchmarks/{ctests => tests}/example09.orig.ll (96%)
 rename benchmarks/{ctests => tests}/example10.c (100%)
 rename benchmarks/{ctests => tests}/example10.ll (96%)
 rename benchmarks/{ctests => tests}/example10.orig.ll (96%)
 rename benchmarks/{ctests => tests}/example11.ll (99%)
 rename benchmarks/{ctests => tests}/example11.orig.ll (99%)
 rename benchmarks/{ctests => tests}/example11.rs (100%)
 rename benchmarks/{ctests => tests}/example12.ll (100%)
 rename benchmarks/{ctests => tests}/example12.orig.ll (100%)
 rename benchmarks/{ctests => tests}/example12.rs (100%)

diff --git a/.gitignore b/.gitignore
index fa78942..b86fab8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
 .vscode
 ocelot/AtomicRegionInference/build
-benchmarks/ctests/*.out
+benchmarks/tests/*.out
+benchmarks/tests/*.bc
 
 .DS_Store
\ No newline at end of file
diff --git a/benchmarks/ctests/example.bc b/benchmarks/ctests/example.bc
deleted file mode 100644
index 6513a4190f5bf1e1cc2d48f15cd4b9bc4661b52b..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 6304
zcmcgv4RBM}mA=n<k{(8WdbS1FMG{Z8Lk$bwryuJF+Y|j@Vu;ho4(&1tS^4P?k@!bQ
zwy~Y0dA1Bv64Nn^8QhfJwS#v;Xwr2ErDT`RS`rYKb*RJA4nsYR9Sk8Epbi0=?J}D^
zSF*vBChc^8-srt|@AuCA?m6e4bMDo}ktGWeTne~*@(}t8F1usoVkrPLU~U<ww0)5I
zcHP2v80q#&NpMmU8kgL3%D=G68yIP0P#OksTms!GmcHhs?F#lO7^jgTbXbJY!VC?{
zfc|2*rJaSlZKLFR38S;@A-ZLch^c%v`I=i$RA$)sEVK_XI)$0adpuKD8><SQ&=juC
ziS{tIJsI9n>RkovSQle|LFj6`^HvtG?A5+Fym>c!auj%9VG#n=QGuUi4%@rdXV=1G
z_qK$yDs#c|%2Q%{?Q*K*^+Ib-!TAgd30Wgb!jR!4MOP^6pn_tVE7ZsUmD$JkSr^3*
z=JdN+#84-l8#L(jfFh6Lglyq;Yyzr*NuGrF(yQ#3*iS^Ti({y8Z!{Q)h619twrF!u
z+EFZRM-x-3iNUP%gUUo)iMrGYoBGV4^6FsLDMI}jK}p*LX=`!R7>JsYakO&=b2%7E
z22bB)=<80p0m*;cOW&}{FSBx??0BT?2vOGOF!zhB;|}v^($XiYI1)FHI4aN-lFy9L
zm&Ya78R>{*P2PV2)5|Ev%E*LrTBE!$sJwvG7d5JhPW5S<8f|ZrZV!lH>L4c-2BN{<
zXt-F~5r}LPq8%tfs3$vBrw6kxXlNlzoJ|sk2aCH{%Ba}MkfS?I<`g+QW>Y5yvDoC4
z>TFzfbx=7?1f$!7QDa>+L`lPl>Y$|U=4fajYRr?i9h9=lNkR>-xG<pl7@_sxLn-($
zx2w+1Rx=W}ovY64PV_U%T>NCPpL2&nv({nJnm{BdNZWun5NV~N+uEY-BT;iqWD?*P
z$>PCN<8;Ri-Ox+7TBRLz(&ll=r;IctNIO!`hDkci&f@Q-kPUJwdyj2rk0PHi*!%Z)
z_+C7gwPoM4eBK%3fggULQ(9UVUaEiR^Y?FbUGGQUyyM*nv7iveAT~in<-Ky*A|z;j
zbthAbWCTK1_=ZT7A$#kbEh`5%DFH(iBJ*lLB3G_L9HapPNyH$E8~Qlw*-fa9g)WXK
z2(!8>-?ux0Xv7L#5^gJEM|v~=_EsGsbV8Sc`yjFkn!Oh$k_1XZH{~EgK#oJ)VX*{B
zfQSVc2#K8=e`heH2N5YrnDOkJaROM=sDP=y`)HOtH9tnv%3EKRsZk!xFJLI8{O<2g
zp%E5(_q1C1PCu+05MwkwUCTd~|G{*>|FK07P;p8Ci-CcUxSk?O0k`4CQ#cOgc+@N=
z7E%cI)ye*wh+v@$C|ux#=RTkoAOr=3O(BRVGExe^3Zc;q0@o#-x?ho>UESNWVEJRG
zNJp)>hP>>2eNT)a*P*JS?)0F!8IV=<+KDGttmQVLcOm|c!&f~eC{Ii&{R*NY<}i0B
zd+dU=y-gbK-zK|)<kuPcmO$%ec6B1Ig4FeM;*?lq>6oZu)KSqRsu*kQfoy*PV!bLF
zgtQNeTHB&QHkv<{O<(J!r#N{dC%-g9&p2s4b3%1dqdc!sC3v-;dj+TTxWjTpRDL{C
z-YqH{n^CmZNnZ#^O}LIoO#;r$U>(#5Y3r9_pxrJ+T@e3y(NKT1y}xtSjT!pJBz=2?
zzQ&2%qmY4v%CXdQf>)l_C{I8d^O9fpIxM3OOMmKlENO-c5)+m6CI1>Gz1vlGPE<CW
zlzp%@8r&2O1tZM?oM_E?(dL0@{+XxbH<0|Mlb-ISFU=KpJzK1rh~rvzR-=T1GBK%~
zo>F~AsQr00=3&R0{gJXUXVI)7by27_ZT&UXw|eE+1M270r+MX7jp|}t^?gE(71pR`
zc*({c5%XBGY}io&4!N5spLUeRpquPDnF*Fgw}U<uE~%mZ&drw?`Wh5FC;bIi7(Z)M
zPfsm5Kc%_|3RLBn4M!@DM3#0Z%{>mwShD<B+%lR38_UP4j%=f(pw%{T;HGH)Z%)zI
z>g3laB{Qu2GKVYd9Vgw$(3dzGx?fC6KAoheo%EMn;m6l_WujAcerm}zjp~X<bwZ<p
z6}+lh=4bz|?6{-in8?x>v5ajx4yzBwLe=Z2!gVnJ-#(##%gLuE=})clPX}O@Bq0hP
zl!7g;eh3xp;pTBz%Fb?^Wikn_tALdnvz{1$s;Zi_sV~Nr{vr=YPvP~I1IcC1p^3EQ
zNyAl9*Ff||0mi=DN0j#@%lne%amN}6r!nxDtV*-U&Z|MyGjUaIr^2t?!pU!P^tBoK
z5^84=S_yF<QdhB#m@I!mNLR%ccJ}Pv{n6S1rr2Aq)!Bd0t2tn12GRP%669R(V5@zt
zzIx~S%5)>t5^naet-errYf~euEivg!IJVg1F&cTV!Dwc6oKC-5%dO`0D`1{N>U3$>
zKwTlV!I-p?CuI?hLQf`TLJzsTxX@_)7vsL8ro(HF>Xzt_4!`$)4SA_*$F2{5^5G-9
z-X7c4*oTO>AWv5V&%(@5F~q;LV^gpne2(2^8BcayQJxyix(ZuS94}2et`-|E)zP0g
z<x{MD8V9ZX6DNIZSm>0t!-fah)E0$Ez_GL`xIbxzJ#{Ej(U;749uiYIF{PYfmD4<a
zUhPyTc;)z@ax@(Pe4LQqV&zj#In;!OvGhWSe`yA{pNHgv{WwwvQ3o|)R8$5kqmQ7}
zN#HFVD<6uK?T4fjRSZPRApv`#iX+6caE5?)E@@kZt0I;qPd)$lsOm1i4DZ=fs%g!#
z-(ykqIhWona>>fyFss0?5;5wk3kDJ~f)Loe^3I(i0;(jsldX_Ct8$qqpiOAgW1iVL
zDZ}S3eDQ3$l+MkG$%i&T9n2PVk|8d)v)E(JOiH?g%t*`@0%mJpon)O&wv@43Q4xFC
zW@ZpRD6T8Gdwd6D^=@1qs)~Ki%K2~Ge9K4?yK67?awAJJ`3XXCmRy?)qVx24<8j=_
z7tey-@XjR(vCxyJKvY+JN?a%*!K#tX`3KLlN(%c!QgHX!4kqVa#qxr?M_!^{ehKX`
z5Fe5EIWz0l|8sh`NO3Moai;xw-?qLC$)HulrkyriAD2v7K~4<!;)1(JcTtZvEK9hS
zSv5~s4KV1Q-(G!?8}`e4YxN*E=e8o2lKgTY#f#O!yz|rrByDpEymQI5sr42?duT%)
zDSQ@y7G8tB8F&qBE9#g`SP<N)w~|t#I?S>$St3V5WDB>}D5Unzg7>mU(U_66b6Xhl
z3yEV9I<SD?UT4Xl(><49vEo+rbNWM3R;u+L885+#!A6Qq9BFGPN;LiGm6?D4Ucuel
z3zP{v%DTOIgdu-7w_@0StlKj?-+26}?6G~&OkF%XduAhXw54V4Xzt#g8;6Ck44!9x
zd@0sJoOg=|IHRC@!3k}ieOPwr8)Q@G#rbn*v-9avd|AMoOUBb#VK^7F;p{NiaBe4F
zAbL>%;uqU?CwuclW2yOL%lD*g{9`&lq=)V9B1SbAJ!!k}*#u&{rLrdvj(lD5ButFs
z;!e$~AD~tDr@j|!l7R<u<3U|s{aRhp_TG<o>}v~~ueBnEj7<Ny%Qk9FMSIE)uKEnN
z9bbbi#2dU1kiT#e>>;GFcgGlw{RH*Ox2!RlreJN1(b{WirohvE;yAl@XDmDCroiOe
zYoBHcyxraBSZ}v?g0q(fGerkU6t`syrwLL(Y>oI$CsY_&abJ7|)HIT60f`uiteX89
z;x|Zvt8p%P6F#86#0u<>pQ22NB&$}iL&uplU`2>qRnGM#SAYk|tlPoVXPb~fYX#O?
zlRdz~336kf*op|&%;ce!j94sLuozL|g=ic8hTu#HYIlpylU2?q1@S&{VB~2*niR*Z
z&k75=B?rd?!oq&ZDEFL@^|s{d@biNFq=cHP7qTx(0-cR89KH7HUio7`TQ>5oCI9hq
z-V49p`_;g?UxgP(^sOJ3W%%GAe*k21M13r-AK0JE;DObzh8v7Qo8ZP5elLMr$iO{_
z3y_2D47I~s4R5^0_?r8U+gKi7`#{DCZ@fR?|C;!}A>t7F1n|Ft{&R5SeH`Nioc43T
znd_n72Y1yhj^&>L{2Ay^!2R?*{3zgflQM$BYv$pf&*H&9z*m8>Z6J3D@a^;DhXDT@
z=xe~n@6E&i6L6qH+W`la)B1y+U(Mnj^Kb!h5fpnY|I$1h0WJmn5x}vJispF0u2q1;
z4-Ekhl=u9j0}$uLTY%&LTq(Z+&!d3jee0)yXU@Yn01ltgzXFc)0n1YWd>?QH^lt*5
zI}hJ8%P$6_=J>JvKg{xH13t%(@k4-P{gwib?VHo*p8&^l_`-3J!XY@`2ORhDjq6Mb
zhk<dxv0Zo%x;HN-gzH%Veg^9TG5HhtXM|dtd3U|9gj@fl>pQk5D*5K7hOHi#*B5LK
zxSsxwKHTipa^d!{%hS}*(9~#g^PcCML%heQGrL^)bmwyWwrp*5g#%3?XwQAsngUv_
zrBvtf8?`=dsn+asSI*MuLd{yPo^RaZ3ODo6HU)USyVT(4-CDm6B;A&;k%SqGT1@Vu
z)WcKX6mALmpzXzgw+MKA24Hd<dB4GLcKdlh<}rAhLOy*{Fl_d>G<urh6lz@8_++EU
zXW%@1eZ8v*n)g{}oe>UfZFqp0Gy2`7ew~gtl)AMhT}s2RWwzWS2Ay<1v(N13O?rdL
z%WHMqH|q4jNTVJ+Xz&`nZm-dxGtZkd*c>vKdP7Y?7dSpx4~tP$y|D;Zs-z^;TvBr1
zw-_#}b`}-Y!u{CBqLPyPrat6AEC3374F=w#*Lgku(o)`_Ghu~G4Xy{MT6A0yA1=bL
z)U3f!lgAehmy~Sug&MXt^4QIo&KU9qAuNEE(;2;5x8Cn9HCarY$zR#r5Ug+7!Uuyu
zZ}PSA(DfM__^pi~%<J_gj`x5HeuLR#;w#sMeBr=sWTt{<w(V>5&I>BbeZ#>q&Aw10
zUys?myq`0fc%#?LX$_paG8MJHfGY_9N}51sYjb;w)Rkt%r0G^#of57@w;GRGo&Ni>
z8iXNZSg(OIc-*|fr?+_hrIu2YmNOeI7PHyw^O`(vpH|Bm+@(4{A8IffR)eMWzSRx9
zrzzZ~)dD9sYjg<@!}JEfM`!kGO}yE-+OwslgmcXr_P>^V^Bnc3vhR<J{srj=L|>aF
z|HJ(ICh7;$=-~wXvxh#Mf%k^~Kc^tf`+tywE=WQbWT6YEAx!Zn^U#$_#M$gfm+!d_
t&KjV=`vorWe{L?Ow~z;u@nPcGbjSHv?V8)LT(djYoCc{aE49U;{|1He4g3HA

diff --git a/benchmarks/ctests/example11.bc b/benchmarks/ctests/example11.bc
deleted file mode 100644
index 076c5ac57cc33f1dd31c34eaedd8b91b7efcc204..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 6352
zcmcgv4R8}zet&DNw6ZO))>s5Pk;KaOafX7s`miivd#VpiY+?@KbM7diBd@*?i9duS
z8^eWUZ5c~ROllZs&P}=2PMA4JFRcTFftJizlHq)ulXG&JmT(@%J`5o#<QxJV?f9<!
zKgk9Ya=EnA>yCE!z2Cm~`+vOu|NovYjV#MXa4Fy(&PV9K;Ii3AE|md51Ll%(O6yOv
ze$trzCPVF-k_4tC!3oJthx`kRyqTd|0#pdV2?=zkS^Bzzwkg=}!aR)(q0=ISvNJR&
z1Nx7^O?4I>u#S<n5=Lh}NOa4-C6*Sb$(LM$qB_HNFxz&R(J4&Q{3o(>^|6}3sHSM$
zl4vhuJ(%GclfI>3?dxOg&j?+8cmAr9RejoLhqoPI&yE4_3oJsQIx6&$%rRS6ZEih0
z4(v=gYO*p{RG$;u>Q_ihUoEmMDZG#&MMBPqk}zaANYPcPWk?}qnXA%~L1|V$+izJK
zKeA-N#Ue&}*0Di@&JQZ`rJRr}yoyagn?cAXct3WHy^H-s1iCmz8tRJ%{L!Fa)X@<Q
z2dF(I)NV96t(qLlxiF+m#FeN^ov^Ck8&X~y$~i}<KP9A8hd{NLM2-Ha2^q&aXEB$Z
zk!0}nO@_YVpqr8WH+}SH7WoxcE>xU}RGc6x`t7Cxk!8Ye8cUk{MO7!_rV)D;nnv>3
z5&Fu6<OV~HNY>{69+;j-F;+$<lrtLT#UbTIq`st4O?Il!Th(Y+2er#D0?`4E5(cAz
zzG$d~+T)Mx6ry`jf>2L&s?HDPT-4A)jyRVjjt!M`u~MU=haty$%1tS9cHF8?4B=r@
z)2a{Rs%t~a86ptf6^I%eqd_SZLef1_YPTsG9E=+Csg5HQtDGX#;EIcbs$U~i3qGX4
zhk0F%HnxtDxNKZqes^MkQRd+%gZ-R044SnMi`M!h0fFiOUVo%r8r|6u-8~XD#YE)-
z{31C#cxr;)GfOx3(d`y$Pa_qckbJ^WL4n$ndNxndA$A@=o<cUrrMU;KJ-vzo!eARX
z)MYJL-je+2%9inck?FCw|FXcC{nw$lhYSAg(ZB!q(hDzVT6*G7ND+=-0THNB8Mur7
zW5hwA{QBm93ZN0JED5&-u_JvOR3EYgGzA1R;As&wOE(TCGEfq_DYFOx*;ief*e6C3
zU}OOXW}>H|{WU5Z$$-cLh((MtWJjl}<uu9%A^{*EB=hT!zFVb49Ap(}f(fPE;jE&k
z8c`z)UEl*krm?1NQ?)2H>AuA09#y8)6ktr6>UY1KP0I(FAQUTqczlWwtf>JD%5Pmt
zBN-4|07WF$>f(#L)27@L_saLre@WaM@4qUNN(C6f;(4HLPs5$EYD>y1L<#}TH)yXC
z85ZbaUx5fg^W^@Oat1Ak1B+xwpX=>Ih=b2q90<0!B9`fo2qdfq<8W=#sRtAVxpjTL
znJXSYN80Pfo5?GVxq~r+T#sssyHm5~T?lQ9=%vw(E7x&b&|46LX8_Sl1?A|p(x)J*
zVs=w^vezb1yE~}Rz)sm!B)`GXw**=*v#Aqt6{N3^6Q_nn9vc@`joGVuMOEV+y^!%g
zfOxNo1|anVqV|qxfQ=T6=hD~v=xI*g!pSes(z6a)&y1=rX_OZ<ssyk0aWCMsp0Jxw
zh$_!SD!WA$<FktPM(PKCsvOr6s$9U?8EAw$LABo%1MMy$>VzD~j|K;#y9YW~e>O{h
zHbvhap|5iy*BE5ska9froaB|~HOf&)WnS{HeRlJh-8_(bo=Tdaio`?}{mDNEp>tgo
zABrl5ld_-ghz7PqgMmockCQE&9}N#i3*P&#{4*rK>7Zx&=*tVmUC)-NCgZrCeV|c7
zQJI`l&P=O5CDgwB&8A`d+C!0waYymIA&pU}Hys0;>u&YQu?N&otIzYwYZ}$1xav7V
zjTP3YW_d}&zKCf&Sut#{0*A~cDrf8!G3X|H&t`$8(OsaA6qnWDK<Bp041FC6o`e2^
zD~f+$RnJT>yD+V~1Pas?R18O|PDCE-PMUh{=J90ZsknJ82{u-a*PPfXr9i8l;J_`>
zf?u7ZuQ$qXOi5;0`4tXV**gxpg`qEVG<3h1l6*2n&p7D2T+y$u^U6f0>caH0>l)Qn
zjcQb*f)%``S?*(hRB^^$bxLIJkC?}|oPpH`W1;fxslm0d;O8IHzv1N5Q}ibm`6q)Q
zB}s@vM<}qxIRK%8Jsh5ZrR?msnx~TBx+++yam(l+R94lLRedS0^cA}~dK#~<97wKk
zc1@%$p9<APorBS51(^F*KT+A6tn5#kChTh=oW{XpvKq}&8?Odc-;1m2I~6|Vc20hi
zqp#1>m(gw(p;Zv~L3IslkI4!$gSr~FsI&LLf%n%9G9{i$t<LsxpXPNFGlXi7Nsyz~
z&enO`y-kkV>U1O677DxBc5g7WqqT+AmX_;FIkv>@HX3=4!DwQ2oKC++%dO$`D?v_?
z^n6*@U}KSVgE46#&&nbig`P~xgkEw*Ns-a`FUB99EI+pPq;8r1<nZy+o5{;H`}V*4
zkMDkK|4+vExAY_8DCFrH;K|MkmO%Vdd$t4yz~|Uq=80t2Rpq&%oNKTh#qrXl<LVK^
z<wp8rhkTlq&)}eyf9#-d4GW#rZrJo7n>wNp2{@Lv1P&!lu&*ADRP`q_o`J+vPEIQ)
zS>+6mpVvCo30^rdq#R2J03Rphw^;eKLk=||JC<Gu@h?o^_6v|)uqQ_<AnKqdjEO2h
zW%NGOC<(l&@yf%IibIfeqN>42B_v>95|=@qg>wYF^GGW$XQ{E|xo7?k)y(nB@xDE+
zn$ax(&s4?y9Eu>Wfq7*0ub9=~SBV&PH3mG17(obZer0Emh=8LOx|6G*95s2&Mrae-
z^qi-9reyf+g)g2>SLFqnnEdbtsDrtJPI8#b>nw3wvXYYS05cM^27%eq-zZscl|9DT
zEU1`0W;HPg9~Rda&YjuESUe3Yf;F+vSvmi0t9LmmV)wryeZGYynSunN_<&rO2SXR>
z@!Au(k1w7DTi~5X5@Mk@UxB1u@o909gaoTbwiO(?z$&HKcap-nQ~Q`DZz)z3&YjpR
zeSR<6XCU4u?{j7~*8Vfz!{FjsgqFhlVcYsMBtsSvn|9i8ATF7<z&J77j}*?G+%Nr$
z=H&_Ja*Jl8#Q+Yu=eHLh<StEduS>0!2-=4?G?K!nerRF;y^?|bXI<IIWPvkrr^!N6
zL|uqwW3t2&36U$@TC1RJorTA9#?ZKtv~k-R@(YQ5DSADV;9h0Pcj(^B;OMvoy+gk%
z%1O13lkrllY$nJk=SXXFaiaBaUzq*n_X_83XDSmmlyiIA2t)pM!P{7OtlLvPuRU>6
z_V^E>nYwruAvv_Z+BcjFjsQ~0F9xwF*yz)KS>$d&(*8}I+ejR3X*qW^bGH{{FO0#u
zE|#NM4{_ctB5?fP3(k4IFONO^HDe!=oy#t!Lp%#|fp@WN9NX!rnrKKb|5QUFX2m&U
zdXjTFu-Pvw{1DY?dn8M?J;aF*D0yUjKGP1SZ2u}-9@5V`x0F#WL{-{<_{;(^-&Wn5
z59hb8coIZof8W`>`en5GzUX@~C>eYpXCBn$#XqP^PD)O?uHYQUYhY#zv=+pWk(qz!
zvW{6&S&*`Wt9uW}48Dd~h-G-cPX5e6ux}uZtvkkOY@^Z_9<{_|n!<H4Mr*65nL>B>
z=oxlhPb_!IO@S$})o*1AJ>A_OvYu|wBxfrNWQmSImfCWK^8_g%cC+}$4yZh`lK%Kg
z@HI)b1+o;eB`C4ZsyUQEg#>bB{unPpe_|!}$J;0i)V62^8+05wgD{y}&A~>u5<EcW
z+zzBZTZIH#C$N^yxq~d64jcN#7DTWnCLgV0#9~S2BS<RFMmzC0pwWTa-J*}kn#N6n
zctGr**eXz?;+VxLWbTt3neq$Se<K;QJT2tBDY?cr3G#7?baS(idqv{^yalGCm$EKC
zx$D^Ry?cNEPT%h~KDxC1*yBm#-LsA}Rswn9RQ@`QSt9CZab3awEP@ACzYcD^23z39
z7k)2;TfokT%M0URn?rly&B7b6F}@bQ<2D|TuOGn}@I`9h!2d(>|3-X3z5)23LH}vE
z@&1l+!T`MB%v$L8!(B6v<MBM;PeFeY?yZaPGl1jG%LoduU4;L39uIs8*MYGeFzyY&
zcP$$K2H@X^z6Nak-XffUq7FZ_3vleGl>VS61t5q(&jJpnq;LW7?0Nk8MK}Un4*0iW
zTo>Rg7I?reBjE5uM;7w~UI`%1i8leqf1noR)&h?Av(tcQEy5cB2QerKIL-$=UJ5`A
za0T@50FG<T0)A+oKNFN+;K$>S&hsw?e1RY1CjiIxF9#gky`ax&!0|YI;W$X)5FAOs
zaUb8f&ZKad_;bLqUAVU2lZy%A`YQlV!@59Bo>D=-J<PkByro?2Cg*po8>{(nYx54b
z)8h?<{m!l5(TBnwEf?Aya=Kfan_FAVF5dl2ILN!bI+N3hPkm08cl(YOXUN|gg!aN$
zZMnak)0gQ@THaUY)*E~tbM-u(E*REwO?=CCXDG}=yWDT)%|>^b%Tva2Wj?*u^#?|R
z#NxJcS8?j$ZfXs+1-;PrV8BxhJYH?N(PQLvT7${sahVO6$KY-adiAY=kjdB9;ts><
z)VRK7Q;XYc;M{yuld~0?_gUwi5%TY7et=o)Dsy?u^#-%S=+S#!<zLHezDEo?>2)rh
z&czwsUZ2ZYX5#+`GhIHeoHywVCX<QRl^gHR41|NGGEcBI-~`79nqV=C>l%t-rAkYK
z;nLFkzQu5Houjz89`45*ic3rHoBEIg@c>ZR<ume}m*=%+qnUT{92Q$<a6UlQrsInF
zP%(a`qy~bmZf_`5TH4?ZHt%TRv70fSG3X6ISOBX|=jM%WJ!f!nW*=Uf9W8;ja8v7c
zJ`ex_W^V`I9BA@twT5PXM+;2gT;-frZ!+u52ENQ>_EoPBdPDyC=u8F9eB0aNSrl02
z`^JNF!rovD-vn&DhX-et^G1({(;7Hebt-bberEvwv9!XN9pT+6QfHbKlcrl~bxOHX
z-5NY+P5Q6R8W;>&!+H#y!R_J=UcK4lD>IjsYdMqAY&MxZUQfB(1@?f&WjY@pY&II!
zfUQm5HO;)cHPoTi0w;IR>{1hF@|5YloSt`^%Xpt@jeC1rDd(Iw?f)zV7rE+><=|Hj
z{a+*@5dEpL@aw4mNE&|gsSoAhy{Z4ti3oE41epkV0;%YPTy)}O1R>ub8=a|ioX?ST
yAz$d=+yN52bKp|{O*1mRnLL=14-?NPJ<iHH=fc+IoZq_^G)Qe;sm*TD<G%qbB^7i4

diff --git a/benchmarks/ctests/example12.bc b/benchmarks/ctests/example12.bc
deleted file mode 100644
index 61bd73f2e2c1e9323279a9713566f04456214c7f..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 8160
zcmd5=4Rljgp1)~cUtUX^ywU<Kq~(PcEC}6~kLH86JNYQ3WvZl=RTSqWFJH9SkC22?
z7{%8nGzE(mr|uSI$5v3(<LoIPi;UvgwyD^`>J(*N6b{?Uhv=YI1lP0n%<TWZv?VUg
zjJs!@*?aOn?l1TL@Bj06Z*snG&I|-C9j?Lo2>k(?#oBki000?aPARJ}zm@!oW5%y&
z$>vegH%j_P$ZIzF*Cu%#EvffO0sxMX&>drBS8Xy&I&%-qQ%VuqMj$jJL5UKeKN~Jd
zbIxw_5K&IjYU7iluykP}m6=UE=H$~?Bv_uDVcAcs(+yO{Pm|SEZDqbeWlqV=NIPwQ
zGQmAW9ZYAerESb_MCz(=#*(}x9jb?WAKcBnIRtWdFbK_q=K?Q5AF{NRr&hsj_r?KR
zS#r{%6(<rcRf{OY(>bP@3(h1^h)?NLh_nedLVTGr^`uh_eVOX(rjk3&i`~TYJ4Z}O
zmdvNC7TQ*@FB30hP>8UoI;$vNotH=^NF7A?3+OVHfGRT(s)RM?U17e(J`?#`SegoS
zM10i|f3>)&DH8Naw&qE;pmSqc=Xz4k^e6`U6{sb9z?}VVkK#&C$_Y{SXCg|{#7i3U
zBD(5`0qKUC$FVFcO(t-%Yqacho2(AWzwD5GVUk~B<b2`rP~j0#VW-v5B{q#%4MS05
zr?~h?zoE}sjK+|Byiay%g#4VA^pVRmeh-o!MQx0f7*JePDn9B_e1x*kE3?iuXP-1@
zqs>i{&DCO9+Q&-x?uf4=63CNmtqyJEBU{mcD0{Ry>ts*LM@ku=l9)<}4)x@<FpMs}
zlqULHKc-vIhvVcBR#qN14p|LHti~gvqT|-($Hc{*p(6Cfxa^A&64GJJAwy<J4f!WF
z+4T|f0+QcAVR2EP7_69g#Hs)nqH^#h1<$k=hlQ!2NvDOapj({-OvYPbv<Kh$E*kPN
ztrxUx>=iGs4*7UVlLH(bYNR3?n<88KB8E1xo`+8;g#)jR$hMBl>N;eNCdpQZBsfBT
zN=y8_WUFwm8<ho^N&K*YtddiyPnuiX(=$a{OV{2ObLRZ|=!ciOh8N!XM1w}NrRbMC
z-ge(#_rcs}f0uve@g!3#C!Lvqc+i<KA)b;bhERuGIt%g2fBhy|f}|pZOz<TlP=fTO
z)8>?!_?6-@1PPT<b&6QB6tPeu2u30rQSAPYcm2$P91L`ExiE-!lx@1>wO-U`f-V#i
z2xR&?mUyo_M0_)JvBP?i30C~<O1}t+K@bfP67k6T^32yFV$=!{gP{Z@YF#r?k|sma
z#CXLV!?UvyWVm2ROtC%B7)*%8f?#q?F-!3_A9CagawZj9vD2I?5@g?0Y*xseAQ+7m
z<FR71?j)mxAp53b_eqBx3A`DGOb`3JVsblyLc(%z!}>3IMB0@ksK5Xc(|=KBUYg(=
z6NUuGI@Dd_WD1EV&4dZ9YxZSU%%Wk2pbDNVeW_OCSj1o#@^~i9&8XV3zPrLCsDRA`
zW!U}B-}1c(8K47`q0uI1$iyv&C@;n8%i&UIcV%RzR&=x{En0bkuvR5j5|?bp4jmNb
zAfgnN<%aP*XuNO-6~r?Z7VT`INSJ&Oa{M^p8j4p8jw!t9qT)8IAslVD@RBV}l0er+
z>18DUoR(eZWg4j^d!RoHXu``T3d2J4hQ-B0*5Y<?@o-Z+Fw4&=$(FK+4=BbbZfuJ9
zm`LVus_bfqY>buHv+@h$vT>VCLl0)1S1QgZvj(_qFS`TN#|RXvxafGOC@d}<9#3y{
zNPb=|(PPe%=y}WnJ_qoTr19HC5WAU=*rC2MBL1$(magWdxT;5GH~M5(S+R4dM{%)7
zF)ZBAaf*{l#UM}yM}E*@1x7G-3HM`B1F%&aRD1MqV9|+|!qeiy-l+7gnuu?0#P16Q
zt8pO*Ga|w6Nanlu$iG1HYc|=%4%vmN4MoG`Wu5ECy!oC|0pxXVRB>@E>oZZdH>1+f
zYhAuKR5)zQoixM|0bXnBs;s!)A;%ub-jIEgQ(RGIo$t@uCd$SNE3?Kqa?SRTVK`dY
zYb^$cObirVv=+8OH`@MYGFTef4Ej)*zWrUz4_=^USAke<vai{k{`bt;7suwD8Ou5k
z3Y29Q_J)d&gyw~#hIXrQI9hb9-#8Qn8;gd^j%=hPpw&ij;Mz#$*%Pv>4*BP!<TxY0
z#9}u8#wM$$Wfxc(biW=YKOL1_w8_3@b3VSxDF&Le&Wz2us?55q%o<c?K?JWT=X;rd
zEIe*4J|;GHhK$2&k3;mq*sY*vS!A;>lKJ*0vM*Wr*r@DNll;?eSS2bF2VRtbE%q)*
z73|^Q2!ygZY&MQY!F9zDsbSM#HxPZ+s5$$5zrvgAVr64EzH$(9iM1+2n=&MUvWUGq
z@-Po`4|a-*+M`9CQNxIJIi%AtcuZQRoMqv%LDhHrv#OfYy^8g${2D8}Ixf3_wlD}S
zg}nD?moe5hX=ajNUB=`zx9{G4s-&CFa~G-9md87k`wVmsDnCRbTe+2~@HBdAZRIOs
zFaAw|po?kr_yaW!^^7WCugPbbJeNzS<J?-Efl;$+%`z3cjMXfLb#kbax|VKV4z)_(
z(*3YTT4Eb!Gb}1AJB)@I#rAe$QC^Nt_qy&Mj_MCBKdPRiIof;p*Og&$`SxA!{L?!N
zcfB&atG<&Fp_iaumw~()$^Ja31IgC4zAo@RcAIe|+HzTOq9^5w8J8*89m}uT+6xZZ
zCpI~3b{BE-3R_+;-z?bzn=DjSQv@;s=hRx?-lzfg@%^FV&S=6zP@sx)V~TT(;v$Fd
zSDLd2IK@bhVknjfT)#+uoso~(<iHIx+F}7o{H+1pe+G&y3h7V?*$3P(BrXJ%(J3G-
z5}uObqWz)5y-<AO;_grp6k<md^Bu>;&P!>8S>QH2mN@ay3#e>@n~%>4V_6rK^Z!$z
z#;@2IA*K;;o~5<mUosK3IDEl@Hj#*DGAf!|#UdbJR5~+VVk=9d?}B%cDmG_b>!@@t
z^l{^u7)wvBiG}Z91$>yww~+hUwC22OQ*x9H`{=$la|=keJ=IH=nx*q-iwWg2hs*{V
z;e+bmFPJ#Khc>y_H2TZhzGCFuQnM$Q5Hq`8q8_bZNYI%BqV)HO64)x@^>_{+NrlES
zv9oR(Axh-iGtv>&(yt=>W7ZTsKrYcy61)LmpWM4(;@BQ~=E3yF1rtYhQ;$A^wrfSF
zh@18$J1UpMj*1({AcXgGu#ZgkQ6@*o$4H>>RhLBExvem7&DkB}@83I9JMAX;Ha2e-
zJa6?&R~6Y~5;HM}_wwdm>;i<B^DdY;+C%-ME_c8_-=w_Dqy=YA`+Vp3b;3EEMwAF~
z#&+#r#b2w2rzUkzmEiZq9y%F<fIGD&f)rH*a+x;iKou!U<*yf|zhiD*a5!ZM4eJOC
zyPhV#Cats4z9bR*G(!~2r7KeA(fuY=EWhgpgbLmc6aD#PCWK)7MtYVo*X0c~JhOBB
zeTdtQB*lOQrQCS1k0$<ODoR*yF^pZ;`rJK7r7I7>tI#+mw&&j7FRasq7>azq8!v^o
zcEKlcAp#`kTO5Nl0%vJ#ol?+aYVD~pI9cM?IbAA-Jl~$s;uv)dR9jH{BC5YutVD-X
zn(10vM&R5iwUiB3ok{xY{lBft8(jU}Ei$qF@iNm(q2|J35spE825?O5cUJsZy#C>o
ze(ZBAjyjHcsT0=w_Q+EikdE7?5)p7bUkA^r7-L_CSMh6-Z1Ym!72>?hN$wjtYs(X7
zZs_Pf;S15-{YyhM5hY8;q<u^KO*`i1kR;S}U$?mfV)#=Qy^$j6#L;9?lq8<RgGSEs
zC8>!UuIuvBtj%)|r@SNN1J(_ni#|{r-u5L}=`%6IPFN<6Jdpu07kspK=lF#=EFsz^
zw$c}mS|*M?(e>E)?_RJ>y!r&z562(dw~p+A_*)OuW_H~DVU=m{Jxu1%I3`w$X+-~Q
z9PHcq@0J+PWa3>c$S46>X~c@@Itaso?I}|w_D>bLk4-z3=V74^gpN~4u2Z^N6E%#k
z>!@}WRK^K-limf^zmf9hFKpc>+L|%{nOrO}Sq*Qb%n+&p`>#T%1{POCo4E(Z88(c7
zM_04x`ir|oJI7l2gwXvYN0myG5z%i_hgPnPy{!^&80<zSTDo7`<V{MD30#DKhqqdo
zwA4MTe0!u{o!wTa8f^IPdXm)K`|nUozkZ(jL){$%&z{{i{^d%RXx=8W(&wfefc=Z>
zV;#94{JZKv?cycQPw|g^XdDwehJ$Y>+58tmo%|plTP9}FSySv2lZA5ucy)(4HHRc1
zqu7GEGQ2jvN~@MWj+Rbo6~|X(_xHFN|17wv&trd7ADrCrHG}KR$YiQah$cc|=_?Le
zZNvE~*uqx4D~9p7^)LwGO>!Uc8=HuE87VE{Hd<*Jq;||TwMmr=O4?|ZrAkIGaD@kt
zGbOEUsWY$fbf%^10eXQu96rss!|rperNEagevv?CODca-MDWN_nRvhkJDim5>|c!c
zfla<-Fk56%?oCL1j^KTjwxk0rUxrZUz+&u=fA%E<jWk<Tdo9qjr5u2L7Ij7!uLXCJ
zH&8OrmC4Kt-=%hsi;%5IyR^d8Ik*@cM5GpM6b4lC1E_>&_{v$`4E$nocakO~V#??Y
zw1iGflqAnaR3ZdrBYcTDP7P*-#orQTw)^?SR}-s89^@rw6WbU&pVULX$UMZ)c%2+V
zb$rTM@=9+#FW)Dj4sPXBk4W-*9_7=*RN1LEe%3Fk7n^q?^jPxg(y!BIPdqbN`6#nR
z{pxp))t~HsUi|xiD?d61mO>tI!t3yyDefG_^^N^M4u)fkD&PY95ZEd-{KWs?j3&hT
zYM21WFSH7t3_Nj8;5PL=e#hf++W=$0@j`z!Jn>%=2z>b(;CJHh;aW0@i{Sin3Gj00
zcftkHm>!=G(RdL0=ipiwhwlU&_w}Ig19A8zzzd<j8}Q|EINtOz{`P;v*8$!Hcp})i
zId1&>fIkKOhXKbrP3d0^M&Ad08XR|T9R4qWyP)p{+!=>2fDPvn=nnz@a2)<D;Gilh
z0pEk^G5O$!=K;rgSPnOsBH%pWFHPbVaX13}SAgFE<5mN{Xi5(F`ShgxO@L3!2mB+z
zagA;T9M^+j7s&YpaD0wy1w1(pzXmvbjXD6=OyU#(i(pVX^x>ZbA&0RXj4uZq%l{xw
zKE_u9j^%$GCm-WgfMflx0uG;PeeMGsK0?EJCg=&t#{r1@_<|6{aF|#FI9?ZX-IQGv
z76|QU0Q5i{A?LqY3jM|)=dAVQv*oMpcbo58!37)YYFu`=#}}-&KXA7u5Ok~9z?OjB
z)lgU0P;Yc{u7`qt&gD@X>~{RC$L{p3uc@~OsvG?9KJ~3iUu|^h^*X)I#pw)s&Rw8h
zF)2>%531N&u715e5ai%pU(IUuTD9Bdbr*OIdY9VxM@GVmxtsLPT;b-bZ3t}gd*I!T
z0e3FQ(P)f%73*}n3?2`sGU%}!t=^tnu_m`hqt4x=W^=iK@NhNwJ^A@HL61K_-w&!l
z=boU)m%DNe$niG#H*<cseN(-w+T(i2<Hn+$9-U4N+G)Hlr<2nPqOlZBgD+t4!q6c6
z@Y9vnuda7_w5*G(t+hA6>rI6x{T!&Ssr#PNYEEq^&}p>>y;0-Vx}1MfvT<54=)}2=
zde&g@78DqbDwo>z!)Lm5oY4)A=2(_<=`^03Gkrn7p}_5L@Yx}zzFLSg_8uew_$ip5
ze=}?t&aJTJ=2pQ4Zp+WVY3ePpiTSl!tB0^SRXQi9@jy=8bfOkVqCg0QidDq|R_Cj|
zB~*}jHNikEaFD1FymFX`--MwMxVR}0nCakF;TU6;%~%zDY3n^rIQKLvgU;*qx>Oo9
ztKl>{?1}=dpu)6yafu^%!sl;rc>*APjmKYCQ_tbhW0^X?#|KpaQe8TiQ(K^RY4vKO
zOXV%VQgs?ZYPq1|ZSru+I9Ee$t;Yq<F0T(ZOxp+IjWqz+v8!MOqsyRC=~Wt!$EooY
zc)WN8gHc!^mfW{Be~n;%oISTG1i@ER>z3m%;I#ujkIO#kGnLb*(->SHX925qy1=@c
zdf%pCZNqxb=L0p2o+b{ac~oj`9amG2om^0$F{;%CPADa<M{8J7>h}byC#gnYfysAI
zy&D8z(!nfYyqRv0j$n5^R}0cOH|J&bdQRtdvnnm?Tp`eqr`qm=zh4?)Oigf$Ak-d{
ziiO7BV(R3x`RZkO&a&A5G|ONxP$1*hvQU4V)}t}Hy#>Ys;1Yw*Xfzny9=G1*^r%!|
zZh_j%`RjDrWngQqXIUNRY6vu`R3MX`HalPKap^T)x61{z;Z_%Dmbun%%4hA9rv0Ds
zX`HM69HHKR=>LLILC}vzsaW=p#HoKu^q(QsA3XI|R-K;u7pB#zb^l-SDk$}15G#oN
zv6&T?_^VMX5Fc=>9mv&=*%egz0rYAY_;r%>V#lnh4kkko@;{~6*irBM9DA$aNfyU6
XTVbC%SlTB~mQxxCM@!)-I(_>u&eATE

diff --git a/benchmarks/ctests/example.ll b/benchmarks/tests/example.ll
similarity index 99%
rename from benchmarks/ctests/example.ll
rename to benchmarks/tests/example.ll
index 1fce17b..edb717f 100644
--- a/benchmarks/ctests/example.ll
+++ b/benchmarks/tests/example.ll
@@ -1,4 +1,4 @@
-; ModuleID = '../../benchmarks/ctests/example.bc'
+; ModuleID = '../../benchmarks/tests/example.bc'
 source_filename = "example.a08634fc28d17a86-cgu.0"
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 target triple = "arm64-apple-macosx12.0.0"
diff --git a/benchmarks/ctests/example.orig.ll b/benchmarks/tests/example.orig.ll
similarity index 99%
rename from benchmarks/ctests/example.orig.ll
rename to benchmarks/tests/example.orig.ll
index 7cbde04..921c0c6 100644
--- a/benchmarks/ctests/example.orig.ll
+++ b/benchmarks/tests/example.orig.ll
@@ -1,4 +1,4 @@
-; ModuleID = '../../benchmarks/ctests/example.bc'
+; ModuleID = '../../benchmarks/tests/example.bc'
 source_filename = "example.a08634fc28d17a86-cgu.0"
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 target triple = "arm64-apple-macosx12.0.0"
diff --git a/benchmarks/ctests/example.rs b/benchmarks/tests/example.rs
similarity index 100%
rename from benchmarks/ctests/example.rs
rename to benchmarks/tests/example.rs
diff --git a/benchmarks/ctests/example01.c b/benchmarks/tests/example01.c
similarity index 100%
rename from benchmarks/ctests/example01.c
rename to benchmarks/tests/example01.c
diff --git a/benchmarks/ctests/example01.ll b/benchmarks/tests/example01.ll
similarity index 95%
rename from benchmarks/ctests/example01.ll
rename to benchmarks/tests/example01.ll
index c38981e..1c44e5f 100644
--- a/benchmarks/ctests/example01.ll
+++ b/benchmarks/tests/example01.ll
@@ -1,5 +1,5 @@
-; ModuleID = '../../benchmarks/ctests/example01.c'
-source_filename = "../../benchmarks/ctests/example01.c"
+; ModuleID = '../../benchmarks/tests/example01.c'
+source_filename = "../../benchmarks/tests/example01.c"
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 target triple = "arm64-apple-macosx12.0.0"
 
diff --git a/benchmarks/ctests/example01.orig.ll b/benchmarks/tests/example01.orig.ll
similarity index 96%
rename from benchmarks/ctests/example01.orig.ll
rename to benchmarks/tests/example01.orig.ll
index 68b2445..9692ddb 100644
--- a/benchmarks/ctests/example01.orig.ll
+++ b/benchmarks/tests/example01.orig.ll
@@ -1,5 +1,5 @@
-; ModuleID = '../../benchmarks/ctests/example01.c'
-source_filename = "../../benchmarks/ctests/example01.c"
+; ModuleID = '../../benchmarks/tests/example01.c'
+source_filename = "../../benchmarks/tests/example01.c"
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 target triple = "arm64-apple-macosx12.0.0"
 
diff --git a/benchmarks/ctests/example02.c b/benchmarks/tests/example02.c
similarity index 100%
rename from benchmarks/ctests/example02.c
rename to benchmarks/tests/example02.c
diff --git a/benchmarks/ctests/example02.ll b/benchmarks/tests/example02.ll
similarity index 96%
rename from benchmarks/ctests/example02.ll
rename to benchmarks/tests/example02.ll
index 5a557b7..c6886ac 100644
--- a/benchmarks/ctests/example02.ll
+++ b/benchmarks/tests/example02.ll
@@ -1,5 +1,5 @@
-; ModuleID = '../../benchmarks/ctests/example02.c'
-source_filename = "../../benchmarks/ctests/example02.c"
+; ModuleID = '../../benchmarks/tests/example02.c'
+source_filename = "../../benchmarks/tests/example02.c"
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 target triple = "arm64-apple-macosx12.0.0"
 
diff --git a/benchmarks/ctests/example02.orig.ll b/benchmarks/tests/example02.orig.ll
similarity index 96%
rename from benchmarks/ctests/example02.orig.ll
rename to benchmarks/tests/example02.orig.ll
index 550dc07..9ec0125 100644
--- a/benchmarks/ctests/example02.orig.ll
+++ b/benchmarks/tests/example02.orig.ll
@@ -1,5 +1,5 @@
-; ModuleID = '../../benchmarks/ctests/example02.c'
-source_filename = "../../benchmarks/ctests/example02.c"
+; ModuleID = '../../benchmarks/tests/example02.c'
+source_filename = "../../benchmarks/tests/example02.c"
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 target triple = "arm64-apple-macosx12.0.0"
 
diff --git a/benchmarks/ctests/example03.c b/benchmarks/tests/example03.c
similarity index 100%
rename from benchmarks/ctests/example03.c
rename to benchmarks/tests/example03.c
diff --git a/benchmarks/ctests/example03.ll b/benchmarks/tests/example03.ll
similarity index 95%
rename from benchmarks/ctests/example03.ll
rename to benchmarks/tests/example03.ll
index f642b6b..a156f6a 100644
--- a/benchmarks/ctests/example03.ll
+++ b/benchmarks/tests/example03.ll
@@ -1,5 +1,5 @@
-; ModuleID = '../../benchmarks/ctests/example03.c'
-source_filename = "../../benchmarks/ctests/example03.c"
+; ModuleID = '../../benchmarks/tests/example03.c'
+source_filename = "../../benchmarks/tests/example03.c"
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 target triple = "arm64-apple-macosx12.0.0"
 
diff --git a/benchmarks/ctests/example03.orig.ll b/benchmarks/tests/example03.orig.ll
similarity index 96%
rename from benchmarks/ctests/example03.orig.ll
rename to benchmarks/tests/example03.orig.ll
index 89a0869..5a4464d 100644
--- a/benchmarks/ctests/example03.orig.ll
+++ b/benchmarks/tests/example03.orig.ll
@@ -1,5 +1,5 @@
-; ModuleID = '../../benchmarks/ctests/example03.c'
-source_filename = "../../benchmarks/ctests/example03.c"
+; ModuleID = '../../benchmarks/tests/example03.c'
+source_filename = "../../benchmarks/tests/example03.c"
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 target triple = "arm64-apple-macosx12.0.0"
 
diff --git a/benchmarks/ctests/example04.c b/benchmarks/tests/example04.c
similarity index 100%
rename from benchmarks/ctests/example04.c
rename to benchmarks/tests/example04.c
diff --git a/benchmarks/ctests/example04.ll b/benchmarks/tests/example04.ll
similarity index 95%
rename from benchmarks/ctests/example04.ll
rename to benchmarks/tests/example04.ll
index a3a1d72..1185b60 100644
--- a/benchmarks/ctests/example04.ll
+++ b/benchmarks/tests/example04.ll
@@ -1,5 +1,5 @@
-; ModuleID = '../../benchmarks/ctests/example04.c'
-source_filename = "../../benchmarks/ctests/example04.c"
+; ModuleID = '../../benchmarks/tests/example04.c'
+source_filename = "../../benchmarks/tests/example04.c"
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 target triple = "arm64-apple-macosx12.0.0"
 
diff --git a/benchmarks/ctests/example04.orig.ll b/benchmarks/tests/example04.orig.ll
similarity index 96%
rename from benchmarks/ctests/example04.orig.ll
rename to benchmarks/tests/example04.orig.ll
index 32405f4..c177c2f 100644
--- a/benchmarks/ctests/example04.orig.ll
+++ b/benchmarks/tests/example04.orig.ll
@@ -1,5 +1,5 @@
-; ModuleID = '../../benchmarks/ctests/example04.c'
-source_filename = "../../benchmarks/ctests/example04.c"
+; ModuleID = '../../benchmarks/tests/example04.c'
+source_filename = "../../benchmarks/tests/example04.c"
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 target triple = "arm64-apple-macosx12.0.0"
 
diff --git a/benchmarks/ctests/example05.c b/benchmarks/tests/example05.c
similarity index 100%
rename from benchmarks/ctests/example05.c
rename to benchmarks/tests/example05.c
diff --git a/benchmarks/ctests/example05.ll b/benchmarks/tests/example05.ll
similarity index 97%
rename from benchmarks/ctests/example05.ll
rename to benchmarks/tests/example05.ll
index aee5708..2902ef0 100644
--- a/benchmarks/ctests/example05.ll
+++ b/benchmarks/tests/example05.ll
@@ -1,5 +1,5 @@
-; ModuleID = '../../benchmarks/ctests/example05.c'
-source_filename = "../../benchmarks/ctests/example05.c"
+; ModuleID = '../../benchmarks/tests/example05.c'
+source_filename = "../../benchmarks/tests/example05.c"
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 target triple = "arm64-apple-macosx12.0.0"
 
diff --git a/benchmarks/ctests/example05.orig.ll b/benchmarks/tests/example05.orig.ll
similarity index 96%
rename from benchmarks/ctests/example05.orig.ll
rename to benchmarks/tests/example05.orig.ll
index c9e181a..ccf8289 100644
--- a/benchmarks/ctests/example05.orig.ll
+++ b/benchmarks/tests/example05.orig.ll
@@ -1,5 +1,5 @@
-; ModuleID = '../../benchmarks/ctests/example05.c'
-source_filename = "../../benchmarks/ctests/example05.c"
+; ModuleID = '../../benchmarks/tests/example05.c'
+source_filename = "../../benchmarks/tests/example05.c"
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 target triple = "arm64-apple-macosx12.0.0"
 
diff --git a/benchmarks/ctests/example06.c b/benchmarks/tests/example06.c
similarity index 100%
rename from benchmarks/ctests/example06.c
rename to benchmarks/tests/example06.c
diff --git a/benchmarks/ctests/example06.ll b/benchmarks/tests/example06.ll
similarity index 95%
rename from benchmarks/ctests/example06.ll
rename to benchmarks/tests/example06.ll
index fad0c8b..3cf6d2b 100644
--- a/benchmarks/ctests/example06.ll
+++ b/benchmarks/tests/example06.ll
@@ -1,5 +1,5 @@
-; ModuleID = '../../benchmarks/ctests/example06.c'
-source_filename = "../../benchmarks/ctests/example06.c"
+; ModuleID = '../../benchmarks/tests/example06.c'
+source_filename = "../../benchmarks/tests/example06.c"
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 target triple = "arm64-apple-macosx12.0.0"
 
diff --git a/benchmarks/ctests/example06.orig.ll b/benchmarks/tests/example06.orig.ll
similarity index 96%
rename from benchmarks/ctests/example06.orig.ll
rename to benchmarks/tests/example06.orig.ll
index 4aea90e..e2cc907 100644
--- a/benchmarks/ctests/example06.orig.ll
+++ b/benchmarks/tests/example06.orig.ll
@@ -1,5 +1,5 @@
-; ModuleID = '../../benchmarks/ctests/example06.c'
-source_filename = "../../benchmarks/ctests/example06.c"
+; ModuleID = '../../benchmarks/tests/example06.c'
+source_filename = "../../benchmarks/tests/example06.c"
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 target triple = "arm64-apple-macosx12.0.0"
 
diff --git a/benchmarks/ctests/example07.c b/benchmarks/tests/example07.c
similarity index 100%
rename from benchmarks/ctests/example07.c
rename to benchmarks/tests/example07.c
diff --git a/benchmarks/ctests/example07.ll b/benchmarks/tests/example07.ll
similarity index 97%
rename from benchmarks/ctests/example07.ll
rename to benchmarks/tests/example07.ll
index e12917a..ef3a2c6 100644
--- a/benchmarks/ctests/example07.ll
+++ b/benchmarks/tests/example07.ll
@@ -1,5 +1,5 @@
-; ModuleID = '../../benchmarks/ctests/example07.c'
-source_filename = "../../benchmarks/ctests/example07.c"
+; ModuleID = '../../benchmarks/tests/example07.c'
+source_filename = "../../benchmarks/tests/example07.c"
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 target triple = "arm64-apple-macosx12.0.0"
 
diff --git a/benchmarks/ctests/example07.orig.ll b/benchmarks/tests/example07.orig.ll
similarity index 96%
rename from benchmarks/ctests/example07.orig.ll
rename to benchmarks/tests/example07.orig.ll
index 299b165..8b10b06 100644
--- a/benchmarks/ctests/example07.orig.ll
+++ b/benchmarks/tests/example07.orig.ll
@@ -1,5 +1,5 @@
-; ModuleID = '../../benchmarks/ctests/example07.c'
-source_filename = "../../benchmarks/ctests/example07.c"
+; ModuleID = '../../benchmarks/tests/example07.c'
+source_filename = "../../benchmarks/tests/example07.c"
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 target triple = "arm64-apple-macosx12.0.0"
 
diff --git a/benchmarks/ctests/example08.c b/benchmarks/tests/example08.c
similarity index 100%
rename from benchmarks/ctests/example08.c
rename to benchmarks/tests/example08.c
diff --git a/benchmarks/ctests/example08.ll b/benchmarks/tests/example08.ll
similarity index 96%
rename from benchmarks/ctests/example08.ll
rename to benchmarks/tests/example08.ll
index 142b165..315670d 100644
--- a/benchmarks/ctests/example08.ll
+++ b/benchmarks/tests/example08.ll
@@ -1,5 +1,5 @@
-; ModuleID = '../../benchmarks/ctests/example08.c'
-source_filename = "../../benchmarks/ctests/example08.c"
+; ModuleID = '../../benchmarks/tests/example08.c'
+source_filename = "../../benchmarks/tests/example08.c"
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 target triple = "arm64-apple-macosx12.0.0"
 
diff --git a/benchmarks/ctests/example08.orig.ll b/benchmarks/tests/example08.orig.ll
similarity index 96%
rename from benchmarks/ctests/example08.orig.ll
rename to benchmarks/tests/example08.orig.ll
index f0dbf25..39e141a 100644
--- a/benchmarks/ctests/example08.orig.ll
+++ b/benchmarks/tests/example08.orig.ll
@@ -1,5 +1,5 @@
-; ModuleID = '../../benchmarks/ctests/example08.c'
-source_filename = "../../benchmarks/ctests/example08.c"
+; ModuleID = '../../benchmarks/tests/example08.c'
+source_filename = "../../benchmarks/tests/example08.c"
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 target triple = "arm64-apple-macosx12.0.0"
 
diff --git a/benchmarks/ctests/example09.c b/benchmarks/tests/example09.c
similarity index 100%
rename from benchmarks/ctests/example09.c
rename to benchmarks/tests/example09.c
diff --git a/benchmarks/ctests/example09.ll b/benchmarks/tests/example09.ll
similarity index 97%
rename from benchmarks/ctests/example09.ll
rename to benchmarks/tests/example09.ll
index 5ff5b64..02f1d07 100644
--- a/benchmarks/ctests/example09.ll
+++ b/benchmarks/tests/example09.ll
@@ -1,5 +1,5 @@
-; ModuleID = '../../benchmarks/ctests/example09.c'
-source_filename = "../../benchmarks/ctests/example09.c"
+; ModuleID = '../../benchmarks/tests/example09.c'
+source_filename = "../../benchmarks/tests/example09.c"
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 target triple = "arm64-apple-macosx12.0.0"
 
diff --git a/benchmarks/ctests/example09.orig.ll b/benchmarks/tests/example09.orig.ll
similarity index 96%
rename from benchmarks/ctests/example09.orig.ll
rename to benchmarks/tests/example09.orig.ll
index 03d06bb..9694cbd 100644
--- a/benchmarks/ctests/example09.orig.ll
+++ b/benchmarks/tests/example09.orig.ll
@@ -1,5 +1,5 @@
-; ModuleID = '../../benchmarks/ctests/example09.c'
-source_filename = "../../benchmarks/ctests/example09.c"
+; ModuleID = '../../benchmarks/tests/example09.c'
+source_filename = "../../benchmarks/tests/example09.c"
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 target triple = "arm64-apple-macosx12.0.0"
 
diff --git a/benchmarks/ctests/example10.c b/benchmarks/tests/example10.c
similarity index 100%
rename from benchmarks/ctests/example10.c
rename to benchmarks/tests/example10.c
diff --git a/benchmarks/ctests/example10.ll b/benchmarks/tests/example10.ll
similarity index 96%
rename from benchmarks/ctests/example10.ll
rename to benchmarks/tests/example10.ll
index a2df8f1..6741975 100644
--- a/benchmarks/ctests/example10.ll
+++ b/benchmarks/tests/example10.ll
@@ -1,5 +1,5 @@
-; ModuleID = '../../benchmarks/ctests/example10.c'
-source_filename = "../../benchmarks/ctests/example10.c"
+; ModuleID = '../../benchmarks/tests/example10.c'
+source_filename = "../../benchmarks/tests/example10.c"
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 target triple = "arm64-apple-macosx12.0.0"
 
diff --git a/benchmarks/ctests/example10.orig.ll b/benchmarks/tests/example10.orig.ll
similarity index 96%
rename from benchmarks/ctests/example10.orig.ll
rename to benchmarks/tests/example10.orig.ll
index bbe99ff..7f3c08e 100644
--- a/benchmarks/ctests/example10.orig.ll
+++ b/benchmarks/tests/example10.orig.ll
@@ -1,5 +1,5 @@
-; ModuleID = '../../benchmarks/ctests/example10.c'
-source_filename = "../../benchmarks/ctests/example10.c"
+; ModuleID = '../../benchmarks/tests/example10.c'
+source_filename = "../../benchmarks/tests/example10.c"
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 target triple = "arm64-apple-macosx12.0.0"
 
diff --git a/benchmarks/ctests/example11.ll b/benchmarks/tests/example11.ll
similarity index 99%
rename from benchmarks/ctests/example11.ll
rename to benchmarks/tests/example11.ll
index 82eea4a..5cc8424 100644
--- a/benchmarks/ctests/example11.ll
+++ b/benchmarks/tests/example11.ll
@@ -1,4 +1,4 @@
-; ModuleID = '../../benchmarks/ctests/example11.bc'
+; ModuleID = '../../benchmarks/tests/example11.bc'
 source_filename = "example11.808d53e03ac95af8-cgu.0"
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 target triple = "arm64-apple-macosx12.0.0"
diff --git a/benchmarks/ctests/example11.orig.ll b/benchmarks/tests/example11.orig.ll
similarity index 99%
rename from benchmarks/ctests/example11.orig.ll
rename to benchmarks/tests/example11.orig.ll
index 49a1d31..06b5fb9 100644
--- a/benchmarks/ctests/example11.orig.ll
+++ b/benchmarks/tests/example11.orig.ll
@@ -1,4 +1,4 @@
-; ModuleID = '../../benchmarks/ctests/example11.bc'
+; ModuleID = '../../benchmarks/tests/example11.bc'
 source_filename = "example11.808d53e03ac95af8-cgu.0"
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 target triple = "arm64-apple-macosx12.0.0"
diff --git a/benchmarks/ctests/example11.rs b/benchmarks/tests/example11.rs
similarity index 100%
rename from benchmarks/ctests/example11.rs
rename to benchmarks/tests/example11.rs
diff --git a/benchmarks/ctests/example12.ll b/benchmarks/tests/example12.ll
similarity index 100%
rename from benchmarks/ctests/example12.ll
rename to benchmarks/tests/example12.ll
diff --git a/benchmarks/ctests/example12.orig.ll b/benchmarks/tests/example12.orig.ll
similarity index 100%
rename from benchmarks/ctests/example12.orig.ll
rename to benchmarks/tests/example12.orig.ll
diff --git a/benchmarks/ctests/example12.rs b/benchmarks/tests/example12.rs
similarity index 100%
rename from benchmarks/ctests/example12.rs
rename to benchmarks/tests/example12.rs
diff --git a/ocelot/AtomicRegionInference/Makefile b/ocelot/AtomicRegionInference/Makefile
index 00d4b7e..bacbb17 100644
--- a/ocelot/AtomicRegionInference/Makefile
+++ b/ocelot/AtomicRegionInference/Makefile
@@ -10,7 +10,7 @@ all:
 	make eg7
 	make eg8
 	make eg9
-	make eg19
+	make eg10
 	make egr
 	make eg11
 	
@@ -42,59 +42,59 @@ eg12:
 	TEST=example12 make testr
 
 run_eg1:
-	TEST=example01 make run && ../../benchmarks/ctests/example01.out
+	TEST=example01 make run && ../../benchmarks/tests/example01.out
 run_eg2:
-	TEST=example02 make run && ../../benchmarks/ctests/example02.out
+	TEST=example02 make run && ../../benchmarks/tests/example02.out
 run_eg3:
-	TEST=example03 make run && ../../benchmarks/ctests/example03.out
+	TEST=example03 make run && ../../benchmarks/tests/example03.out
 run_eg4:
-	TEST=example04 make run && ../../benchmarks/ctests/example04.out
+	TEST=example04 make run && ../../benchmarks/tests/example04.out
 run_eg5:
-	TEST=example05 make run && ../../benchmarks/ctests/example05.out
+	TEST=example05 make run && ../../benchmarks/tests/example05.out
 run_eg6:
-	TEST=example06 make run && ../../benchmarks/ctests/example06.out
+	TEST=example06 make run && ../../benchmarks/tests/example06.out
 run_eg7:
-	TEST=example07 make run && ../../benchmarks/ctests/example07.out
+	TEST=example07 make run && ../../benchmarks/tests/example07.out
 run_eg8:
-	TEST=example08 make run && ../../benchmarks/ctests/example08.out
+	TEST=example08 make run && ../../benchmarks/tests/example08.out
 run_eg9:
-	TEST=example09 make run && ../../benchmarks/ctests/example09.out
+	TEST=example09 make run && ../../benchmarks/tests/example09.out
 run_eg10:
-	TEST=example10 make run && ../../benchmarks/ctests/example10.out
+	TEST=example10 make run && ../../benchmarks/tests/example10.out
 
 test:
 	$(MAKE) -C build
 	clang -S -emit-llvm\
 		-fno-discard-value-names\
-		../../benchmarks/ctests/$(TEST).c\
-		-o ../../benchmarks/ctests/$(TEST).orig.ll
+		../../benchmarks/tests/$(TEST).c\
+		-o ../../benchmarks/tests/$(TEST).orig.ll
 	clang -S -emit-llvm\
 		-fpass-plugin=build/src/InferAtomsPass.dylib\
 		-fno-discard-value-names\
-		../../benchmarks/ctests/$(TEST).c\
-		-o ../../benchmarks/ctests/$(TEST).ll
+		../../benchmarks/tests/$(TEST).c\
+		-o ../../benchmarks/tests/$(TEST).ll
 
 testr:
 	$(MAKE) -C build
-	rustc ../../benchmarks/ctests/$(TEST).rs --emit llvm-bc -o ../../benchmarks/ctests/$(TEST).bc
+	rustc ../../benchmarks/tests/$(TEST).rs --emit llvm-bc -o ../../benchmarks/tests/$(TEST).bc
 	clang -S -emit-llvm\
 		-fno-discard-value-names\
-		../../benchmarks/ctests/$(TEST).bc\
-		-o ../../benchmarks/ctests/$(TEST).orig.ll
+		../../benchmarks/tests/$(TEST).bc\
+		-o ../../benchmarks/tests/$(TEST).orig.ll
 	clang -S -emit-llvm\
 		-fpass-plugin=build/src/InferAtomsPass.dylib\
 		-fno-discard-value-names\
-		../../benchmarks/ctests/$(TEST).bc\
-		-o ../../benchmarks/ctests/$(TEST).ll
+		../../benchmarks/tests/$(TEST).bc\
+		-o ../../benchmarks/tests/$(TEST).ll
 
 run:
 	$(MAKE) -C build
 	clang -fpass-plugin=build/src/InferAtomsPass.dylib\
-		../../benchmarks/ctests/$(TEST).c\
-		-o ../../benchmarks/ctests/$(TEST).out
+		../../benchmarks/tests/$(TEST).c\
+		-o ../../benchmarks/tests/$(TEST).out
 
 clean_tests:
-	find ../../benchmarks/ctests -name "*.ll" -exec rm -rf {} \;
+	find ../../benchmarks/tests -name "*.ll" -exec rm -rf {} \;
 
 clean:
 	rm -rf build
diff --git a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp
index b70b112..64f4cf3 100644
--- a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp
+++ b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp
@@ -241,7 +241,6 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v
               isAtomicBoundary = true;
           }
 
-          // TODO: Exception with the entry block to a loop (prepone untainted insts instead)
           if (find(targetInsts.begin(), targetInsts.end(), &I) == targetInsts.end() && !isa<AllocaInst>(&I) && !inExistingSet && !isAtomicBoundary) {
 #if DEBUG
             errs() << "__Should be delayed__\n";

From 883cb9c8d3fe36e8fe3f5f90d07e60b67d28323b Mon Sep 17 00:00:00 2001
From: Robert Zhang <jiaxuanzhang066@gmail.com>
Date: Fri, 15 Mar 2024 19:58:05 -0700
Subject: [PATCH 17/18] [InferAtomsPass] Slight tweak to support Rust programs
 with loops

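Only small changes are required for the
optimization to work on Rust programs involving
loops:
- `patchClonedBlock` now also remaps operand 0 of
`extractvalue` instructions to the cloned
instruction.
- The assertion that a tainted loop has exactly
three blocks is disabled.
- The `forEnd` block is taken from operand 2 of the
loop's conditional branch instead of operand 1.
- In the cloned `for.body` block, successor 0 is set
to `clonedLoop[0]` instead of `clonedLoop[2]`.
- `FreshConsistent` in `intermittent.rs` is marked
`#[no_mangle]`.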

See tests `example.rs`, `example11.rs`, and `example12.rs`.
---
 benchmarks/intermittent.rs                    |   1 +
 benchmarks/tests/example.ll                   |   2 +-
 benchmarks/tests/example.orig.ll              |   2 +-
 benchmarks/tests/example11.ll                 |   2 +-
 benchmarks/tests/example11.orig.ll            |   2 +-
 benchmarks/tests/example12.ll                 | 274 ------------------
 benchmarks/tests/example12.orig.ll            |   4 +-
 ocelot/AtomicRegionInference/src/Helpers.cpp  |   4 +
 .../src/InferFreshCons.cpp                    |  13 +-
 9 files changed, 18 insertions(+), 286 deletions(-)
 delete mode 100644 benchmarks/tests/example12.ll

diff --git a/benchmarks/intermittent.rs b/benchmarks/intermittent.rs
index 8803b29..de3631d 100644
--- a/benchmarks/intermittent.rs
+++ b/benchmarks/intermittent.rs
@@ -29,6 +29,7 @@ fn Consistent<T>(_var: T, _id: u16) -> () {}
 
 #[allow(dead_code)]
 #[allow(non_snake_case)]
+#[no_mangle]
 fn FreshConsistent<T>(_var: T, _id: u16) -> () {}
 
 //#[inline(always)]
diff --git a/benchmarks/tests/example.ll b/benchmarks/tests/example.ll
index edb717f..40607b5 100644
--- a/benchmarks/tests/example.ll
+++ b/benchmarks/tests/example.ll
@@ -174,5 +174,5 @@ attributes #3 = { "frame-pointer"="non-leaf" "target-cpu"="apple-m1" }
 !0 = !{i32 8, !"PIC Level", i32 2}
 !1 = !{i32 7, !"PIE Level", i32 2}
 !2 = !{!"rustc version 1.73.0 (cc66ad468 2023-10-03)"}
-!3 = !{i32 1115277}
+!3 = !{i32 1115290}
 !4 = !{}
diff --git a/benchmarks/tests/example.orig.ll b/benchmarks/tests/example.orig.ll
index 921c0c6..7118a00 100644
--- a/benchmarks/tests/example.orig.ll
+++ b/benchmarks/tests/example.orig.ll
@@ -179,5 +179,5 @@ attributes #3 = { "frame-pointer"="non-leaf" "target-cpu"="apple-m1" }
 !0 = !{i32 8, !"PIC Level", i32 2}
 !1 = !{i32 7, !"PIE Level", i32 2}
 !2 = !{!"rustc version 1.73.0 (cc66ad468 2023-10-03)"}
-!3 = !{i32 1115277}
+!3 = !{i32 1115290}
 !4 = !{}
diff --git a/benchmarks/tests/example11.ll b/benchmarks/tests/example11.ll
index 5cc8424..05a924d 100644
--- a/benchmarks/tests/example11.ll
+++ b/benchmarks/tests/example11.ll
@@ -175,5 +175,5 @@ attributes #3 = { "frame-pointer"="non-leaf" "target-cpu"="apple-m1" }
 !0 = !{i32 8, !"PIC Level", i32 2}
 !1 = !{i32 7, !"PIE Level", i32 2}
 !2 = !{!"rustc version 1.73.0 (cc66ad468 2023-10-03)"}
-!3 = !{i32 1115326}
+!3 = !{i32 1115339}
 !4 = !{}
diff --git a/benchmarks/tests/example11.orig.ll b/benchmarks/tests/example11.orig.ll
index 06b5fb9..fff931a 100644
--- a/benchmarks/tests/example11.orig.ll
+++ b/benchmarks/tests/example11.orig.ll
@@ -180,5 +180,5 @@ attributes #3 = { "frame-pointer"="non-leaf" "target-cpu"="apple-m1" }
 !0 = !{i32 8, !"PIC Level", i32 2}
 !1 = !{i32 7, !"PIE Level", i32 2}
 !2 = !{!"rustc version 1.73.0 (cc66ad468 2023-10-03)"}
-!3 = !{i32 1115326}
+!3 = !{i32 1115339}
 !4 = !{}
diff --git a/benchmarks/tests/example12.ll b/benchmarks/tests/example12.ll
deleted file mode 100644
index 7438e4d..0000000
--- a/benchmarks/tests/example12.ll
+++ /dev/null
@@ -1,274 +0,0 @@
-; ModuleID = '../../benchmarks/ctests/example12.bc'
-source_filename = "example12.2ec73fdcc3bed253-cgu.0"
-target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
-target triple = "arm64-apple-macosx12.0.0"
-
-@vtable.0 = private unnamed_addr constant <{ ptr, [16 x i8], ptr, ptr, ptr }> <{ ptr @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17h47ea1b16ba3e87a4E", [16 x i8] c"\08\00\00\00\00\00\00\00\08\00\00\00\00\00\00\00", ptr @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h2a2856448793d4cbE", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h339710bdc8eea187E", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h339710bdc8eea187E" }>, align 8
-@IO_NAME = constant <{ ptr }> <{ ptr @input }>, align 8
-@atomic_depth = external global i16
-
-; Function Attrs: noinline uwtable
-define internal void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17h9c77676ca687ad52E(ptr %f) unnamed_addr #0 {
-start:
-  call void @_ZN4core3ops8function6FnOnce9call_once17hc6a9dc29a00ac63eE(ptr %f)
-  call void asm sideeffect "", "~{memory}"(), !srcloc !3
-  ret void
-}
-
-; Function Attrs: uwtable
-define hidden i64 @_ZN3std2rt10lang_start17h04742dcfd5f87c29E(ptr %main, i64 %argc, ptr %argv, i8 %sigpipe) unnamed_addr #1 {
-start:
-  %_8 = alloca ptr, align 8
-  %_5 = alloca i64, align 8
-  store ptr %main, ptr %_8, align 8
-  %0 = call i64 @_ZN3std2rt19lang_start_internal17hadaf077a6dd0140bE(ptr align 1 %_8, ptr align 8 @vtable.0, i64 %argc, ptr %argv, i8 %sigpipe)
-  store i64 %0, ptr %_5, align 8
-  %v = load i64, ptr %_5, align 8, !noundef !4
-  ret i64 %v
-}
-
-; Function Attrs: inlinehint uwtable
-define internal i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h339710bdc8eea187E"(ptr align 8 %_1) unnamed_addr #2 {
-start:
-  %_4 = load ptr, ptr %_1, align 8, !nonnull !4, !noundef !4
-  call void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17h9c77676ca687ad52E(ptr %_4)
-  %self = call i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17hc6cb452c4729c1f5E"()
-  %_0 = zext i8 %self to i32
-  ret i32 %_0
-}
-
-; Function Attrs: inlinehint uwtable
-define internal i32 @"_ZN47_$LT$i32$u20$as$u20$core..iter..range..Step$GT$17forward_unchecked17h3be66287c3fcbba4E"(i32 %start1, i64 %n) unnamed_addr #2 {
-start:
-  %rhs = trunc i64 %n to i32
-  %_0 = add nsw i32 %start1, %rhs
-  ret i32 %_0
-}
-
-; Function Attrs: inlinehint uwtable
-define internal i32 @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h2a2856448793d4cbE"(ptr %_1) unnamed_addr #2 {
-start:
-  %_2 = alloca {}, align 1
-  %0 = load ptr, ptr %_1, align 8, !nonnull !4, !noundef !4
-  %_0 = call i32 @_ZN4core3ops8function6FnOnce9call_once17had97088f55991c2cE(ptr %0)
-  ret i32 %_0
-}
-
-; Function Attrs: inlinehint uwtable
-define internal i32 @_ZN4core3ops8function6FnOnce9call_once17had97088f55991c2cE(ptr %0) unnamed_addr #2 personality ptr @rust_eh_personality {
-start:
-  %1 = alloca { ptr, i32 }, align 8
-  %_2 = alloca {}, align 1
-  %_1 = alloca ptr, align 8
-  store ptr %0, ptr %_1, align 8
-  %_0 = invoke i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h339710bdc8eea187E"(ptr align 8 %_1)
-          to label %bb1 unwind label %cleanup
-
-bb3:                                              ; preds = %cleanup
-  %2 = load ptr, ptr %1, align 8, !noundef !4
-  %3 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 1
-  %4 = load i32, ptr %3, align 8, !noundef !4
-  %5 = insertvalue { ptr, i32 } poison, ptr %2, 0
-  %6 = insertvalue { ptr, i32 } %5, i32 %4, 1
-  resume { ptr, i32 } %6
-
-cleanup:                                          ; preds = %start
-  %7 = landingpad { ptr, i32 }
-          cleanup
-  %8 = extractvalue { ptr, i32 } %7, 0
-  %9 = extractvalue { ptr, i32 } %7, 1
-  %10 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 0
-  store ptr %8, ptr %10, align 8
-  %11 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 1
-  store i32 %9, ptr %11, align 8
-  br label %bb3
-
-bb1:                                              ; preds = %start
-  ret i32 %_0
-}
-
-; Function Attrs: inlinehint uwtable
-define internal void @_ZN4core3ops8function6FnOnce9call_once17hc6a9dc29a00ac63eE(ptr %_1) unnamed_addr #2 {
-start:
-  %_2 = alloca {}, align 1
-  call void %_1()
-  ret void
-}
-
-; Function Attrs: inlinehint uwtable
-define internal void @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17h47ea1b16ba3e87a4E"(ptr align 8 %_1) unnamed_addr #2 {
-start:
-  ret void
-}
-
-; Function Attrs: inlinehint uwtable
-define internal { i32, i32 } @"_ZN4core4iter5range101_$LT$impl$u20$core..iter..traits..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h3186fffc13203a36E"(ptr align 4 %self) unnamed_addr #2 {
-start:
-  %0 = call { i32, i32 } @"_ZN89_$LT$core..ops..range..Range$LT$T$GT$$u20$as$u20$core..iter..range..RangeIteratorImpl$GT$9spec_next17h1b9638ceb504bcf5E"(ptr align 4 %self)
-  %_0.0 = extractvalue { i32, i32 } %0, 0
-  %_0.1 = extractvalue { i32, i32 } %0, 1
-  %1 = insertvalue { i32, i32 } poison, i32 %_0.0, 0
-  %2 = insertvalue { i32, i32 } %1, i32 %_0.1, 1
-  ret { i32, i32 } %2
-}
-
-; Function Attrs: inlinehint uwtable
-define internal i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17hc6cb452c4729c1f5E"() unnamed_addr #2 {
-start:
-  ret i8 0
-}
-
-; Function Attrs: inlinehint uwtable
-define internal { i32, i32 } @"_ZN63_$LT$I$u20$as$u20$core..iter..traits..collect..IntoIterator$GT$9into_iter17h9c831713eeb3e5efE"(i32 %self.0, i32 %self.1) unnamed_addr #2 {
-start:
-  %0 = insertvalue { i32, i32 } poison, i32 %self.0, 0
-  %1 = insertvalue { i32, i32 } %0, i32 %self.1, 1
-  ret { i32, i32 } %1
-}
-
-; Function Attrs: inlinehint uwtable
-define internal { i32, i32 } @"_ZN89_$LT$core..ops..range..Range$LT$T$GT$$u20$as$u20$core..iter..range..RangeIteratorImpl$GT$9spec_next17h1b9638ceb504bcf5E"(ptr align 4 %self) unnamed_addr #2 {
-start:
-  %_0 = alloca { i32, i32 }, align 4
-  %_4 = getelementptr inbounds { i32, i32 }, ptr %self, i32 0, i32 1
-  %_3.i = load i32, ptr %self, align 4, !noundef !4
-  %_4.i = load i32, ptr %_4, align 4, !noundef !4
-  %_0.i = icmp slt i32 %_3.i, %_4.i
-  br i1 %_0.i, label %bb2, label %bb4
-
-bb4:                                              ; preds = %start
-  store i32 0, ptr %_0, align 4
-  br label %bb5
-
-bb2:                                              ; preds = %start
-  %old = load i32, ptr %self, align 4, !noundef !4
-  %_6 = call i32 @"_ZN47_$LT$i32$u20$as$u20$core..iter..range..Step$GT$17forward_unchecked17h3be66287c3fcbba4E"(i32 %old, i64 1)
-  store i32 %_6, ptr %self, align 4
-  %0 = getelementptr inbounds { i32, i32 }, ptr %_0, i32 0, i32 1
-  store i32 %old, ptr %0, align 4
-  store i32 1, ptr %_0, align 4
-  br label %bb5
-
-bb5:                                              ; preds = %bb2, %bb4
-  %1 = getelementptr inbounds { i32, i32 }, ptr %_0, i32 0, i32 0
-  %2 = load i32, ptr %1, align 4, !range !5, !noundef !4
-  %3 = getelementptr inbounds { i32, i32 }, ptr %_0, i32 0, i32 1
-  %4 = load i32, ptr %3, align 4
-  %5 = insertvalue { i32, i32 } poison, i32 %2, 0
-  %6 = insertvalue { i32, i32 } %5, i32 %4, 1
-  ret { i32, i32 } %6
-}
-
-; Function Attrs: uwtable
-define dso_local i32 @input() unnamed_addr #1 {
-start:
-  ret i32 0
-}
-
-; Function Attrs: uwtable
-define dso_local void @log(i32 %i) unnamed_addr #1 {
-start:
-  ret void
-}
-
-; Function Attrs: uwtable
-define dso_local void @app() unnamed_addr #1 {
-start:
-  %_5 = alloca { i32, i32 }, align 4
-  %iter = alloca { i32, i32 }, align 4
-  %_3 = alloca { i32, i32 }, align 4
-  call void @atomic_start()
-  %x = call i32 @input()
-  store i32 0, ptr %_3, align 4
-  %0 = getelementptr inbounds { i32, i32 }, ptr %_3, i32 0, i32 1
-  store i32 10, ptr %0, align 4
-  %1 = getelementptr inbounds { i32, i32 }, ptr %_3, i32 0, i32 0
-  %2 = load i32, ptr %1, align 4, !noundef !4
-  %3 = getelementptr inbounds { i32, i32 }, ptr %_3, i32 0, i32 1
-  %4 = load i32, ptr %3, align 4, !noundef !4
-  %5 = call { i32, i32 } @"_ZN63_$LT$I$u20$as$u20$core..iter..traits..collect..IntoIterator$GT$9into_iter17h9c831713eeb3e5efE"(i32 %2, i32 %4)
-  %_2.0 = extractvalue { i32, i32 } %5, 0
-  %_2.1 = extractvalue { i32, i32 } %5, 1
-  %6 = getelementptr inbounds { i32, i32 }, ptr %iter, i32 0, i32 0
-  store i32 %_2.0, ptr %6, align 4
-  %7 = getelementptr inbounds { i32, i32 }, ptr %iter, i32 0, i32 1
-  store i32 %_2.1, ptr %7, align 4
-  br label %bb3
-
-bb3:                                              ; preds = %bb5, %start
-  %8 = call { i32, i32 } @"_ZN4core4iter5range101_$LT$impl$u20$core..iter..traits..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h3186fffc13203a36E"(ptr align 4 %iter)
-  store { i32, i32 } %8, ptr %_5, align 4
-  %9 = load i32, ptr %_5, align 4, !range !5, !noundef !4
-  %_7 = zext i32 %9 to i64
-  %10 = icmp eq i64 %_7, 0
-  br i1 %10, label %bb7, label %bb5
-
-bb7:                                              ; preds = %bb3
-  call void @atomic_end()
-  ret void
-
-bb5:                                              ; preds = %bb3
-  call void @log(i32 1)
-  call void @log(i32 %x)
-  br label %bb3
-
-bb6:                                              ; No predecessors!
-  unreachable
-}
-
-; Function Attrs: uwtable
-define internal void @_ZN9example124main17h35539225bd174e48E() unnamed_addr #1 {
-start:
-  call void @app()
-  ret void
-}
-
-; Function Attrs: uwtable
-define dso_local void @atomic_start() unnamed_addr #1 {
-start:
-  %local = load i16, ptr @atomic_depth, align 2, !noundef !4
-  call void @start_atomic()
-  ret void
-}
-
-; Function Attrs: uwtable
-define dso_local void @atomic_end() unnamed_addr #1 {
-start:
-  call void @end_atomic()
-  ret void
-}
-
-; Function Attrs: uwtable
-declare i64 @_ZN3std2rt19lang_start_internal17hadaf077a6dd0140bE(ptr align 1, ptr align 8, i64, ptr, i8) unnamed_addr #1
-
-; Function Attrs: uwtable
-declare i32 @rust_eh_personality(i32, i32, i64, ptr, ptr) unnamed_addr #1
-
-; Function Attrs: uwtable
-declare void @start_atomic() unnamed_addr #1
-
-; Function Attrs: uwtable
-declare void @end_atomic() unnamed_addr #1
-
-define i32 @main(i32 %0, ptr %1) unnamed_addr #3 {
-top:
-  %2 = sext i32 %0 to i64
-  %3 = call i64 @_ZN3std2rt10lang_start17h04742dcfd5f87c29E(ptr @_ZN9example124main17h35539225bd174e48E, i64 %2, ptr %1, i8 0)
-  %4 = trunc i64 %3 to i32
-  ret i32 %4
-}
-
-attributes #0 = { noinline uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" }
-attributes #1 = { uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" }
-attributes #2 = { inlinehint uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" }
-attributes #3 = { "frame-pointer"="non-leaf" "target-cpu"="apple-m1" }
-
-!llvm.module.flags = !{!0, !1}
-!llvm.ident = !{!2}
-
-!0 = !{i32 8, !"PIC Level", i32 2}
-!1 = !{i32 7, !"PIE Level", i32 2}
-!2 = !{!"rustc version 1.73.0 (cc66ad468 2023-10-03)"}
-!3 = !{i32 1453212}
-!4 = !{}
-!5 = !{i32 0, i32 2}
diff --git a/benchmarks/tests/example12.orig.ll b/benchmarks/tests/example12.orig.ll
index a4c7d70..3ccefe2 100644
--- a/benchmarks/tests/example12.orig.ll
+++ b/benchmarks/tests/example12.orig.ll
@@ -1,4 +1,4 @@
-; ModuleID = '../../benchmarks/ctests/example12.bc'
+; ModuleID = '../../benchmarks/tests/example12.bc'
 source_filename = "example12.2ec73fdcc3bed253-cgu.0"
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 target triple = "arm64-apple-macosx12.0.0"
@@ -274,6 +274,6 @@ attributes #3 = { "frame-pointer"="non-leaf" "target-cpu"="apple-m1" }
 !0 = !{i32 8, !"PIC Level", i32 2}
 !1 = !{i32 7, !"PIE Level", i32 2}
 !2 = !{!"rustc version 1.73.0 (cc66ad468 2023-10-03)"}
-!3 = !{i32 1453212}
+!3 = !{i32 1453225}
 !4 = !{}
 !5 = !{i32 0, i32 2}
diff --git a/ocelot/AtomicRegionInference/src/Helpers.cpp b/ocelot/AtomicRegionInference/src/Helpers.cpp
index e446001..d71bf11 100644
--- a/ocelot/AtomicRegionInference/src/Helpers.cpp
+++ b/ocelot/AtomicRegionInference/src/Helpers.cpp
@@ -80,6 +80,10 @@ void patchClonedBlock(BasicBlock* block, inst_inst_map clonedInsts) {
       auto* cond = dyn_cast<Instruction>(ci->getOperand(0));
       inst_inst_map::iterator it = clonedInsts.find(cond);
       if (it != clonedInsts.end()) ci->setOperand(0, it->second);
+    } else if (auto* ei = dyn_cast<ExtractValueInst>(&I)) {
+      auto* operand = dyn_cast<Instruction>(ei->getOperand(0));
+      inst_inst_map::iterator it = clonedInsts.find(operand);
+      if (it != clonedInsts.end()) ei->setOperand(0, it->second);
     }
   }
 }
diff --git a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp
index 64f4cf3..eb53e0f 100644
--- a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp
+++ b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp
@@ -314,7 +314,7 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v
         inst_inst_map instClones;
 
         auto loopBlocks = taintedLoop->getBlocks();
-        assert(loopBlocks.size() == 3);
+        // assert(loopBlocks.size() == 3);
 
         for (int i = 0; i < loopBlocks.size(); i++) {
           auto* block = loopBlocks[i];
@@ -366,7 +366,8 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v
                   assert(bi->isConditional());
                   bi->setCondition(prev);
 
-                  if (auto* B = dyn_cast<BasicBlock>(bi->getOperand(1))) {
+                  if (auto* B = dyn_cast<BasicBlock>(bi->getOperand(2))) {
+                    // errs() << "ayo: " << *B << "\n";
                     forEnd = B;
                   }
                 }
@@ -403,7 +404,7 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v
           clonedLoop.push_back(clonedBlock);
         }
 
-        BasicBlock* forEndClone = BasicBlock::Create(forEnd->getContext(), forEnd->getName(), homeFun);
+        auto* forEndClone = BasicBlock::Create(forEnd->getContext(), forEnd->getName(), homeFun);
         IRBuilder builder(forEndClone);
         for (auto& I : *forEnd) {
           if (!isa<CallInst>(I) && !isa<LoadInst>(I)) {
@@ -438,7 +439,8 @@ void InferFreshCons::addRegion(inst_vec targetInsts, inst_vec_vec* other, inst_v
               }
               // for.body
               else if (i == 1) {
-                bi->setSuccessor(0, clonedLoop[2]);
+                // bi->setSuccessor(0, clonedLoop[2]);
+                bi->setSuccessor(0, clonedLoop[0]);
               }
               // for.inc
               else if (i == 2) {
@@ -653,8 +655,7 @@ Function* InferFreshCons::findCandidate(std::map<Instruction*, BasicBlock*> bloc
   if (funList.size() == 1) return funList.at(0);
 
   /* Algo goal: get the deepest function that still calls (or is) all funcs in funcList.
-   * Consider: multiple calls? Should be dealt with in the addRegion -- eventually each caller
-   * gets its own region
+   * Consider: multiple calls? Should be dealt with in the addRegion -- eventually each caller gets its own region
    */
   Function* goal = nullptr;
 #if DEBUG

From ca0e66775ec38425251766e6fbf5761b2f10016d Mon Sep 17 00:00:00 2001
From: Robert Zhang <jiaxuanzhang066@gmail.com>
Date: Sun, 17 Mar 2024 00:03:24 -0400
Subject: [PATCH 18/18] [InferAtomsPass] More Rust loop tests

---
 benchmarks/tests/example12.ll         | 290 ++++++++++++++++++++++++++
 benchmarks/tests/example13.ll         | 275 ++++++++++++++++++++++++
 benchmarks/tests/example13.orig.ll    | 280 +++++++++++++++++++++++++
 benchmarks/tests/example13.rs         |  25 +++
 ocelot/AtomicRegionInference/Makefile |   2 +
 5 files changed, 872 insertions(+)
 create mode 100644 benchmarks/tests/example12.ll
 create mode 100644 benchmarks/tests/example13.ll
 create mode 100644 benchmarks/tests/example13.orig.ll
 create mode 100644 benchmarks/tests/example13.rs

diff --git a/benchmarks/tests/example12.ll b/benchmarks/tests/example12.ll
new file mode 100644
index 0000000..8ad00f3
--- /dev/null
+++ b/benchmarks/tests/example12.ll
@@ -0,0 +1,290 @@
+; ModuleID = '../../benchmarks/tests/example12.bc'
+source_filename = "example12.2ec73fdcc3bed253-cgu.0"
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-macosx12.0.0"
+
+@vtable.0 = private unnamed_addr constant <{ ptr, [16 x i8], ptr, ptr, ptr }> <{ ptr @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17h47ea1b16ba3e87a4E", [16 x i8] c"\08\00\00\00\00\00\00\00\08\00\00\00\00\00\00\00", ptr @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h2a2856448793d4cbE", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h339710bdc8eea187E", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h339710bdc8eea187E" }>, align 8
+@IO_NAME = constant <{ ptr }> <{ ptr @input }>, align 8
+@atomic_depth = external global i16
+
+; Function Attrs: noinline uwtable
+define internal void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17h9c77676ca687ad52E(ptr %f) unnamed_addr #0 {
+start:
+  call void @_ZN4core3ops8function6FnOnce9call_once17hc6a9dc29a00ac63eE(ptr %f)
+  call void asm sideeffect "", "~{memory}"(), !srcloc !3
+  ret void
+}
+
+; Function Attrs: uwtable
+define hidden i64 @_ZN3std2rt10lang_start17h04742dcfd5f87c29E(ptr %main, i64 %argc, ptr %argv, i8 %sigpipe) unnamed_addr #1 {
+start:
+  %_8 = alloca ptr, align 8
+  %_5 = alloca i64, align 8
+  store ptr %main, ptr %_8, align 8
+  %0 = call i64 @_ZN3std2rt19lang_start_internal17hadaf077a6dd0140bE(ptr align 1 %_8, ptr align 8 @vtable.0, i64 %argc, ptr %argv, i8 %sigpipe)
+  store i64 %0, ptr %_5, align 8
+  %v = load i64, ptr %_5, align 8, !noundef !4
+  ret i64 %v
+}
+
+; Function Attrs: inlinehint uwtable
+define internal i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h339710bdc8eea187E"(ptr align 8 %_1) unnamed_addr #2 {
+start:
+  %_4 = load ptr, ptr %_1, align 8, !nonnull !4, !noundef !4
+  call void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17h9c77676ca687ad52E(ptr %_4)
+  %self = call i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17hc6cb452c4729c1f5E"()
+  %_0 = zext i8 %self to i32
+  ret i32 %_0
+}
+
+; Function Attrs: inlinehint uwtable
+define internal i32 @"_ZN47_$LT$i32$u20$as$u20$core..iter..range..Step$GT$17forward_unchecked17h3be66287c3fcbba4E"(i32 %start1, i64 %n) unnamed_addr #2 {
+start:
+  %rhs = trunc i64 %n to i32
+  %_0 = add nsw i32 %start1, %rhs
+  ret i32 %_0
+}
+
+; Function Attrs: inlinehint uwtable
+define internal i32 @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h2a2856448793d4cbE"(ptr %_1) unnamed_addr #2 {
+start:
+  %_2 = alloca {}, align 1
+  %0 = load ptr, ptr %_1, align 8, !nonnull !4, !noundef !4
+  %_0 = call i32 @_ZN4core3ops8function6FnOnce9call_once17had97088f55991c2cE(ptr %0)
+  ret i32 %_0
+}
+
+; Function Attrs: inlinehint uwtable
+define internal i32 @_ZN4core3ops8function6FnOnce9call_once17had97088f55991c2cE(ptr %0) unnamed_addr #2 personality ptr @rust_eh_personality {
+start:
+  %1 = alloca { ptr, i32 }, align 8
+  %_2 = alloca {}, align 1
+  %_1 = alloca ptr, align 8
+  store ptr %0, ptr %_1, align 8
+  %_0 = invoke i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h339710bdc8eea187E"(ptr align 8 %_1)
+          to label %bb1 unwind label %cleanup
+
+bb3:                                              ; preds = %cleanup
+  %2 = load ptr, ptr %1, align 8, !noundef !4
+  %3 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 1
+  %4 = load i32, ptr %3, align 8, !noundef !4
+  %5 = insertvalue { ptr, i32 } poison, ptr %2, 0
+  %6 = insertvalue { ptr, i32 } %5, i32 %4, 1
+  resume { ptr, i32 } %6
+
+cleanup:                                          ; preds = %start
+  %7 = landingpad { ptr, i32 }
+          cleanup
+  %8 = extractvalue { ptr, i32 } %7, 0
+  %9 = extractvalue { ptr, i32 } %7, 1
+  %10 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 0
+  store ptr %8, ptr %10, align 8
+  %11 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 1
+  store i32 %9, ptr %11, align 8
+  br label %bb3
+
+bb1:                                              ; preds = %start
+  ret i32 %_0
+}
+
+; Function Attrs: inlinehint uwtable
+define internal void @_ZN4core3ops8function6FnOnce9call_once17hc6a9dc29a00ac63eE(ptr %_1) unnamed_addr #2 {
+start:
+  %_2 = alloca {}, align 1
+  call void %_1()
+  ret void
+}
+
+; Function Attrs: inlinehint uwtable
+define internal void @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17h47ea1b16ba3e87a4E"(ptr align 8 %_1) unnamed_addr #2 {
+start:
+  ret void
+}
+
+; Function Attrs: inlinehint uwtable
+define internal { i32, i32 } @"_ZN4core4iter5range101_$LT$impl$u20$core..iter..traits..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h3186fffc13203a36E"(ptr align 4 %self) unnamed_addr #2 {
+start:
+  %0 = call { i32, i32 } @"_ZN89_$LT$core..ops..range..Range$LT$T$GT$$u20$as$u20$core..iter..range..RangeIteratorImpl$GT$9spec_next17h1b9638ceb504bcf5E"(ptr align 4 %self)
+  %_0.0 = extractvalue { i32, i32 } %0, 0
+  %_0.1 = extractvalue { i32, i32 } %0, 1
+  %1 = insertvalue { i32, i32 } poison, i32 %_0.0, 0
+  %2 = insertvalue { i32, i32 } %1, i32 %_0.1, 1
+  ret { i32, i32 } %2
+}
+
+; Function Attrs: inlinehint uwtable
+define internal i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17hc6cb452c4729c1f5E"() unnamed_addr #2 {
+start:
+  ret i8 0
+}
+
+; Function Attrs: inlinehint uwtable
+define internal { i32, i32 } @"_ZN63_$LT$I$u20$as$u20$core..iter..traits..collect..IntoIterator$GT$9into_iter17h9c831713eeb3e5efE"(i32 %self.0, i32 %self.1) unnamed_addr #2 {
+start:
+  %0 = insertvalue { i32, i32 } poison, i32 %self.0, 0
+  %1 = insertvalue { i32, i32 } %0, i32 %self.1, 1
+  ret { i32, i32 } %1
+}
+
+; Function Attrs: inlinehint uwtable
+define internal { i32, i32 } @"_ZN89_$LT$core..ops..range..Range$LT$T$GT$$u20$as$u20$core..iter..range..RangeIteratorImpl$GT$9spec_next17h1b9638ceb504bcf5E"(ptr align 4 %self) unnamed_addr #2 {
+start:
+  %_0 = alloca { i32, i32 }, align 4
+  %_4 = getelementptr inbounds { i32, i32 }, ptr %self, i32 0, i32 1
+  %_3.i = load i32, ptr %self, align 4, !noundef !4
+  %_4.i = load i32, ptr %_4, align 4, !noundef !4
+  %_0.i = icmp slt i32 %_3.i, %_4.i
+  br i1 %_0.i, label %bb2, label %bb4
+
+bb4:                                              ; preds = %start
+  store i32 0, ptr %_0, align 4
+  br label %bb5
+
+bb2:                                              ; preds = %start
+  %old = load i32, ptr %self, align 4, !noundef !4
+  %_6 = call i32 @"_ZN47_$LT$i32$u20$as$u20$core..iter..range..Step$GT$17forward_unchecked17h3be66287c3fcbba4E"(i32 %old, i64 1)
+  store i32 %_6, ptr %self, align 4
+  %0 = getelementptr inbounds { i32, i32 }, ptr %_0, i32 0, i32 1
+  store i32 %old, ptr %0, align 4
+  store i32 1, ptr %_0, align 4
+  br label %bb5
+
+bb5:                                              ; preds = %bb2, %bb4
+  %1 = getelementptr inbounds { i32, i32 }, ptr %_0, i32 0, i32 0
+  %2 = load i32, ptr %1, align 4, !range !5, !noundef !4
+  %3 = getelementptr inbounds { i32, i32 }, ptr %_0, i32 0, i32 1
+  %4 = load i32, ptr %3, align 4
+  %5 = insertvalue { i32, i32 } poison, i32 %2, 0
+  %6 = insertvalue { i32, i32 } %5, i32 %4, 1
+  ret { i32, i32 } %6
+}
+
+; Function Attrs: uwtable
+define dso_local i32 @input() unnamed_addr #1 {
+start:
+  ret i32 0
+}
+
+; Function Attrs: uwtable
+define dso_local void @log(i32 %i) unnamed_addr #1 {
+start:
+  ret void
+}
+
+; Function Attrs: uwtable
+define dso_local void @app() unnamed_addr #1 {
+start:
+  %0 = alloca { i32, i32 }, align 8
+  %_5 = alloca { i32, i32 }, align 4
+  %iter = alloca { i32, i32 }, align 4
+  %_3 = alloca { i32, i32 }, align 4
+  call void @atomic_start()
+  %x = call i32 @input()
+  store i32 0, ptr %_3, align 4
+  %1 = getelementptr inbounds { i32, i32 }, ptr %_3, i32 0, i32 1
+  store i32 10, ptr %1, align 4
+  %2 = getelementptr inbounds { i32, i32 }, ptr %_3, i32 0, i32 0
+  %3 = load i32, ptr %2, align 4, !noundef !4
+  %4 = getelementptr inbounds { i32, i32 }, ptr %_3, i32 0, i32 1
+  %5 = load i32, ptr %4, align 4, !noundef !4
+  %6 = call { i32, i32 } @"_ZN63_$LT$I$u20$as$u20$core..iter..traits..collect..IntoIterator$GT$9into_iter17h9c831713eeb3e5efE"(i32 %3, i32 %5)
+  %7 = extractvalue { i32, i32 } %6, 0
+  %8 = extractvalue { i32, i32 } %6, 1
+  %9 = getelementptr inbounds { i32, i32 }, ptr %iter, i32 0, i32 0
+  store i32 %7, ptr %9, align 4
+  %10 = getelementptr inbounds { i32, i32 }, ptr %iter, i32 0, i32 1
+  store i32 %8, ptr %10, align 4
+  br label %bb3
+
+bb3:                                              ; preds = %bb5, %start, <null operand!>, <null operand!>
+  %11 = call { i32, i32 } @"_ZN4core4iter5range101_$LT$impl$u20$core..iter..traits..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h3186fffc13203a36E"(ptr align 4 %iter)
+  store { i32, i32 } %11, ptr %_5, align 4
+  %12 = load i32, ptr %_5, align 4, !range !5, !noundef !4
+  %_7 = zext i32 %12 to i64
+  %13 = icmp eq i64 %_7, 0
+  br i1 %13, label %bb7, label %bb5
+
+bb7:                                              ; preds = %bb3
+  call void @atomic_end()
+  store { i32, i32 } %11, ptr %0, align 4
+  br label %bb31
+
+bb5:                                              ; preds = %bb3
+  call void @log(i32 %x)
+  br label %bb3
+
+bb6:                                              ; No predecessors!
+  unreachable
+
+bb31:                                             ; preds = %bb52, %bb7
+  %14 = call { i32, i32 } @"_ZN4core4iter5range101_$LT$impl$u20$core..iter..traits..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h3186fffc13203a36E"(ptr align 4 %iter)
+  store { i32, i32 } %11, ptr %_5, align 4
+  %15 = load i32, ptr %0, align 4, !range !5, !noundef !4
+  %16 = zext i32 %12 to i64
+  %17 = icmp eq i64 %_7, 0
+  br i1 %17, label %bb52, label %bb73
+
+bb52:                                             ; preds = %bb31
+  call void @log(i32 1)
+  br label %bb31
+
+bb73:                                             ; preds = %bb31
+  ret void
+}
+
+; Function Attrs: uwtable
+define internal void @_ZN9example124main17h35539225bd174e48E() unnamed_addr #1 {
+start:
+  call void @app()
+  ret void
+}
+
+; Function Attrs: uwtable
+define dso_local void @atomic_start() unnamed_addr #1 {
+start:
+  %local = load i16, ptr @atomic_depth, align 2, !noundef !4
+  call void @start_atomic()
+  ret void
+}
+
+; Function Attrs: uwtable
+define dso_local void @atomic_end() unnamed_addr #1 {
+start:
+  call void @end_atomic()
+  ret void
+}
+
+; Function Attrs: uwtable
+declare i64 @_ZN3std2rt19lang_start_internal17hadaf077a6dd0140bE(ptr align 1, ptr align 8, i64, ptr, i8) unnamed_addr #1
+
+; Function Attrs: uwtable
+declare i32 @rust_eh_personality(i32, i32, i64, ptr, ptr) unnamed_addr #1
+
+; Function Attrs: uwtable
+declare void @start_atomic() unnamed_addr #1
+
+; Function Attrs: uwtable
+declare void @end_atomic() unnamed_addr #1
+
+define i32 @main(i32 %0, ptr %1) unnamed_addr #3 {
+top:
+  %2 = sext i32 %0 to i64
+  %3 = call i64 @_ZN3std2rt10lang_start17h04742dcfd5f87c29E(ptr @_ZN9example124main17h35539225bd174e48E, i64 %2, ptr %1, i8 0)
+  %4 = trunc i64 %3 to i32
+  ret i32 %4
+}
+
+attributes #0 = { noinline uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" }
+attributes #1 = { uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" }
+attributes #2 = { inlinehint uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" }
+attributes #3 = { "frame-pointer"="non-leaf" "target-cpu"="apple-m1" }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 8, !"PIC Level", i32 2}
+!1 = !{i32 7, !"PIE Level", i32 2}
+!2 = !{!"rustc version 1.73.0 (cc66ad468 2023-10-03)"}
+!3 = !{i32 1453225}
+!4 = !{}
+!5 = !{i32 0, i32 2}
diff --git a/benchmarks/tests/example13.ll b/benchmarks/tests/example13.ll
new file mode 100644
index 0000000..1a22fb2
--- /dev/null
+++ b/benchmarks/tests/example13.ll
@@ -0,0 +1,275 @@
+; ModuleID = '../../benchmarks/tests/example13.bc'
+source_filename = "example13.a75a82856bfae51d-cgu.0"
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-macosx12.0.0"
+
+@vtable.0 = private unnamed_addr constant <{ ptr, [16 x i8], ptr, ptr, ptr }> <{ ptr @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17hf0f1c0343a3d5304E", [16 x i8] c"\08\00\00\00\00\00\00\00\08\00\00\00\00\00\00\00", ptr @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h4dfc989e8a89cebeE", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h51a796a89a29b131E", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h51a796a89a29b131E" }>, align 8
+@IO_NAME = constant <{ ptr }> <{ ptr @input }>, align 8
+@atomic_depth = external global i16
+
+; Function Attrs: noinline uwtable
+define internal void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17hf4f9b68dd936cd73E(ptr %f) unnamed_addr #0 {
+start:
+  call void @_ZN4core3ops8function6FnOnce9call_once17h6b12a0453d0fac53E(ptr %f)
+  call void asm sideeffect "", "~{memory}"(), !srcloc !3
+  ret void
+}
+
+; Function Attrs: uwtable
+define hidden i64 @_ZN3std2rt10lang_start17h431fe12d2c8c1de6E(ptr %main, i64 %argc, ptr %argv, i8 %sigpipe) unnamed_addr #1 {
+start:
+  %_8 = alloca ptr, align 8
+  %_5 = alloca i64, align 8
+  store ptr %main, ptr %_8, align 8
+  %0 = call i64 @_ZN3std2rt19lang_start_internal17hadaf077a6dd0140bE(ptr align 1 %_8, ptr align 8 @vtable.0, i64 %argc, ptr %argv, i8 %sigpipe)
+  store i64 %0, ptr %_5, align 8
+  %v = load i64, ptr %_5, align 8, !noundef !4
+  ret i64 %v
+}
+
+; Function Attrs: inlinehint uwtable
+define internal i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h51a796a89a29b131E"(ptr align 8 %_1) unnamed_addr #2 {
+start:
+  %_4 = load ptr, ptr %_1, align 8, !nonnull !4, !noundef !4
+  call void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17hf4f9b68dd936cd73E(ptr %_4)
+  %self = call i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17h2db0f42e6485e7e4E"()
+  %_0 = zext i8 %self to i32
+  ret i32 %_0
+}
+
+; Function Attrs: inlinehint uwtable
+define internal i32 @"_ZN47_$LT$i32$u20$as$u20$core..iter..range..Step$GT$17forward_unchecked17h4944c1e1e44c8861E"(i32 %start1, i64 %n) unnamed_addr #2 {
+start:
+  %rhs = trunc i64 %n to i32
+  %_0 = add nsw i32 %start1, %rhs
+  ret i32 %_0
+}
+
+; Function Attrs: inlinehint uwtable
+define internal i32 @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h4dfc989e8a89cebeE"(ptr %_1) unnamed_addr #2 {
+start:
+  %_2 = alloca {}, align 1
+  %0 = load ptr, ptr %_1, align 8, !nonnull !4, !noundef !4
+  %_0 = call i32 @_ZN4core3ops8function6FnOnce9call_once17hde4d0e94a62ddc18E(ptr %0)
+  ret i32 %_0
+}
+
+; Function Attrs: inlinehint uwtable
+define internal void @_ZN4core3ops8function6FnOnce9call_once17h6b12a0453d0fac53E(ptr %_1) unnamed_addr #2 {
+start:
+  %_2 = alloca {}, align 1
+  call void %_1()
+  ret void
+}
+
+; Function Attrs: inlinehint uwtable
+define internal i32 @_ZN4core3ops8function6FnOnce9call_once17hde4d0e94a62ddc18E(ptr %0) unnamed_addr #2 personality ptr @rust_eh_personality {
+start:
+  %1 = alloca { ptr, i32 }, align 8
+  %_2 = alloca {}, align 1
+  %_1 = alloca ptr, align 8
+  store ptr %0, ptr %_1, align 8
+  %_0 = invoke i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h51a796a89a29b131E"(ptr align 8 %_1)
+          to label %bb1 unwind label %cleanup
+
+bb3:                                              ; preds = %cleanup
+  %2 = load ptr, ptr %1, align 8, !noundef !4
+  %3 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 1
+  %4 = load i32, ptr %3, align 8, !noundef !4
+  %5 = insertvalue { ptr, i32 } poison, ptr %2, 0
+  %6 = insertvalue { ptr, i32 } %5, i32 %4, 1
+  resume { ptr, i32 } %6
+
+cleanup:                                          ; preds = %start
+  %7 = landingpad { ptr, i32 }
+          cleanup
+  %8 = extractvalue { ptr, i32 } %7, 0
+  %9 = extractvalue { ptr, i32 } %7, 1
+  %10 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 0
+  store ptr %8, ptr %10, align 8
+  %11 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 1
+  store i32 %9, ptr %11, align 8
+  br label %bb3
+
+bb1:                                              ; preds = %start
+  ret i32 %_0
+}
+
+; Function Attrs: inlinehint uwtable
+define internal void @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17hf0f1c0343a3d5304E"(ptr align 8 %_1) unnamed_addr #2 {
+start:
+  ret void
+}
+
+; Function Attrs: inlinehint uwtable
+define internal { i32, i32 } @"_ZN4core4iter5range101_$LT$impl$u20$core..iter..traits..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h81d1ae0fa0da4546E"(ptr align 4 %self) unnamed_addr #2 {
+start:
+  %0 = call { i32, i32 } @"_ZN89_$LT$core..ops..range..Range$LT$T$GT$$u20$as$u20$core..iter..range..RangeIteratorImpl$GT$9spec_next17h3a2fc0cbb86bcd54E"(ptr align 4 %self)
+  %_0.0 = extractvalue { i32, i32 } %0, 0
+  %_0.1 = extractvalue { i32, i32 } %0, 1
+  %1 = insertvalue { i32, i32 } poison, i32 %_0.0, 0
+  %2 = insertvalue { i32, i32 } %1, i32 %_0.1, 1
+  ret { i32, i32 } %2
+}
+
+; Function Attrs: inlinehint uwtable
+define internal i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17h2db0f42e6485e7e4E"() unnamed_addr #2 {
+start:
+  ret i8 0
+}
+
+; Function Attrs: inlinehint uwtable
+define internal { i32, i32 } @"_ZN63_$LT$I$u20$as$u20$core..iter..traits..collect..IntoIterator$GT$9into_iter17h80dc70a24d0c93edE"(i32 %self.0, i32 %self.1) unnamed_addr #2 {
+start:
+  %0 = insertvalue { i32, i32 } poison, i32 %self.0, 0
+  %1 = insertvalue { i32, i32 } %0, i32 %self.1, 1
+  ret { i32, i32 } %1
+}
+
+; Function Attrs: inlinehint uwtable
+define internal { i32, i32 } @"_ZN89_$LT$core..ops..range..Range$LT$T$GT$$u20$as$u20$core..iter..range..RangeIteratorImpl$GT$9spec_next17h3a2fc0cbb86bcd54E"(ptr align 4 %self) unnamed_addr #2 {
+start:
+  %_0 = alloca { i32, i32 }, align 4
+  %_4 = getelementptr inbounds { i32, i32 }, ptr %self, i32 0, i32 1
+  %_3.i = load i32, ptr %self, align 4, !noundef !4
+  %_4.i = load i32, ptr %_4, align 4, !noundef !4
+  %_0.i = icmp slt i32 %_3.i, %_4.i
+  br i1 %_0.i, label %bb2, label %bb4
+
+bb4:                                              ; preds = %start
+  store i32 0, ptr %_0, align 4
+  br label %bb5
+
+bb2:                                              ; preds = %start
+  %old = load i32, ptr %self, align 4, !noundef !4
+  %_6 = call i32 @"_ZN47_$LT$i32$u20$as$u20$core..iter..range..Step$GT$17forward_unchecked17h4944c1e1e44c8861E"(i32 %old, i64 1)
+  store i32 %_6, ptr %self, align 4
+  %0 = getelementptr inbounds { i32, i32 }, ptr %_0, i32 0, i32 1
+  store i32 %old, ptr %0, align 4
+  store i32 1, ptr %_0, align 4
+  br label %bb5
+
+bb5:                                              ; preds = %bb2, %bb4
+  %1 = getelementptr inbounds { i32, i32 }, ptr %_0, i32 0, i32 0
+  %2 = load i32, ptr %1, align 4, !range !5, !noundef !4
+  %3 = getelementptr inbounds { i32, i32 }, ptr %_0, i32 0, i32 1
+  %4 = load i32, ptr %3, align 4
+  %5 = insertvalue { i32, i32 } poison, i32 %2, 0
+  %6 = insertvalue { i32, i32 } %5, i32 %4, 1
+  ret { i32, i32 } %6
+}
+
+; Function Attrs: uwtable
+define dso_local i32 @input() unnamed_addr #1 {
+start:
+  ret i32 0
+}
+
+; Function Attrs: uwtable
+define dso_local void @log(i32 %i) unnamed_addr #1 {
+start:
+  ret void
+}
+
+; Function Attrs: uwtable
+define dso_local void @app() unnamed_addr #1 {
+start:
+  %_6 = alloca { i32, i32 }, align 4
+  %iter = alloca { i32, i32 }, align 4
+  %_3 = alloca { i32, i32 }, align 4
+  call void @atomic_start()
+  %x = call i32 @input()
+  store i32 %x, ptr %_3, align 4
+  %0 = getelementptr inbounds { i32, i32 }, ptr %_3, i32 0, i32 1
+  store i32 10, ptr %0, align 4
+  %1 = getelementptr inbounds { i32, i32 }, ptr %_3, i32 0, i32 0
+  %2 = load i32, ptr %1, align 4, !noundef !4
+  %3 = getelementptr inbounds { i32, i32 }, ptr %_3, i32 0, i32 1
+  %4 = load i32, ptr %3, align 4, !noundef !4
+  %5 = call { i32, i32 } @"_ZN63_$LT$I$u20$as$u20$core..iter..traits..collect..IntoIterator$GT$9into_iter17h80dc70a24d0c93edE"(i32 %2, i32 %4)
+  %6 = extractvalue { i32, i32 } %5, 0
+  %7 = extractvalue { i32, i32 } %5, 1
+  %8 = getelementptr inbounds { i32, i32 }, ptr %iter, i32 0, i32 0
+  store i32 %6, ptr %8, align 4
+  %9 = getelementptr inbounds { i32, i32 }, ptr %iter, i32 0, i32 1
+  store i32 %7, ptr %9, align 4
+  br label %bb3
+
+bb3:                                              ; preds = %start, %bb5, <null operand!>
+  %10 = call { i32, i32 } @"_ZN4core4iter5range101_$LT$impl$u20$core..iter..traits..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h81d1ae0fa0da4546E"(ptr align 4 %iter)
+  store { i32, i32 } %10, ptr %_6, align 4
+  %11 = load i32, ptr %_6, align 4, !range !5, !noundef !4
+  %_8 = zext i32 %11 to i64
+  %12 = icmp eq i64 %_8, 0
+  br i1 %12, label %bb7, label %bb5
+
+bb7:                                              ; preds = %bb3
+  call void @atomic_end()
+  ret void
+
+bb5:                                              ; preds = %bb3
+  %13 = getelementptr inbounds { i32, i32 }, ptr %_6, i32 0, i32 1
+  %i = load i32, ptr %13, align 4, !noundef !4
+  call void @log(i32 %i)
+  br label %bb3
+
+bb6:                                              ; No predecessors!
+  unreachable
+}
+
+; Function Attrs: uwtable
+define internal void @_ZN9example134main17haba30008cc3025a3E() unnamed_addr #1 {
+start:
+  call void @app()
+  ret void
+}
+
+; Function Attrs: uwtable
+define dso_local void @atomic_start() unnamed_addr #1 {
+start:
+  %local = load i16, ptr @atomic_depth, align 2, !noundef !4
+  call void @start_atomic()
+  ret void
+}
+
+; Function Attrs: uwtable
+define dso_local void @atomic_end() unnamed_addr #1 {
+start:
+  call void @end_atomic()
+  ret void
+}
+
+; Function Attrs: uwtable
+declare i64 @_ZN3std2rt19lang_start_internal17hadaf077a6dd0140bE(ptr align 1, ptr align 8, i64, ptr, i8) unnamed_addr #1
+
+; Function Attrs: uwtable
+declare i32 @rust_eh_personality(i32, i32, i64, ptr, ptr) unnamed_addr #1
+
+; Function Attrs: uwtable
+declare void @start_atomic() unnamed_addr #1
+
+; Function Attrs: uwtable
+declare void @end_atomic() unnamed_addr #1
+
+define i32 @main(i32 %0, ptr %1) unnamed_addr #3 {
+top:
+  %2 = sext i32 %0 to i64
+  %3 = call i64 @_ZN3std2rt10lang_start17h431fe12d2c8c1de6E(ptr @_ZN9example134main17haba30008cc3025a3E, i64 %2, ptr %1, i8 0)
+  %4 = trunc i64 %3 to i32
+  ret i32 %4
+}
+
+attributes #0 = { noinline uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" }
+attributes #1 = { uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" }
+attributes #2 = { inlinehint uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" }
+attributes #3 = { "frame-pointer"="non-leaf" "target-cpu"="apple-m1" }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 8, !"PIC Level", i32 2}
+!1 = !{i32 7, !"PIE Level", i32 2}
+!2 = !{!"rustc version 1.73.0 (cc66ad468 2023-10-03)"}
+!3 = !{i32 1453209}
+!4 = !{}
+!5 = !{i32 0, i32 2}
diff --git a/benchmarks/tests/example13.orig.ll b/benchmarks/tests/example13.orig.ll
new file mode 100644
index 0000000..564dab2
--- /dev/null
+++ b/benchmarks/tests/example13.orig.ll
@@ -0,0 +1,280 @@
+; ModuleID = '../../benchmarks/tests/example13.bc'
+source_filename = "example13.a75a82856bfae51d-cgu.0"
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-macosx12.0.0"
+
+@vtable.0 = private unnamed_addr constant <{ ptr, [16 x i8], ptr, ptr, ptr }> <{ ptr @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17hf0f1c0343a3d5304E", [16 x i8] c"\08\00\00\00\00\00\00\00\08\00\00\00\00\00\00\00", ptr @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h4dfc989e8a89cebeE", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h51a796a89a29b131E", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h51a796a89a29b131E" }>, align 8
+@IO_NAME = constant <{ ptr }> <{ ptr @input }>, align 8
+@atomic_depth = external global i16
+
+; Function Attrs: noinline uwtable
+define internal void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17hf4f9b68dd936cd73E(ptr %f) unnamed_addr #0 {
+start:
+  call void @_ZN4core3ops8function6FnOnce9call_once17h6b12a0453d0fac53E(ptr %f)
+  call void asm sideeffect "", "~{memory}"(), !srcloc !3
+  ret void
+}
+
+; Function Attrs: uwtable
+define hidden i64 @_ZN3std2rt10lang_start17h431fe12d2c8c1de6E(ptr %main, i64 %argc, ptr %argv, i8 %sigpipe) unnamed_addr #1 {
+start:
+  %_8 = alloca ptr, align 8
+  %_5 = alloca i64, align 8
+  store ptr %main, ptr %_8, align 8
+  %0 = call i64 @_ZN3std2rt19lang_start_internal17hadaf077a6dd0140bE(ptr align 1 %_8, ptr align 8 @vtable.0, i64 %argc, ptr %argv, i8 %sigpipe)
+  store i64 %0, ptr %_5, align 8
+  %v = load i64, ptr %_5, align 8, !noundef !4
+  ret i64 %v
+}
+
+; Function Attrs: inlinehint uwtable
+define internal i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h51a796a89a29b131E"(ptr align 8 %_1) unnamed_addr #2 {
+start:
+  %_4 = load ptr, ptr %_1, align 8, !nonnull !4, !noundef !4
+  call void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17hf4f9b68dd936cd73E(ptr %_4)
+  %self = call i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17h2db0f42e6485e7e4E"()
+  %_0 = zext i8 %self to i32
+  ret i32 %_0
+}
+
+; Function Attrs: inlinehint uwtable
+define internal i32 @"_ZN47_$LT$i32$u20$as$u20$core..iter..range..Step$GT$17forward_unchecked17h4944c1e1e44c8861E"(i32 %start1, i64 %n) unnamed_addr #2 {
+start:
+  %rhs = trunc i64 %n to i32
+  %_0 = add nsw i32 %start1, %rhs
+  ret i32 %_0
+}
+
+; Function Attrs: inlinehint uwtable
+define internal i32 @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h4dfc989e8a89cebeE"(ptr %_1) unnamed_addr #2 {
+start:
+  %_2 = alloca {}, align 1
+  %0 = load ptr, ptr %_1, align 8, !nonnull !4, !noundef !4
+  %_0 = call i32 @_ZN4core3ops8function6FnOnce9call_once17hde4d0e94a62ddc18E(ptr %0)
+  ret i32 %_0
+}
+
+; Function Attrs: inlinehint uwtable
+define internal void @_ZN4core3ops8function6FnOnce9call_once17h6b12a0453d0fac53E(ptr %_1) unnamed_addr #2 {
+start:
+  %_2 = alloca {}, align 1
+  call void %_1()
+  ret void
+}
+
+; Function Attrs: inlinehint uwtable
+define internal i32 @_ZN4core3ops8function6FnOnce9call_once17hde4d0e94a62ddc18E(ptr %0) unnamed_addr #2 personality ptr @rust_eh_personality {
+start:
+  %1 = alloca { ptr, i32 }, align 8
+  %_2 = alloca {}, align 1
+  %_1 = alloca ptr, align 8
+  store ptr %0, ptr %_1, align 8
+  %_0 = invoke i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h51a796a89a29b131E"(ptr align 8 %_1)
+          to label %bb1 unwind label %cleanup
+
+bb3:                                              ; preds = %cleanup
+  %2 = load ptr, ptr %1, align 8, !noundef !4
+  %3 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 1
+  %4 = load i32, ptr %3, align 8, !noundef !4
+  %5 = insertvalue { ptr, i32 } poison, ptr %2, 0
+  %6 = insertvalue { ptr, i32 } %5, i32 %4, 1
+  resume { ptr, i32 } %6
+
+cleanup:                                          ; preds = %start
+  %7 = landingpad { ptr, i32 }
+          cleanup
+  %8 = extractvalue { ptr, i32 } %7, 0
+  %9 = extractvalue { ptr, i32 } %7, 1
+  %10 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 0
+  store ptr %8, ptr %10, align 8
+  %11 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 1
+  store i32 %9, ptr %11, align 8
+  br label %bb3
+
+bb1:                                              ; preds = %start
+  ret i32 %_0
+}
+
+; Function Attrs: inlinehint uwtable
+define internal void @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17hf0f1c0343a3d5304E"(ptr align 8 %_1) unnamed_addr #2 {
+start:
+  ret void
+}
+
+; Function Attrs: inlinehint uwtable
+define internal { i32, i32 } @"_ZN4core4iter5range101_$LT$impl$u20$core..iter..traits..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h81d1ae0fa0da4546E"(ptr align 4 %self) unnamed_addr #2 {
+start:
+  %0 = call { i32, i32 } @"_ZN89_$LT$core..ops..range..Range$LT$T$GT$$u20$as$u20$core..iter..range..RangeIteratorImpl$GT$9spec_next17h3a2fc0cbb86bcd54E"(ptr align 4 %self)
+  %_0.0 = extractvalue { i32, i32 } %0, 0
+  %_0.1 = extractvalue { i32, i32 } %0, 1
+  %1 = insertvalue { i32, i32 } poison, i32 %_0.0, 0
+  %2 = insertvalue { i32, i32 } %1, i32 %_0.1, 1
+  ret { i32, i32 } %2
+}
+
+; Function Attrs: inlinehint uwtable
+define internal i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17h2db0f42e6485e7e4E"() unnamed_addr #2 {
+start:
+  ret i8 0
+}
+
+; Function Attrs: inlinehint uwtable
+define internal { i32, i32 } @"_ZN63_$LT$I$u20$as$u20$core..iter..traits..collect..IntoIterator$GT$9into_iter17h80dc70a24d0c93edE"(i32 %self.0, i32 %self.1) unnamed_addr #2 {
+start:
+  %0 = insertvalue { i32, i32 } poison, i32 %self.0, 0
+  %1 = insertvalue { i32, i32 } %0, i32 %self.1, 1
+  ret { i32, i32 } %1
+}
+
+; Function Attrs: inlinehint uwtable
+define internal { i32, i32 } @"_ZN89_$LT$core..ops..range..Range$LT$T$GT$$u20$as$u20$core..iter..range..RangeIteratorImpl$GT$9spec_next17h3a2fc0cbb86bcd54E"(ptr align 4 %self) unnamed_addr #2 {
+start:
+  %_0 = alloca { i32, i32 }, align 4
+  %_4 = getelementptr inbounds { i32, i32 }, ptr %self, i32 0, i32 1
+  %_3.i = load i32, ptr %self, align 4, !noundef !4
+  %_4.i = load i32, ptr %_4, align 4, !noundef !4
+  %_0.i = icmp slt i32 %_3.i, %_4.i
+  br i1 %_0.i, label %bb2, label %bb4
+
+bb4:                                              ; preds = %start
+  store i32 0, ptr %_0, align 4
+  br label %bb5
+
+bb2:                                              ; preds = %start
+  %old = load i32, ptr %self, align 4, !noundef !4
+  %_6 = call i32 @"_ZN47_$LT$i32$u20$as$u20$core..iter..range..Step$GT$17forward_unchecked17h4944c1e1e44c8861E"(i32 %old, i64 1)
+  store i32 %_6, ptr %self, align 4
+  %0 = getelementptr inbounds { i32, i32 }, ptr %_0, i32 0, i32 1
+  store i32 %old, ptr %0, align 4
+  store i32 1, ptr %_0, align 4
+  br label %bb5
+
+bb5:                                              ; preds = %bb2, %bb4
+  %1 = getelementptr inbounds { i32, i32 }, ptr %_0, i32 0, i32 0
+  %2 = load i32, ptr %1, align 4, !range !5, !noundef !4
+  %3 = getelementptr inbounds { i32, i32 }, ptr %_0, i32 0, i32 1
+  %4 = load i32, ptr %3, align 4
+  %5 = insertvalue { i32, i32 } poison, i32 %2, 0
+  %6 = insertvalue { i32, i32 } %5, i32 %4, 1
+  ret { i32, i32 } %6
+}
+
+; Function Attrs: uwtable
+define dso_local i32 @input() unnamed_addr #1 {
+start:
+  ret i32 0
+}
+
+; Function Attrs: uwtable
+define dso_local void @log(i32 %i) unnamed_addr #1 {
+start:
+  ret void
+}
+
+; Function Attrs: uwtable
+define dso_local void @app() unnamed_addr #1 {
+start:
+  %_6 = alloca { i32, i32 }, align 4
+  %iter = alloca { i32, i32 }, align 4
+  %_3 = alloca { i32, i32 }, align 4
+  %x = call i32 @input()
+  store i32 %x, ptr %_3, align 4
+  %0 = getelementptr inbounds { i32, i32 }, ptr %_3, i32 0, i32 1
+  store i32 10, ptr %0, align 4
+  %1 = getelementptr inbounds { i32, i32 }, ptr %_3, i32 0, i32 0
+  %2 = load i32, ptr %1, align 4, !noundef !4
+  %3 = getelementptr inbounds { i32, i32 }, ptr %_3, i32 0, i32 1
+  %4 = load i32, ptr %3, align 4, !noundef !4
+  %5 = call { i32, i32 } @"_ZN63_$LT$I$u20$as$u20$core..iter..traits..collect..IntoIterator$GT$9into_iter17h80dc70a24d0c93edE"(i32 %2, i32 %4)
+  %_2.0 = extractvalue { i32, i32 } %5, 0
+  %_2.1 = extractvalue { i32, i32 } %5, 1
+  %6 = getelementptr inbounds { i32, i32 }, ptr %iter, i32 0, i32 0
+  store i32 %_2.0, ptr %6, align 4
+  %7 = getelementptr inbounds { i32, i32 }, ptr %iter, i32 0, i32 1
+  store i32 %_2.1, ptr %7, align 4
+  br label %bb3
+
+bb3:                                              ; preds = %bb5, %start
+  %8 = call { i32, i32 } @"_ZN4core4iter5range101_$LT$impl$u20$core..iter..traits..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h81d1ae0fa0da4546E"(ptr align 4 %iter)
+  store { i32, i32 } %8, ptr %_6, align 4
+  %9 = load i32, ptr %_6, align 4, !range !5, !noundef !4
+  %_8 = zext i32 %9 to i64
+  %10 = icmp eq i64 %_8, 0
+  br i1 %10, label %bb7, label %bb5
+
+bb7:                                              ; preds = %bb3
+  call void @Fresh(i32 %x)
+  ret void
+
+bb5:                                              ; preds = %bb3
+  %11 = getelementptr inbounds { i32, i32 }, ptr %_6, i32 0, i32 1
+  %i = load i32, ptr %11, align 4, !noundef !4
+  call void @log(i32 %i)
+  br label %bb3
+
+bb6:                                              ; No predecessors!
+  unreachable
+}
+
+; Function Attrs: uwtable
+define internal void @_ZN9example134main17haba30008cc3025a3E() unnamed_addr #1 {
+start:
+  call void @app()
+  ret void
+}
+
+; Function Attrs: uwtable
+define internal void @Fresh(i32 %_var) unnamed_addr #1 {
+start:
+  ret void
+}
+
+; Function Attrs: uwtable
+define dso_local void @atomic_start() unnamed_addr #1 {
+start:
+  %local = load i16, ptr @atomic_depth, align 2, !noundef !4
+  call void @start_atomic()
+  ret void
+}
+
+; Function Attrs: uwtable
+define dso_local void @atomic_end() unnamed_addr #1 {
+start:
+  call void @end_atomic()
+  ret void
+}
+
+; Function Attrs: uwtable
+declare i64 @_ZN3std2rt19lang_start_internal17hadaf077a6dd0140bE(ptr align 1, ptr align 8, i64, ptr, i8) unnamed_addr #1
+
+; Function Attrs: uwtable
+declare i32 @rust_eh_personality(i32, i32, i64, ptr, ptr) unnamed_addr #1
+
+; Function Attrs: uwtable
+declare void @start_atomic() unnamed_addr #1
+
+; Function Attrs: uwtable
+declare void @end_atomic() unnamed_addr #1
+
+define i32 @main(i32 %0, ptr %1) unnamed_addr #3 {
+top:
+  %2 = sext i32 %0 to i64
+  %3 = call i64 @_ZN3std2rt10lang_start17h431fe12d2c8c1de6E(ptr @_ZN9example134main17haba30008cc3025a3E, i64 %2, ptr %1, i8 0)
+  %4 = trunc i64 %3 to i32
+  ret i32 %4
+}
+
+attributes #0 = { noinline uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" }
+attributes #1 = { uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" }
+attributes #2 = { inlinehint uwtable "frame-pointer"="non-leaf" "target-cpu"="apple-m1" }
+attributes #3 = { "frame-pointer"="non-leaf" "target-cpu"="apple-m1" }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 8, !"PIC Level", i32 2}
+!1 = !{i32 7, !"PIE Level", i32 2}
+!2 = !{!"rustc version 1.73.0 (cc66ad468 2023-10-03)"}
+!3 = !{i32 1453209}
+!4 = !{}
+!5 = !{i32 0, i32 2}
diff --git a/benchmarks/tests/example13.rs b/benchmarks/tests/example13.rs
new file mode 100644
index 0000000..3748f61
--- /dev/null
+++ b/benchmarks/tests/example13.rs
@@ -0,0 +1,25 @@
+include!("../intermittent.rs");
+
+#[no_mangle]
+fn input() -> i32 {
+    0
+}
+
+#[no_mangle]
+pub static IO_NAME: fn() -> i32 = input;
+
+#[no_mangle]
+fn log(i: i32) -> () {}
+
+#[no_mangle]
+fn app() -> () {
+    let x = input();
+    for i in x..10 {
+        log(i);
+    }
+    Fresh(x);
+}
+
+fn main() -> () {
+    app()
+}
diff --git a/ocelot/AtomicRegionInference/Makefile b/ocelot/AtomicRegionInference/Makefile
index bacbb17..4dbe6ea 100644
--- a/ocelot/AtomicRegionInference/Makefile
+++ b/ocelot/AtomicRegionInference/Makefile
@@ -40,6 +40,8 @@ eg11:
 	TEST=example11 make testr
 eg12:
 	TEST=example12 make testr
+eg13:
+	TEST=example13 make testr
 
 run_eg1:
 	TEST=example01 make run && ../../benchmarks/tests/example01.out