diff --git a/include/DataDependencyGraph.hh b/include/DataDependencyGraph.hh index a93cf3c..d191987 100644 --- a/include/DataDependencyGraph.hh +++ b/include/DataDependencyGraph.hh @@ -21,6 +21,7 @@ namespace pdg void addAliasEdges(llvm::Instruction &inst); llvm::AliasResult queryAliasUnderApproximate(llvm::Value &v1, llvm::Value &v2); + void dumpDataDepGraph(llvm::Function &F); private: llvm::MemoryDependenceResults *_mem_dep_res; }; diff --git a/include/Graph.hh b/include/Graph.hh index 335ba6e..b840911 100644 --- a/include/Graph.hh +++ b/include/Graph.hh @@ -46,6 +46,8 @@ namespace pdg ValueNodeMap &getValueNodeMap() { return _val_node_map; } void dumpGraph(); + std::set findNodesReachedByEdges(Node &src, const std::set &edge_types, bool is_backward = false); + protected: ValueNodeMap _val_node_map; EdgeSet _edge_set; @@ -85,8 +87,10 @@ namespace pdg void addFormalTreeNodesToGraph(FunctionWrapper &func_w); bool isAnnotationCallInst(llvm::Instruction &inst); void buildGlobalAnnotationNodes(llvm::Module &M); + void dumpDataDepGraph(llvm::Function &F); + private: FuncWrapperMap _func_wrapper_map; CallWrapperMap _call_wrapper_map; diff --git a/include/LLVMEssentials.hh b/include/LLVMEssentials.hh index 02ab0a9..09bbcea 100644 --- a/include/LLVMEssentials.hh +++ b/include/LLVMEssentials.hh @@ -12,7 +12,6 @@ #include "llvm/IR/DebugInfo.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/GraphWriter.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/CommandLine.h" #include "llvm/ADT/GraphTraits.h" +#include "llvm/Support/CommandLine.h" #endif \ No newline at end of file diff --git a/include/ProgramDependencyGraph.hh b/include/ProgramDependencyGraph.hh index 48600e9..97895e6 100644 --- a/include/ProgramDependencyGraph.hh +++ b/include/ProgramDependencyGraph.hh @@ -32,6 +32,9 @@ namespace pdg bool canReach(Node &src, Node &dst); bool canReach(Node &src, Node &dst, std::set exclude_edge_types); + bool isIndirectCallCandidates(CallWrapper &cw, FunctionWrapper &fw); + bool checkChildNodes(Tree* src_tree, Tree* dst_tree); + private: llvm::Module *_module; ProgramGraph *_PDG; diff --git a/include/Tree.hh b/include/Tree.hh index 1d588f4..fa7b243 100644 --- a/include/Tree.hh +++ b/include/Tree.hh @@ -26,9 +26,9 @@ namespace pdg std::vector &getChildNodes() { return _children; } std::unordered_set &getAddrVars() { return _addr_vars; } void computeDerivedAddrVarsFromParent(); - TreeNode *getParentNode() { return _parent_node; } + TreeNode *getParentNode() const { return _parent_node; } Tree *getTree() { return _tree; } - int getDepth() { return _depth; } + int getDepth() const { return _depth; } void addAccessTag(AccessTag acc_tag) { _acc_tag_set.insert(acc_tag); } std::set &getAccessTags() { return _acc_tag_set; } bool isRootNode() {return _parent_node == nullptr;} diff --git a/src/CallWrapper.cpp b/src/CallWrapper.cpp index 9812114..d5f3b3a 100644 --- a/src/CallWrapper.cpp +++ b/src/CallWrapper.cpp @@ -18,13 +18,14 @@ void pdg::CallWrapper::buildActualTreeForArgs(FunctionWrapper &callee_fw) while (actual_arg_iter != _arg_list.end()) { Tree* arg_formal_in_tree = callee_fw.getArgFormalInTree(**formal_arg_iter); - if (!arg_formal_in_tree) - { + if (!arg_formal_in_tree) { + // in some case, not each parameter has tree, for example, a function with structure parameter actual_arg_iter++; formal_arg_iter++; - // in some case, not each parameter has tree, for example, a function with structure parameter continue; } + + // build actual in tree, copying the formal_in tree structure at the moment Tree* arg_actual_in_tree = new Tree(*arg_formal_in_tree); arg_actual_in_tree->setBaseVal(**actual_arg_iter); @@ -82,4 +83,4 @@ pdg::Tree *pdg::CallWrapper::getArgActualOutTree(Value &actual_arg) if (iter == _arg_actual_out_tree_map.end()) return nullptr; return _arg_actual_out_tree_map[&actual_arg]; -} +} \ No newline at end of file diff --git a/src/DataDependencyGraph.cpp b/src/DataDependencyGraph.cpp index 9952674..63e1e98 100644 --- a/src/DataDependencyGraph.cpp +++ b/src/DataDependencyGraph.cpp @@ -1,5 +1,4 @@ #include "DataDependencyGraph.hh" -#include "PDGUtils.hh" char pdg::DataDependencyGraph::ID = 0; @@ -19,7 +18,6 @@ bool pdg::DataDependencyGraph::runOnModule(Module &M) { if (F.isDeclaration() || F.empty()) continue; - _mem_dep_res = &getAnalysis(F).getMemDep(); // setup alias query interface for each function for (auto inst_iter = inst_begin(F); inst_iter != inst_end(F); inst_iter++) @@ -27,7 +25,6 @@ bool pdg::DataDependencyGraph::runOnModule(Module &M) addDefUseEdges(*inst_iter); addAliasEdges(*inst_iter); addRAWEdges(*inst_iter); - // some RAW could be missing due to the unsound alias analysis, need to swap the alias analysis used by the memory dependency analysis to obtain more precise results. addRAWEdgesUnderapproximate(*inst_iter); } } @@ -82,8 +79,10 @@ void pdg::DataDependencyGraph::addRAWEdges(Instruction &inst) ProgramGraph &g = ProgramGraph::getInstance(); auto dep_res = _mem_dep_res->getDependency(&inst); auto dep_inst = dep_res.getInst(); - - if (!dep_inst || !isa(dep_inst)) + + if (!dep_inst) + return; + if (!isa(dep_inst)) return; Node *src = g.getNode(inst); @@ -167,4 +166,4 @@ AliasResult pdg::DataDependencyGraph::queryAliasUnderApproximate(Value &v1, Valu } static RegisterPass - DDG("ddg", "Data Dependency Graph Construction", false, true); + DDG("ddg", "Data Dependency Graph Construction", false, true); \ No newline at end of file diff --git a/src/Graph.cpp b/src/Graph.cpp index cf73376..1e16d20 100644 --- a/src/Graph.cpp +++ b/src/Graph.cpp @@ -108,6 +108,7 @@ void pdg::ProgramGraph::build(Module &M) Node * n = new Node(global_var, node_type); _val_node_map.insert(std::pair(&global_var, n)); + addNode(*n); } @@ -129,10 +130,6 @@ void pdg::ProgramGraph::build(Module &M) node_type = GraphNodeType::INST_FUNCALL; if (isa(&*inst_iter)) node_type = GraphNodeType::INST_BR; - - Node *n = new Node(*inst_iter, node_type); - - // handle values used inside instructions, e.g., tmp gep inst... if (isa(&*inst_iter)) { for (auto operand : inst_iter->operand_values()) { if (!isa(operand)) { @@ -141,7 +138,7 @@ void pdg::ProgramGraph::build(Module &M) } } } - + Node *n = new Node(*inst_iter, node_type); _val_node_map.insert(std::pair(&*inst_iter, n)); func_w->addInst(*inst_iter); addNode(*n); @@ -153,10 +150,10 @@ void pdg::ProgramGraph::build(Module &M) _func_wrapper_map.insert(std::make_pair(&F, func_w)); } - // build call graph - auto &call_g = PDGCallGraph::getInstance(); + auto &call_g = pdg::PDGCallGraph::getInstance(); if (!call_g.isBuild()) call_g.build(M); + // handle call sites for (auto &F : M) { @@ -170,12 +167,12 @@ void pdg::ProgramGraph::build(Module &M) for (auto ci : call_insts) { auto called_func = pdgutils::getCalledFunc(*ci); - if (called_func == nullptr) - { - // handle indirect call + if (called_func == nullptr) { auto ind_call_candidates = call_g.getIndirectCallCandidates(*ci, M); if (ind_call_candidates.size() > 0) called_func = *ind_call_candidates.begin(); + else + continue; } if (!hasFuncWrapper(*called_func)) continue; @@ -387,11 +384,42 @@ void pdg::ProgramGraph::buildGlobalAnnotationNodes(Module &M) } } + +std::set pdg::GenericGraph::findNodesReachedByEdges(pdg::Node &src, const std::set &edge_types, bool is_backward) +{ + std::set ret; + std::queue node_queue; + node_queue.push(&src); + std::set visited; + while (!node_queue.empty()) + { + Node *current_node = node_queue.front(); + node_queue.pop(); + if (visited.find(current_node) != visited.end()) + continue; + visited.insert(current_node); + ret.insert(current_node); + Node::EdgeSet edge_set; + if (is_backward) + edge_set = current_node->getInEdgeSet(); + else + edge_set = current_node->getOutEdgeSet(); + for (auto edge : edge_set) + { + if (edge_types.find(edge->getEdgeType()) == edge_types.end()) + continue; + node_queue.push(edge->getDstNode()); + } + } + return ret; +} + + void pdg::ProgramGraph::dumpDataDepGraph(Function &F) { for (auto iter1 = inst_begin(F); iter1 != inst_end(F); iter1++) { for (auto iter2 = inst_begin(F); iter2 != inst_end(F); iter2++) { if (&*iter1 == &*iter2) - continue; + continue; Node* n1 = getNode(*iter1); Node* n2 = getNode(*iter2); assert((n1 && n2) && "cannot process null node"); @@ -399,4 +427,4 @@ void pdg::ProgramGraph::dumpDataDepGraph(Function &F) { errs() << *iter1 << " - " << *iter2 << "\n"; } } -} \ No newline at end of file +} diff --git a/src/PDGCallGraph.cpp b/src/PDGCallGraph.cpp index 8eceac4..658e1d4 100644 --- a/src/PDGCallGraph.cpp +++ b/src/PDGCallGraph.cpp @@ -93,6 +93,8 @@ bool pdg::PDGCallGraph::isTypeEqual(Type& t1, Type &t2) return (t1_name == t2_name); } + + std::set pdg::PDGCallGraph::getIndirectCallCandidates(CallInst &ci, Module &M) { Type *call_func_ty = ci.getFunctionType(); @@ -102,8 +104,9 @@ std::set pdg::PDGCallGraph::getIndirectCallCandidates(CallInst &ci, { if (F.isDeclaration() || F.empty()) continue; - if (isFuncSignatureMatch(ci, F)) + if (isFuncSignatureMatch(ci, F)) { ind_call_cand.insert(&F); + } } return ind_call_cand; } diff --git a/src/PDGUtils.cpp b/src/PDGUtils.cpp index 0b3e555..85b57ec 100644 --- a/src/PDGUtils.cpp +++ b/src/PDGUtils.cpp @@ -24,7 +24,7 @@ uint64_t pdg::pdgutils::getGEPOffsetInBits(Module& M, StructType &struct_type, G auto const struct_layout = data_layout.getStructLayout(&struct_type); if (gep_offset >= struct_type.getNumElements()) { - errs() << "dubious gep access outof bound: " << gep << " in func " << gep.getFunction()->getName() << "\n"; + //errs() << "dubious gep access outof bound: " << gep << " in func " << gep.getFunction()->getName() << "\n"; return INT_MIN; } uint64_t field_bit_offset = struct_layout->getElementOffsetInBits(gep_offset); @@ -391,7 +391,6 @@ std::string& pdg::pdgutils::rtrim(std::string& s, const char* t) return s; } - // check if i1 is precede of i2 bool pdg::pdgutils::isPrecedeInst(Instruction &i1, Instruction &i2, Function& F) { @@ -403,4 +402,4 @@ bool pdg::pdgutils::isPrecedeInst(Instruction &i1, Instruction &i2, Function& F) return false; } return false; -} +} \ No newline at end of file diff --git a/src/ProgramDependencyGraph.cpp b/src/ProgramDependencyGraph.cpp index 025b09c..6ff0727 100644 --- a/src/ProgramDependencyGraph.cpp +++ b/src/ProgramDependencyGraph.cpp @@ -1,20 +1,24 @@ #include "ProgramDependencyGraph.hh" -#include "llvm/IR/Instruction.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/Debug.h" -#include +#include +#include using namespace llvm; char pdg::ProgramDependencyGraph::ID = 0; -void pdg::ProgramDependencyGraph::getAnalysisUsage(AnalysisUsage &AU) const { +bool pdg::DEBUG; + +cl::opt DEBUG("pdg-debug", cl::desc("print debug messages"), cl::value_desc("print debug messages"), cl::location(pdg::DEBUG), cl::init(false)); + +void pdg::ProgramDependencyGraph::getAnalysisUsage(AnalysisUsage &AU) const +{ AU.addRequired(); AU.addRequired(); AU.setPreservesAll(); } -bool pdg::ProgramDependencyGraph::runOnModule(Module &M) { +bool pdg::ProgramDependencyGraph::runOnModule(Module &M) +{ auto start = std::chrono::high_resolution_clock::now(); _module = &M; _PDG = &ProgramGraph::getInstance(); @@ -22,15 +26,17 @@ bool pdg::ProgramDependencyGraph::runOnModule(Module &M) { PDGCallGraph &call_g = PDGCallGraph::getInstance(); if (!call_g.isBuild()) call_g.build(M); - - if (!_PDG->isBuild()) { + + if (!_PDG->isBuild()) + { _PDG->build(M); _PDG->bindDITypeToNodes(M); } unsigned func_size = 0; connectGlobalVarWithUses(); - for (auto &F : M) { + for (auto &F : M) + { if (F.isDeclaration()) continue; connectIntraprocDependencies(F); @@ -38,24 +44,25 @@ bool pdg::ProgramDependencyGraph::runOnModule(Module &M) { func_size++; } errs() << "func size: " << func_size << "\n"; - errs() << "Finsh adding dependencies" - << "\n"; + errs() << "Finsh adding dependencies" << "\n"; auto stop = std::chrono::high_resolution_clock::now(); - auto duration = - std::chrono::duration_cast(stop - start); - errs() << "building PDG takes: " << duration.count() << "\n"; + auto duration = std::chrono::duration_cast(stop - start); + errs() << "building PDG takes: " << duration.count() << "\n"; errs() << "PDG Node size: " << _PDG->numNode() << "\n"; return false; } -void pdg::ProgramDependencyGraph::connectGlobalVarWithUses() { - for (auto &global_var : _module->getGlobalList()) { - Node *n = _PDG->getNode(global_var); +void pdg::ProgramDependencyGraph::connectGlobalVarWithUses() +{ + for (auto &global_var : _module->getGlobalList()) + { + Node* n = _PDG->getNode(global_var); if (n == nullptr) continue; - for (auto user : global_var.users()) { - Node *user_node = _PDG->getNode(*user); + for (auto user : global_var.users()) + { + Node* user_node = _PDG->getNode(*user); if (user_node == nullptr) continue; n->addNeighbor(*user_node, EdgeType::DATA_DEF_USE); @@ -63,75 +70,75 @@ void pdg::ProgramDependencyGraph::connectGlobalVarWithUses() { } } -void pdg::ProgramDependencyGraph::connectInTrees(Tree *src_tree, Tree *dst_tree, - EdgeType edge_type) { +void pdg::ProgramDependencyGraph::connectInTrees(Tree* src_tree, Tree* dst_tree, EdgeType edge_type) +{ if (src_tree->size() != dst_tree->size()) return; auto src_tree_root_node = src_tree->getRootNode(); auto dst_tree_root_node = dst_tree->getRootNode(); - std::queue> node_pairs_queue; + std::queue> node_pairs_queue; node_pairs_queue.push(std::make_pair(src_tree_root_node, dst_tree_root_node)); - while (!node_pairs_queue.empty()) { + while (!node_pairs_queue.empty()) + { auto current_node_pair = node_pairs_queue.front(); node_pairs_queue.pop(); - TreeNode *src = current_node_pair.first; - TreeNode *dst = current_node_pair.second; + TreeNode* src = current_node_pair.first; + TreeNode* dst = current_node_pair.second; assert(src->numOfChild() == dst->numOfChild()); src->addNeighbor(*dst, edge_type); auto src_node_children = src->getChildNodes(); auto dst_node_children = dst->getChildNodes(); - for (int i = 0; i < src->numOfChild(); i++) { - node_pairs_queue.push( - std::make_pair(src_node_children[i], dst_node_children[i])); + for (int i = 0; i < src->numOfChild(); i++) + { + node_pairs_queue.push(std::make_pair(src_node_children[i], dst_node_children[i])); } } } -void pdg::ProgramDependencyGraph::connectOutTrees(Tree *src_tree, - Tree *dst_tree, - EdgeType edge_type) { +void pdg::ProgramDependencyGraph::connectOutTrees(Tree* src_tree, Tree* dst_tree, EdgeType edge_type) +{ if (src_tree->size() != dst_tree->size()) return; auto src_tree_root_node = src_tree->getRootNode(); auto dst_tree_root_node = dst_tree->getRootNode(); - std::queue> node_pairs_queue; + std::queue> node_pairs_queue; node_pairs_queue.push(std::make_pair(src_tree_root_node, dst_tree_root_node)); - while (!node_pairs_queue.empty()) { + while (!node_pairs_queue.empty()) + { auto current_node_pair = node_pairs_queue.front(); node_pairs_queue.pop(); - TreeNode *src = current_node_pair.first; - TreeNode *dst = current_node_pair.second; + TreeNode* src = current_node_pair.first; + TreeNode* dst = current_node_pair.second; assert(src->numOfChild() == dst->numOfChild()); if (src->hasWriteAccess()) src->addNeighbor(*dst, edge_type); auto src_node_children = src->getChildNodes(); auto dst_node_children = dst->getChildNodes(); - for (int i = 0; i < src->numOfChild(); i++) { - node_pairs_queue.push( - std::make_pair(src_node_children[i], dst_node_children[i])); + for (int i = 0; i < src->numOfChild(); i++) + { + node_pairs_queue.push(std::make_pair(src_node_children[i], dst_node_children[i])); } } } -void pdg::ProgramDependencyGraph::connectCallerAndCallee(CallWrapper &cw, - FunctionWrapper &fw) { +void pdg::ProgramDependencyGraph::connectCallerAndCallee(CallWrapper &cw, FunctionWrapper &fw) +{ // step 1: connect call site node with the entry node of function auto call_site_node = _PDG->getNode(*cw.getCallInst()); auto func_entry_node = fw.getEntryNode(); - if (call_site_node == nullptr || func_entry_node == nullptr) + if (call_site_node == nullptr || func_entry_node == nullptr ) return; call_site_node->addNeighbor(*func_entry_node, EdgeType::CONTROLDEP_CALLINV); // step 2: connect actual in -> formal in, formal out -> actual out auto actual_arg_list = cw.getArgList(); auto formal_arg_list = fw.getArgList(); - assert(actual_arg_list.size() == formal_arg_list.size() && - "cannot connect tree edges due to inequal arg num! " - "(connectCallerandCallee)"); + assert(actual_arg_list.size() == formal_arg_list.size() && "cannot connect tree edges due to inequal arg num! (connectCallerandCallee)"); int num_arg = cw.getArgList().size(); - for (int i = 0; i < num_arg; i++) { + for (int i = 0; i < num_arg; i++) + { Value *actual_arg = actual_arg_list[i]; - Argument *formal_arg = formal_arg_list[i]; + Argument *formal_arg = formal_arg_list[i]; // step 2: connect actual in -> formal in auto actual_in_tree = cw.getArgActualInTree(*actual_arg); auto formal_in_tree = fw.getArgFormalInTree(*formal_arg); @@ -144,35 +151,34 @@ void pdg::ProgramDependencyGraph::connectCallerAndCallee(CallWrapper &cw, connectOutTrees(formal_out_tree, actual_out_tree, EdgeType::PARAMETER_OUT); } - // step3: connect return value actual in -> formal in, formal out -> actual - // out - if (!fw.hasNullRetVal() && !cw.hasNullRetVal()) { + // step3: connect return value actual in -> formal in, formal out -> actual out + if (!fw.hasNullRetVal() && !cw.hasNullRetVal()) + { Tree *ret_formal_in_tree = fw.getRetFormalInTree(); Tree *ret_formal_out_tree = fw.getRetFormalOutTree(); Tree *ret_actual_in_tree = cw.getRetActualInTree(); Tree *ret_actual_out_tree = cw.getRetActualOutTree(); - connectInTrees(ret_actual_in_tree, ret_formal_in_tree, - EdgeType::PARAMETER_IN); - connectInTrees(ret_actual_out_tree, ret_formal_out_tree, - EdgeType::PARAMETER_OUT); + connectInTrees(ret_actual_in_tree, ret_formal_in_tree, EdgeType::PARAMETER_IN); + connectInTrees(ret_actual_out_tree, ret_formal_out_tree, EdgeType::PARAMETER_OUT); } // step4: connect both control/data return edges of callee to the call site auto ret_insts = fw.getReturnInsts(); auto call_inst = cw.getCallInst(); Node *dst = _PDG->getNode(*call_inst); - assert(dst != nullptr && - "cannot add control edge to call node on nullptr!\n"); + assert(dst != nullptr && "cannot add control edge to call node on nullptr!\n"); // add control return edge - for (auto ret_inst : ret_insts) { + for (auto ret_inst : ret_insts) + { Node *src = _PDG->getNode(*ret_inst); if (src == nullptr) continue; src->addNeighbor(*dst, EdgeType::CONTROLDEP_CALLRET); } // add data return edge - for (auto ret_inst : ret_insts) { - Node *src = _PDG->getNode(*ret_inst); + for (auto ret_inst : ret_insts) + { + Node* src = _PDG->getNode(*ret_inst); if (src == nullptr) continue; src->addNeighbor(*dst, EdgeType::DATA_RET); @@ -180,33 +186,90 @@ void pdg::ProgramDependencyGraph::connectCallerAndCallee(CallWrapper &cw, } // ===== connect dependencies ===== -void pdg::ProgramDependencyGraph::connectIntraprocDependencies(Function &F) { +void pdg::ProgramDependencyGraph::connectIntraprocDependencies(Function &F) +{ // add control dependency edges - getAnalysis( - F); // add control dependencies for nodes in F + getAnalysis(F); // add control dependencies for nodes in F // connect formal tree with address variables - FunctionWrapper *func_w = getFuncWrapper(F); - Node *entry_node = func_w->getEntryNode(); - for (auto arg : func_w->getArgList()) { - Tree *formal_in_tree = func_w->getArgFormalInTree(*arg); + FunctionWrapper* func_w = getFuncWrapper(F); + Node* entry_node = func_w->getEntryNode(); + for (auto arg : func_w->getArgList()) + { + Tree* formal_in_tree = func_w->getArgFormalInTree(*arg); if (!formal_in_tree) return; - Tree *formal_out_tree = func_w->getArgFormalOutTree(*arg); - entry_node->addNeighbor(*formal_in_tree->getRootNode(), - EdgeType::PARAMETER_IN); - entry_node->addNeighbor(*formal_out_tree->getRootNode(), - EdgeType::PARAMETER_OUT); + Tree* formal_out_tree = func_w->getArgFormalOutTree(*arg); + entry_node->addNeighbor(*formal_in_tree->getRootNode(), EdgeType::PARAMETER_IN); + entry_node->addNeighbor(*formal_out_tree->getRootNode(), EdgeType::PARAMETER_OUT); connectFormalInTreeWithAddrVars(*formal_in_tree); connectFormalOutTreeWithAddrVars(*formal_out_tree); } - if (!func_w->hasNullRetVal()) { + if (!func_w->hasNullRetVal()) + { connectFormalInTreeWithAddrVars(*func_w->getRetFormalInTree()); connectFormalOutTreeWithAddrVars(*func_w->getRetFormalOutTree()); } } +bool pdg::ProgramDependencyGraph::checkChildNodes(Tree* src_tree, Tree* dst_tree) { + if (src_tree->size() != dst_tree->size()) + return false; + auto src_tree_root_node = src_tree->getRootNode(); + auto dst_tree_root_node = dst_tree->getRootNode(); + std::queue> node_pairs_queue; + node_pairs_queue.push(std::make_pair(src_tree_root_node, dst_tree_root_node)); + while (!node_pairs_queue.empty()) { + auto current_node_pair = node_pairs_queue.front(); + node_pairs_queue.pop(); + TreeNode* src = current_node_pair.first; + TreeNode* dst = current_node_pair.second; + if (src->numOfChild() != dst->numOfChild()) + return false; + auto src_node_children = src->getChildNodes(); + auto dst_node_children = dst->getChildNodes(); + for (int i = 0; i < src->numOfChild(); i++) { + node_pairs_queue.push(std::make_pair(src_node_children[i], dst_node_children[i])); + } + } + return true; +} + +bool pdg::ProgramDependencyGraph::isIndirectCallCandidates(CallWrapper &cw, FunctionWrapper &fw) { + auto actual_arg_list = cw.getArgList(); + auto formal_arg_list = fw.getArgList(); + if (actual_arg_list.size() != formal_arg_list.size()) { + return false; + } + int num_arg = actual_arg_list.size(); + for (int i = 0; i < num_arg; i++) { + Value *actual_arg = actual_arg_list[i]; + Argument *formal_arg = formal_arg_list[i]; + auto actual_in_tree = cw.getArgActualInTree(*actual_arg); + auto formal_in_tree = fw.getArgFormalInTree(*formal_arg); + if (actual_in_tree == nullptr || formal_in_tree == nullptr) + return false; + if (!checkChildNodes(actual_in_tree, formal_in_tree)) + return false; + auto actual_out_tree = cw.getArgActualOutTree(*actual_arg); + auto formal_out_tree = fw.getArgFormalInTree(*formal_arg); + if (actual_out_tree == nullptr || formal_out_tree == nullptr) + return false; + if (!checkChildNodes(actual_out_tree, formal_out_tree)) + return false; + } + Tree *ret_formal_in_tree = fw.getRetFormalInTree(); + Tree *ret_formal_out_tree = fw.getRetFormalOutTree(); + Tree *ret_actual_in_tree = cw.getRetActualInTree(); + Tree *ret_actual_out_tree = cw.getRetActualOutTree(); + if (ret_formal_in_tree == nullptr || ret_formal_out_tree == nullptr || ret_actual_in_tree == nullptr || ret_actual_out_tree == nullptr) + return false; + if (!checkChildNodes(ret_formal_in_tree, ret_actual_in_tree) || !checkChildNodes(ret_formal_out_tree, ret_actual_out_tree)) + return false; + return true; +} + void pdg::ProgramDependencyGraph::connectInterprocDependencies(Function &F) { auto &call_g = PDGCallGraph::getInstance(); @@ -217,21 +280,24 @@ void pdg::ProgramDependencyGraph::connectInterprocDependencies(Function &F) if (_PDG->hasCallWrapper(*call_inst)) { auto call_w = getCallWrapper(*call_inst); + if (!call_w) + continue; auto call_site_node = _PDG->getNode(*call_inst); - if (!call_w || !call_site_node) + if (!call_site_node) continue; - if (call_w->getCalledFunc() && call_w->getCalledFunc()->isVarArg()) + if (call_w->getCalledFunc() && call_w->getCalledFunc()->isVarArg()) continue; + for (auto arg : call_w->getArgList()) { - Tree *actual_in_tree = call_w->getArgActualInTree(*arg); + Tree* actual_in_tree = call_w->getArgActualInTree(*arg); if (!actual_in_tree) { // errs() << "[WARNING]: empty actual tree for callsite " << *call_inst << " in func " << F.getName() << "\n"; continue; } - Tree *actual_out_tree = call_w->getArgActualOutTree(*arg); + Tree* actual_out_tree = call_w->getArgActualOutTree(*arg); call_site_node->addNeighbor(*actual_in_tree->getRootNode(), EdgeType::PARAMETER_IN); call_site_node->addNeighbor(*actual_out_tree->getRootNode(), EdgeType::PARAMETER_OUT); connectActualInTreeWithAddrVars(*actual_in_tree, *call_inst); @@ -246,103 +312,113 @@ void pdg::ProgramDependencyGraph::connectInterprocDependencies(Function &F) connectActualOutTreeWithAddrVars(*call_w->getRetActualOutTree(), *call_inst); } + // connect call site with callee // direct call - if (call_w->getCalledFunc() != nullptr) - { + if (call_w->getCalledFunc() != nullptr) { auto called_func_w = getFuncWrapper(*call_w->getCalledFunc()); connectCallerAndCallee(*call_w, *called_func_w); - } - else - { + } else { // indirect call auto ind_called_funcs = call_g.getIndirectCallCandidates(*call_w->getCallInst(), *_module); - for (auto ind_called_func : ind_called_funcs) - { + for (auto ind_called_func : ind_called_funcs) { if (ind_called_func->isDeclaration() || ind_called_func->isVarArg()) continue; + auto called_func_w = getFuncWrapper(*ind_called_func); - connectCallerAndCallee(*call_w, *called_func_w); + if (isIndirectCallCandidates(*call_w, *called_func_w)) + connectCallerAndCallee(*call_w, *called_func_w); } } + + // auto called_func_w = getFuncWrapper(*call_w->getCalledFunc()); + // connectCallerAndCallee(*call_w, *called_func_w); } } } // ====== connect tree with variables ====== -void pdg::ProgramDependencyGraph::connectFormalInTreeWithAddrVars( - Tree &formal_in_tree) { - TreeNode *root_node = formal_in_tree.getRootNode(); - std::queue node_queue; +void pdg::ProgramDependencyGraph::connectFormalInTreeWithAddrVars(Tree &formal_in_tree) +{ + TreeNode* root_node = formal_in_tree.getRootNode(); + std::queue node_queue; node_queue.push(root_node); - while (!node_queue.empty()) { - TreeNode *current_node = node_queue.front(); + while (!node_queue.empty()) + { + TreeNode* current_node = node_queue.front(); node_queue.pop(); - TreeNode *parent_node = current_node->getParentNode(); - std::unordered_set parent_node_addr_vars; + TreeNode* parent_node = current_node->getParentNode(); + std::unordered_set parent_node_addr_vars; if (parent_node != nullptr) parent_node_addr_vars = parent_node->getAddrVars(); - for (auto addr_var : current_node->getAddrVars()) { + for (auto addr_var : current_node->getAddrVars()) + { if (!_PDG->hasNode(*addr_var)) continue; auto addr_var_node = _PDG->getNode(*addr_var); current_node->addNeighbor(*addr_var_node, EdgeType::PARAMETER_IN); - auto alias_nodes = - addr_var_node->getOutNeighborsWithDepType(EdgeType::DATA_ALIAS); - for (auto alias_node : alias_nodes) { - Value *alias_node_val = alias_node->getValue(); + auto alias_nodes = addr_var_node->getOutNeighborsWithDepType(EdgeType::DATA_ALIAS); + for (auto alias_node : alias_nodes) + { + Value* alias_node_val = alias_node->getValue(); if (alias_node_val == nullptr) continue; - if (parent_node_addr_vars.find(alias_node_val) != - parent_node_addr_vars.end()) + if (parent_node_addr_vars.find(alias_node_val) != parent_node_addr_vars.end()) continue; current_node->addNeighbor(*alias_node, EdgeType::PARAMETER_IN); } } - for (auto child_node : current_node->getChildNodes()) { + for (auto child_node : current_node->getChildNodes()) + { node_queue.push(child_node); } } } -void pdg::ProgramDependencyGraph::connectFormalOutTreeWithAddrVars( - Tree &formal_out_tree) { +void pdg::ProgramDependencyGraph::connectFormalOutTreeWithAddrVars(Tree &formal_out_tree) +{ TreeNode *root_node = formal_out_tree.getRootNode(); std::queue node_queue; node_queue.push(root_node); - while (!node_queue.empty()) { + while (!node_queue.empty()) + { TreeNode *current_node = node_queue.front(); node_queue.pop(); - for (auto addr_var : current_node->getAddrVars()) { + for (auto addr_var : current_node->getAddrVars()) + { if (!_PDG->hasNode(*addr_var)) continue; auto addr_var_node = _PDG->getNode(*addr_var); // TODO: add addr variables for formal out tree - if (pdgutils::hasWriteAccess(*addr_var)) { + if (pdgutils::hasWriteAccess(*addr_var)) + { addr_var_node->addNeighbor(*current_node, EdgeType::PARAMETER_OUT); current_node->addAccessTag(AccessTag::DATA_WRITE); } } - for (auto child_node : current_node->getChildNodes()) { + for (auto child_node : current_node->getChildNodes()) + { node_queue.push(child_node); } } } -void pdg::ProgramDependencyGraph::connectActualInTreeWithAddrVars( - Tree &actual_in_tree, CallInst &ci) { +void pdg::ProgramDependencyGraph::connectActualInTreeWithAddrVars(Tree &actual_in_tree, CallInst &ci) +{ TreeNode *root_node = actual_in_tree.getRootNode(); - std::set insts_before_ci = - pdgutils::getInstructionBeforeInst(ci); + std::set insts_before_ci = pdgutils::getInstructionBeforeInst(ci); std::queue node_queue; node_queue.push(root_node); - while (!node_queue.empty()) { + while (!node_queue.empty()) + { TreeNode *current_node = node_queue.front(); node_queue.pop(); - for (auto addr_var : current_node->getAddrVars()) { + for (auto addr_var : current_node->getAddrVars()) + { // only connect addr_var that are pred to the call site - if (Instruction *i = dyn_cast(addr_var)) { + if (Instruction *i = dyn_cast(addr_var)) + { if (insts_before_ci.find(i) == insts_before_ci.end()) continue; } @@ -352,23 +428,25 @@ void pdg::ProgramDependencyGraph::connectActualInTreeWithAddrVars( addr_var_node->addNeighbor(*current_node, EdgeType::PARAMETER_IN); } - for (auto child_node : current_node->getChildNodes()) { + for (auto child_node : current_node->getChildNodes()) + { node_queue.push(child_node); } } } -void pdg::ProgramDependencyGraph::connectActualOutTreeWithAddrVars( - Tree &actual_out_tree, CallInst &ci) { +void pdg::ProgramDependencyGraph::connectActualOutTreeWithAddrVars(Tree &actual_out_tree, CallInst &ci) +{ TreeNode *root_node = actual_out_tree.getRootNode(); - // std::set insts_after_ci = - // pdgutils::getInstructionAfterInst(ci); + // std::set insts_after_ci = pdgutils::getInstructionAfterInst(ci); std::queue node_queue; node_queue.push(root_node); - while (!node_queue.empty()) { + while (!node_queue.empty()) + { TreeNode *current_node = node_queue.front(); node_queue.pop(); - for (auto addr_var : current_node->getAddrVars()) { + for (auto addr_var : current_node->getAddrVars()) + { // only connect with succe insts of call sites // if (Instruction *i = dyn_cast(addr_var)) // { @@ -381,11 +459,13 @@ void pdg::ProgramDependencyGraph::connectActualOutTreeWithAddrVars( current_node->addNeighbor(*addr_var_node, EdgeType::PARAMETER_OUT); } - for (auto child_node : current_node->getChildNodes()) { + for (auto child_node : current_node->getChildNodes()) + { node_queue.push(child_node); } } } + static RegisterPass - PDG("pdg", "Program Dependency Graph Construction", false, true); + PDG("pdg", "Program Dependency Graph Construction", false, true); \ No newline at end of file diff --git a/src/Tree.cpp b/src/Tree.cpp index f815d30..b4b0767 100644 --- a/src/Tree.cpp +++ b/src/Tree.cpp @@ -7,6 +7,9 @@ pdg::TreeNode::TreeNode(const TreeNode &tree_node) : Node(tree_node.getNodeType( _func = tree_node.getFunc(); _node_di_type = tree_node.getDIType(); _node_type = tree_node.getNodeType(); + _depth = tree_node.getDepth(); + _parent_node = tree_node.getParentNode(); + _func = tree_node.getFunc(); } pdg::TreeNode::TreeNode(DIType *di_type, int depth, TreeNode *parent_node, Tree *tree, GraphNodeType node_type) : Node(node_type) @@ -74,7 +77,7 @@ void pdg::TreeNode::computeDerivedAddrVarsFromParent() // handle struct pointer auto grand_parent_node = _parent_node->getParentNode(); // TODO: now hanlde struct specifically, but should also verify on other aggregate pointer types - if (grand_parent_node != nullptr && dbgutils::isStructType(*_parent_node->getDIType()) && dbgutils::isStructPointerType(*grand_parent_node->getDIType())) + if (grand_parent_node != nullptr && dbgutils::isStructType(*(_parent_node->getDIType())) && dbgutils::isStructPointerType(*(grand_parent_node->getDIType()))) { base_node_addr_vars = grand_parent_node->getAddrVars(); } @@ -87,6 +90,8 @@ void pdg::TreeNode::computeDerivedAddrVarsFromParent() for (auto base_node_addr_var : base_node_addr_vars) { + if (base_node_addr_var == nullptr) + continue; for (auto user : base_node_addr_var->users()) { // handle load instruction, field should not get the load inst from the sturct pointer.