diff --git a/build.sh b/build.sh
index 89cea5d77..1f75422cf 100755
--- a/build.sh
+++ b/build.sh
@@ -23,11 +23,11 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
 SVFHOME="${SCRIPT_DIR}"
 sysOS=$(uname -s)
 arch=$(uname -m)
-MajorLLVMVer=18
+MajorLLVMVer=22
 LLVMVer=${MajorLLVMVer}.1.0
-UbuntuArmLLVM_RTTI="https://github.com/SVF-tools/SVF/releases/download/SVF-3.2/llvm-${MajorLLVMVer}.1.0-ubuntu22-rtti-aarch64.tar.gz"
+UbuntuArmLLVM_RTTI="https://github.com/bjjwwang/SVF-LLVM/releases/download/${LLVMVer}/llvm-${LLVMVer}-ubuntu22-rtti-aarch64.tar.gz"
 UbuntuArmLLVM="https://github.com/llvm/llvm-project/releases/download/llvmorg-${LLVMVer}/clang+llvm-${LLVMVer}-aarch64-linux-gnu.tar.xz"
-UbuntuLLVM_RTTI="https://github.com/SVF-tools/SVF/releases/download/SVF-3.2/llvm-${MajorLLVMVer}.1.0-ubuntu20-rtti-x86-64.tar.gz"
+UbuntuLLVM_RTTI="https://github.com/bjjwwang/SVF-LLVM/releases/download/${LLVMVer}/llvm-${LLVMVer}-ubuntu22-rtti-x86-64.tar.gz"
 UbuntuLLVM="https://github.com/llvm/llvm-project/releases/download/llvmorg-${LLVMVer}/clang+llvm-${LLVMVer}-x86_64-linux-gnu-ubuntu-18.04.tar.xz"
 SourceLLVM="https://github.com/llvm/llvm-project/archive/refs/tags/llvmorg-${LLVMVer}.zip"
 UbuntuZ3="https://github.com/Z3Prover/z3/releases/download/z3-4.8.8/z3-4.8.8-x64-ubuntu-16.04.zip"
diff --git a/setup.sh b/setup.sh
index a7dc8ce34..1b284abe4 100755
--- a/setup.sh
+++ b/setup.sh
@@ -18,7 +18,7 @@ function set_llvm {
     [[ -n "$LLVM_DIR" ]] && return 0
 
     # use local download directory
-    LLVM_DIR="$SVF_DIR/llvm-18.1.0.obj"
+    LLVM_DIR="$SVF_DIR/llvm-22.1.0.obj"
     [[ -d "$LLVM_DIR" ]] && return 0
 
     # ... otherwise don't set LLVM_DIR
diff --git a/svf-llvm/include/SVF-LLVM/BasicTypes.h b/svf-llvm/include/SVF-LLVM/BasicTypes.h
index 84f2104f3..e6b28a9dd 100644
--- a/svf-llvm/include/SVF-LLVM/BasicTypes.h
+++ b/svf-llvm/include/SVF-LLVM/BasicTypes.h
@@ -208,17 +208,21 @@ typedef llvm::MinMaxIntrinsic MinMaxIntrinsic;
 typedef llvm::BinaryOpIntrinsic BinaryOpIntrinsic;
 typedef llvm::WithOverflowInst WithOverflowInst;
 typedef llvm::SaturatingInst SaturatingInst;
+#if LLVM_VERSION_MAJOR < 22
 typedef llvm::AtomicMemIntrinsic AtomicMemIntrinsic;
 typedef llvm::AtomicMemSetInst AtomicMemSetInst;
 typedef llvm::AtomicMemTransferInst AtomicMemTransferInst;
 typedef llvm::AtomicMemCpyInst AtomicMemCpyInst;
 typedef llvm::AtomicMemMoveInst AtomicMemMoveInst;
+#endif
 typedef llvm::MemIntrinsic MemIntrinsic;
 typedef llvm::MemSetInst MemSetInst;
 typedef llvm::MemTransferInst MemTransferInst;
 typedef llvm::MemCpyInst MemCpyInst;
 typedef llvm::MemMoveInst MemMoveInst;
+#if LLVM_VERSION_MAJOR < 22
 typedef llvm::MemCpyInlineInst MemCpyInlineInst;
+#endif
 typedef llvm::AnyMemIntrinsic AnyMemIntrinsic;
 typedef llvm::AnyMemSetInst AnyMemSetInst;
 typedef llvm::AnyMemTransferInst AnyMemTransferInst;
diff --git a/svf-llvm/include/SVF-LLVM/LLVMUtil.h b/svf-llvm/include/SVF-LLVM/LLVMUtil.h
index 1f0127f02..1be428b7a 100644
--- a/svf-llvm/include/SVF-LLVM/LLVMUtil.h
+++ b/svf-llvm/include/SVF-LLVM/LLVMUtil.h
@@ -310,6 +310,7 @@ inline const ConstantExpr* isUnaryConstantExpr(const Value* val)
 }
 //@}
 
+#if LLVM_VERSION_MAJOR < 22
 inline static DataLayout* getDataLayout(Module* mod)
 {
     static DataLayout *dl = nullptr;
@@ -317,6 +318,12 @@ inline static DataLayout* getDataLayout(Module* mod)
         dl = new DataLayout(mod);
     return dl;
 }
+#else
+inline static const DataLayout* getDataLayout(Module* mod)
+{
+    return mod ? &mod->getDataLayout() : nullptr;
+}
+#endif
 
 /// Get the next instructions following control flow
 void getNextInsts(const Instruction* curInst,
diff --git a/svf-llvm/lib/LLVMModule.cpp b/svf-llvm/lib/LLVMModule.cpp
index 9f9a0c441..39fcd6600 100644
--- a/svf-llvm/lib/LLVMModule.cpp
+++ b/svf-llvm/lib/LLVMModule.cpp
@@ -518,11 +518,13 @@ void LLVMModuleSet::addSVFMain()
         // Collect ctor and dtor functions
         for (const GlobalVariable& global : mod.globals())
         {
-            if (global.getName().equals(SVF_GLOBAL_CTORS) && global.hasInitializer())
+            // Compare with operator==: it works on old and new LLVM alike,
+            // whereas StringRef::equals() is not usable with LLVM >= 22.
+            if (global.getName() == SVF_GLOBAL_CTORS && global.hasInitializer())
             {
                 ctor_funcs = getLLVMGlobalFunctions(&global);
             }
-            else if (global.getName().equals(SVF_GLOBAL_DTORS) && global.hasInitializer())
+            else if (global.getName() == SVF_GLOBAL_DTORS && global.hasInitializer())
             {
                 dtor_funcs = getLLVMGlobalFunctions(&global);
             }
@@ -533,9 +535,9 @@ void LLVMModuleSet::addSVFMain()
         {
             auto funName = func.getName();
 
-            assert(!funName.equals(SVF_MAIN_FUNC_NAME) && SVF_MAIN_FUNC_NAME " already defined");
+            assert(funName != SVF_MAIN_FUNC_NAME && SVF_MAIN_FUNC_NAME " already defined");
 
-            if (funName.equals("main"))
+            if (funName == "main")
             {
                 orgMain = &func;
                 mainMod = &mod;
diff --git a/svf-llvm/lib/LLVMUtil.cpp b/svf-llvm/lib/LLVMUtil.cpp
index ed3cd9462..73a3df996 100644
--- a/svf-llvm/lib/LLVMUtil.cpp
+++ b/svf-llvm/lib/LLVMUtil.cpp
@@ -461,11 +461,24 @@ const std::string LLVMUtil::getSourceLoc(const Value* val )
     {
         if (SVFUtil::isa(inst))
         {
+#if LLVM_VERSION_MAJOR >= 22
+            // LLVM >= 22: the declare is looked up as a DbgVariableRecord.
+            for (llvm::DbgVariableRecord *DVR : llvm::findDVRDeclares(const_cast(inst)))
+            {
+                if (DVR->isDbgDeclare())
+                {
+                    llvm::DIVariable *DIVar = DVR->getVariable();
+                    rawstr << "\"ln\": " << DIVar->getLine() << ", \"fl\": \"" << DIVar->getFilename().str() << "\"";
+                    break;
+                }
+            }
+#else
+            // Older LLVM: only the lookup helper differs; the loop body below is shared.
 #if LLVM_VERSION_MAJOR > 16
             for (llvm::DbgInfoIntrinsic *DII : llvm::findDbgDeclares(const_cast(inst)))
 #else
             for (llvm::DbgInfoIntrinsic *DII : FindDbgDeclareUses(const_cast(inst)))
 #endif
             {
                 if (llvm::DbgDeclareInst *DDI = SVFUtil::dyn_cast(DII))
                 {
@@ -474,6 +487,7 @@ const std::string LLVMUtil::getSourceLoc(const Value* val )
                     break;
                 }
             }
+#endif // LLVM_VERSION_MAJOR >= 22
         }
         else if (MDNode *N = inst->getMetadata("dbg")) // Here I is an LLVM instruction
         {
diff --git a/svf-llvm/lib/SVFIRBuilder.cpp b/svf-llvm/lib/SVFIRBuilder.cpp
index 587bfc3b0..1ca120d2d 100644
--- a/svf-llvm/lib/SVFIRBuilder.cpp
+++ b/svf-llvm/lib/SVFIRBuilder.cpp
@@ -645,7 +645,12 @@ bool SVFIRBuilder::computeGepOffset(const User *V, AccessPath& ap)
     assert(V);
 
     const llvm::GEPOperator *gepOp = SVFUtil::dyn_cast(V);
-    DataLayout * dataLayout = getDataLayout(llvmModuleSet()->getMainLLVMModule());
+    // A pointer-to-const binds to both getDataLayout() variants (DataLayout*
+    // before LLVM 22, const DataLayout* from LLVM 22 on), so no #if is needed.
+    const DataLayout* dataLayout = getDataLayout(llvmModuleSet()->getMainLLVMModule());
+    // NOTE(review): the next line dereferences dataLayout and gepOp before the
+    // null checks in the if below; with LLVM >= 22 getDataLayout() returns
+    // nullptr for a null module.
     llvm::APInt byteOffset(dataLayout->getIndexSizeInBits(gepOp->getPointerAddressSpace()),0,true);
     if(gepOp && dataLayout && gepOp->accumulateConstantOffset(*dataLayout,byteOffset))
     {
@@ -924,10 +929,41 @@ void SVFIRBuilder::InitialGlobal(const GlobalVariable *gvar, Constant *C,
     {
         if(cppUtil::isValVtbl(gvar) && !Options::VtableInSVFIR())
             return;
+        // LLVM >=17 emits global initializers of aligned structs as anonymous
+        // (literal) structs with explicit [N x i8] zero-initialized padding
+        // fields. That adds extra field positions that are absent in the named
+        // struct type used for accesses, causing field-index drift between the
+        // init side and the access side (e.g. @f1 stored at fld 2, read at
+        // fld 1). For literal structs, skip pure-padding operands and reindex.
+        const StructType* litST =
+            SVFUtil::dyn_cast(C->getType());
+        const bool isPaddedLiteralStruct =
+            SVFUtil::isa(C) && litST && litST->isLiteral();
+        u32_t logicalIdx = 0;
         for (u32_t i = 0, e = C->getNumOperands(); i != e; i++)
         {
-            u32_t off = pag->getFlattenedElemIdx(llvmModuleSet()->getSVFType(C->getType()), i);
-            InitialGlobal(gvar, SVFUtil::cast(C->getOperand(i)), offset + off);
+            Constant* op = SVFUtil::cast(C->getOperand(i));
+            if (isPaddedLiteralStruct)
+            {
+                // padding = [N x i8] zero-initialized
+                // NOTE(review): a real zero-initialized i8-array field of a literal
+                // struct looks like it matches the same test and would be skipped too - confirm.
+                const ArrayType* at =
+                    SVFUtil::dyn_cast(op->getType());
+                if (at && at->getElementType()->isIntegerTy(8) &&
+                        SVFUtil::isa(op))
+                {
+                    continue;
+                }
+                InitialGlobal(gvar, op, offset + logicalIdx);
+                logicalIdx++;
+            }
+            else
+            {
+                u32_t off = pag->getFlattenedElemIdx(
+                                llvmModuleSet()->getSVFType(C->getType()), i);
+                InitialGlobal(gvar, op, offset + off);
+            }
         }
     }
     else if(ConstantData* data = SVFUtil::dyn_cast(C))
diff --git a/svf-llvm/tools/Example/svf-ex.cpp b/svf-llvm/tools/Example/svf-ex.cpp
index 46455a1c0..2a2c1276f 100644
--- a/svf-llvm/tools/Example/svf-ex.cpp
+++ b/svf-llvm/tools/Example/svf-ex.cpp
@@ -212,7 +212,9 @@ int main(int argc, char ** argv)
     LLVMModuleSet::getLLVMModuleSet()->dumpModulesToFile(".svf.bc");
 
     SVF::LLVMModuleSet::releaseLLVMModuleSet();
+#if LLVM_VERSION_MAJOR < 22
     llvm::llvm_shutdown();
+#endif
     return 0;
 }
 
diff --git a/svf/lib/AE/Svfexe/AEDetector.cpp b/svf/lib/AE/Svfexe/AEDetector.cpp
index ec91a1a33..b8d2cbb79 100644
--- a/svf/lib/AE/Svfexe/AEDetector.cpp
+++ b/svf/lib/AE/Svfexe/AEDetector.cpp
@@ -352,10 +352,17 @@ void BufOverflowDetector::updateGepObjOffsetFromBase(const SVF::ICFGNode* node,
             for (const auto& gepAddr : gepAddrs)
             {
                 NodeID gepObj = as.getIDFromAddr(gepAddr);
-                if (const GepObjVar* gepObjVar = SVFUtil::dyn_cast(svfir->getSVFVar(gepObj)))
+                const SVFVar* gepSvfVar = svfir->getSVFVar(gepObj);
+                if (const GepObjVar* gepObjVar = SVFUtil::dyn_cast(gepSvfVar))
                 {
                     addToGepObjOffsetFromBase(gepObjVar, offset);
                 }
+                else if (SVFUtil::isa(gepSvfVar))
+                {
+                    // With -ff-eq-base, a GEP with zero net offset maps to the
+                    // base itself; offset-from-base is implicitly 0, so nothing
+                    // to record. getAccessOffset handles BaseObjVar directly.
+                }
                 else
                 {
                     assert(AbstractState::isBlackHoleObjAddr(gepAddr) && "GEP object is neither a GepObjVar nor an invalid memory address");
@@ -371,7 +378,8 @@ void BufOverflowDetector::updateGepObjOffsetFromBase(const SVF::ICFGNode* node,
             for (const auto& gepAddr : gepAddrs)
             {
                 NodeID gepObj = as.getIDFromAddr(gepAddr);
-                if (const GepObjVar* gepObjVar = SVFUtil::dyn_cast(svfir->getSVFVar(gepObj)))
+                const SVFVar* gepSvfVar = svfir->getSVFVar(gepObj);
+                if (const GepObjVar* gepObjVar = SVFUtil::dyn_cast(gepSvfVar))
                 {
                     if (hasGepObjOffsetFromBase(objVar))
                     {
@@ -387,6 +395,10 @@ void BufOverflowDetector::updateGepObjOffsetFromBase(const SVF::ICFGNode* node,
                                "GEP RHS object has no offset from base");
                     }
                 }
+                else if (SVFUtil::isa(gepSvfVar))
+                {
+                    // With -ff-eq-base, net offset == 0 collapses to the base.
+                }
                 else
                 {
                     assert(AbstractState::isBlackHoleObjAddr(gepAddr) && "GEP object is neither a GepObjVar nor an invalid memory address");
diff --git a/svf/lib/MemoryModel/PointerAnalysisImpl.cpp b/svf/lib/MemoryModel/PointerAnalysisImpl.cpp
index 384fa9b2d..9325788a4 100644
--- a/svf/lib/MemoryModel/PointerAnalysisImpl.cpp
+++ b/svf/lib/MemoryModel/PointerAnalysisImpl.cpp
@@ -133,9 +133,14 @@ void BVDataPTAImpl::finalize()
 void BVDataPTAImpl::expandFIObjs(const PointsTo& pts, PointsTo& expandedPts)
 {
     expandedPts = pts;;
+    const bool ffEqBase = Options::FirstFieldEqBase();
     for(PointsTo::iterator pit = pts.begin(), epit = pts.end(); pit!=epit; ++pit)
     {
-        if (pag->getBaseObjVarID(*pit) == *pit || isFieldInsensitive(*pit))
+        const bool isBase = pag->getBaseObjVarID(*pit) == *pit;
+        const bool isFI = isFieldInsensitive(*pit);
+        // With -ff-eq-base, a base object denotes the first field (not the
+        // whole aggregate), so it must not be expanded to all fields.
+        if ((isBase && !ffEqBase) || isFI)
         {
             expandedPts |= pag->getAllFieldsObjVars(*pit);
         }
@@ -145,9 +150,12 @@ void BVDataPTAImpl::expandFIObjs(const PointsTo& pts, PointsTo& expandedPts)
 void BVDataPTAImpl::expandFIObjs(const NodeBS& pts, NodeBS& expandedPts)
 {
     expandedPts = pts;
+    const bool ffEqBase = Options::FirstFieldEqBase();
     for (const NodeID o : pts)
     {
-        if (pag->getBaseObjVarID(o) == o || isFieldInsensitive(o))
+        const bool isBase = pag->getBaseObjVarID(o) == o;
+        const bool isFI = isFieldInsensitive(o);
+        if ((isBase && !ffEqBase) || isFI)
         {
             expandedPts |= pag->getAllFieldsObjVars(o);
         }
diff --git a/svf/lib/Util/Options.cpp b/svf/lib/Util/Options.cpp
index 4b958ceb5..1bf0d300f 100644
--- a/svf/lib/Util/Options.cpp
+++ b/svf/lib/Util/Options.cpp
@@ -170,7 +170,7 @@ Option Options::HandBlackHole(
 const Option Options::FirstFieldEqBase(
     "ff-eq-base",
     "Treat base objects as their first fields",
-    false
+    true
 );
 
 