References:
A Module pass with an instruction visitor: class CheriBoundAllocas : public ModulePass, public InstVisitor<CheriBoundAllocas>
Initialization: initializeCheriBoundAllocasPass()
Pass creation: createCheriBoundAllocasPass()
Added to the backend IR pipelines by MipsPassConfig::addIRPasses() and RISCVPassConfig::addIRPasses() (see the wiring sketch below)
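The wiring follows the standard TargetPassConfig pattern. A minimal sketch, assuming the usual addIRPasses() shape; the exact position among the other IR passes is illustrative, not taken from the CHERI sources:

// Hypothetical excerpt (RISCVTargetMachine.cpp-style wiring).
void RISCVPassConfig::addIRPasses() {
  // Bound stack allocations before instruction selection, while every
  // alloca is still visible as IR.
  addPass(createCheriBoundAllocasPass());
  TargetPassConfig::addIRPasses();
}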
Adding the bounds intrinsic:
Get the intrinsic declaration: Intrinsic::getDeclaration(M, Intrinsic::cheri_cap_bounds_set, SizeTy);
// llvm/lib/CodeGen/CheriBoundAllocas.cpp
// An intrinsic declaration is handled as an ordinary Function:
Function *SetBoundsIntrin = Intrinsic::getDeclaration(
    M, Intrinsic::cheri_cap_bounds_set, SizeTy);
Intrinsic::ID BoundedStackCap = UseRematerializableIntrinsic
                                    ? Intrinsic::cheri_bounded_stack_cap
                                    : Intrinsic::cheri_cap_bounds_set;
Function *BoundedStackFn = Intrinsic::getDeclaration(M, BoundedStackCap, SizeTy);

// Use the intrinsic to create IR call instructions
LLVMContext &C = M->getContext();
IRBuilder<> B(C);

// Version 1: one bounded capability, reused by all users
// (convert to i8* for the intrinsic)
Instruction *AllocaI8 = cast<Instruction>(B.CreateBitCast(AI, I8CapTy));
Value *SingleBoundedAlloc = B.CreateCall(SetBoundsIntrin, {AllocaI8, Size});
SingleBoundedAlloc = B.CreateBitCast(SingleBoundedAlloc, AllocaTy);

// Version 2: a fresh intrinsic call per use
auto WithBounds = B.CreateCall(SetBoundsIntrin, {AllocaI8, Size});
const_cast<Use *>(U)->set(B.CreateBitCast(WithBounds, AllocaTy));
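The pass chooses between the two versions per alloca: the single reused call (version 1) is used at -O0, when a PHI user forces it, or when there are at least SingleIntrinsicThreshold users; otherwise a per-use call (version 2) avoids keeping one bounded capability live (and potentially spilled) across basic blocks.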
Functions/Steps:
runOnModule: iterate over all functions in the module and call runOnFunction on each.
runOnFunction: visit(F) -> visitAllocaInst(): stores every AllocaInst of the function in the vector Allocas.
enum StackBoundsMethod: the bounds-setting strategy: Never / ForAllUsesIfOneNeedsBounds / IfNeeded / AllUses.
class CheriNeedBoundsChecker: finds the uses of an alloca that actually need a bounds check.
If !AI->isStaticAlloca() (a dynamic alloca): force a single reused call to the cheri_cap_bounds_set intrinsic. If static, use cheri_bounded_stack_cap (rematerializable) or cheri_cap_bounds_set.
For each use that needs bounds, I = cast<Instruction>(U->getUser()) obtains the user instruction.
When reusing a single call: const_cast<Use *>(U)->set(SingleBoundedAlloc) makes the user refer to the SingleBoundedAlloc result instead of the AllocaInst.
Otherwise, per use: B.CreateBitCast(AI, I8CapTy); B.CreateCall(SetBoundsIntrin, {AllocaI8, Size}); const_cast<Use *>(U)->set(B.CreateBitCast(WithBounds, AllocaTy)).
Summary: check all alloca instructions in a function and insert a cheri_cap_bounds_set or cheri_bounded_stack_cap call at the proper location to set the bounds of each alloca'd object. Because the intrinsics take an i8 capability, the alloca result is first bitcast to I8CapTy, the bounds are set, and the bounded capability is cast back to the original alloca type for normal use. A helper-style sketch of this sequence follows.
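The bitcast -> setbounds -> bitcast sequence as a single helper. A minimal sketch assuming the CHERI LLVM tree (Intrinsic::cheri_cap_bounds_set and the pass's cached I8CapTy/SizeTy); the helper name insertBoundedCapability is hypothetical and not part of the pass:

// Hypothetical helper: emit csetbounds for AI at the builder's current
// insertion point and return the result as the alloca's original type.
static llvm::Value *insertBoundedCapability(llvm::IRBuilder<> &B,
                                            llvm::Module *M,
                                            llvm::AllocaInst *AI,
                                            llvm::Value *Size,
                                            llvm::Type *I8CapTy,
                                            llvm::Type *SizeTy) {
  using namespace llvm;
  Function *SetBounds =
      Intrinsic::getDeclaration(M, Intrinsic::cheri_cap_bounds_set, SizeTy);
  // The intrinsic operates on i8 capabilities, so cast, bound, cast back.
  Value *AsI8Cap = B.CreateBitCast(AI, I8CapTy);
  Value *Bounded = B.CreateCall(SetBounds, {AsI8Cap, Size});
  return B.CreateBitCast(Bounded, AI->getType());
}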
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Analysis/CheriBounds.h"
#include "llvm/Analysis/Utils/Local.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Cheri.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/CheriSetBounds.h"
#include "llvm/Transforms/Utils/Local.h"
#include <string>
#include <utility>
#include <unordered_map>
#include "llvm/IR/Verifier.h"
#define DEBUG_TYPE "cheri-bound-allocas"
using namespace llvm;
#define DBG_MESSAGE(...) LLVM_DEBUG(dbgs() << DEBUG_TYPE ": " << __VA_ARGS__)
namespace {

// TODO: remove this option after eval
static cl::opt<bool> UseRematerializableIntrinsic(
    "cheri-stack-bounds-allow-remat", cl::init(true),
    cl::desc("Use bounded.stack.cap() instead of bounds.set() for stack "
             "allocations (allows rematerialization)"),
    cl::Hidden);

// Loading/storing from constant stack indices does not need to use a small
// tightly bounded capability and can use $csp instead
// TODO: remove these options once we know what the best strategy is?
// TODO: change this to an integer threshold (more than N uses -> reuse the
// same one)
static cl::opt<unsigned> SingleIntrinsicThreshold(
    "cheri-stack-bounds-single-intrinsic-threshold", cl::init(5),
    cl::desc("Reuse the result of a single CHERI bounds intrinsic if there are "
             "more than N uses (default=5). A value of 0 means always."),
    cl::Hidden);

// single option instead of the booleans?
enum class StackBoundsMethod {
  Never,
  ForAllUsesIfOneNeedsBounds, // This is not particularly useful, just for
                              // comparison
  IfNeeded,
  AllUses,
};
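// Note: if-needed (the default) skips bounds for loads/stores at statically
// known in-bounds offsets, which can keep using the $csp-derived capability.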
static cl::opt<StackBoundsMethod> BoundsSettingMode(
    "cheri-stack-bounds",
    cl::desc("Strategy for setting bounds on stack capabilities:"),
    cl::init(StackBoundsMethod::IfNeeded),
    cl::values(clEnumValN(StackBoundsMethod::Never, "never",
                          "Do not add bounds on stack allocations (UNSAFE!)"),
               clEnumValN(StackBoundsMethod::ForAllUsesIfOneNeedsBounds,
                          "all-or-none",
                          "Set stack allocation bounds for all uses if at "
                          "least one use needs bounds, otherwise omit"),
               clEnumValN(StackBoundsMethod::IfNeeded, "if-needed",
                          "Set stack allocation bounds for all uses except for "
                          "loads/stores to statically known in-bounds offsets"),
               clEnumValN(StackBoundsMethod::AllUses, "all-uses",
                          "Set stack allocation bounds even for loads/stores "
                          "to statically known in-bounds offsets")));

STATISTIC(NumProcessed,
          "Number of allocas that were analyzed for CHERI bounds");
STATISTIC(NumDynamicAllocas,
          "Number of dynamic allocas that were analyzed"); // TODO: skip them
STATISTIC(NumUsesProcessed,
          "Total number of alloca uses that were analyzed");
STATISTIC(NumCompletelyUnboundedAllocas,
          "Number of allocas where CHERI bounds were completely unnecessary");
STATISTIC(NumUsesWithBounds,
          "Number of alloca uses that had CHERI bounds added");
STATISTIC(NumUsesWithoutBounds,
          "Number of alloca uses that did not need CHERI bounds");
STATISTIC(NumSingleIntrin,
          "Number of times that a single intrinsic was used instead of per-use");
class CheriBoundAllocas : public ModulePass,
                          public InstVisitor<CheriBoundAllocas> {
  Module *M;
  llvm::SmallVector<AllocaInst *, 16> Allocas;
  Type *I8CapTy;
  Type *SizeTy;

public:
  static char ID;

  CheriBoundAllocas() : ModulePass(ID) {
    initializeCheriBoundAllocasPass(*PassRegistry::getPassRegistry());
  }
  StringRef getPassName() const override {
    return "CHERI bound stack allocations";
  }
  void visitAllocaInst(AllocaInst &AI) { Allocas.push_back(&AI); }

  bool runOnModule(Module &Mod) override {
    M = &Mod;
    const DataLayout &DL = Mod.getDataLayout();
    unsigned AllocaAS = DL.getAllocaAddrSpace();
    // Early abort if we aren't using capabilities on the stack
    if (!DL.isFatPointer(AllocaAS))
      return false;
    LLVMContext &C = M->getContext();
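    // Cache the i8 capability type and the index-sized integer type: the
    // set-bounds intrinsics below take (i8 capability, index-size integer).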
    I8CapTy = Type::getInt8PtrTy(C, AllocaAS);
    SizeTy = Type::getIntNTy(C, DL.getIndexSizeInBits(AllocaAS));
    bool Modified = false;
    for (Function &F : Mod)
      Modified |= runOnFunction(F);
    return Modified;
  }
  bool runOnFunction(Function &F) {
    // always set bounds with optnone
    bool IsOptNone = F.hasFnAttribute(Attribute::OptimizeNone);
    // FIXME: should still ignore lifetime-start + lifetime-end intrinsics
    // even at -O0
    const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
    const TargetMachine &TM = TPC.getTM<TargetMachine>();
    const TargetLowering *TLI = TM.getSubtargetImpl(F)->getTargetLowering();
    LLVMContext &C = M->getContext();
    Allocas.clear();
    visit(F);
    // Give up if this function has no allocas
    if (Allocas.empty())
      return false;
    LLVM_DEBUG(dbgs() << "\nChecking function " << F.getName() << "\n");
    // TODO: csetboundsexact and round up sizes
    // Keep around the non-rematerializable cheri_cap_bounds_set code path to
    // compare how much rematerialization can help
    Intrinsic::ID BoundedStackCap = UseRematerializableIntrinsic
                                        ? Intrinsic::cheri_bounded_stack_cap
                                        : Intrinsic::cheri_cap_bounds_set;
    const DataLayout &DL = M->getDataLayout();
    Function *BoundedStackFn =
        Intrinsic::getDeclaration(M, BoundedStackCap, SizeTy);
    StackBoundsMethod BoundsMode = BoundsSettingMode;
    IRBuilder<> B(C);
    for (AllocaInst *AI : Allocas) {
      const uint64_t TotalUses = AI->getNumUses();
      NumProcessed++;
      Function *SetBoundsIntrin = BoundedStackFn;
      // Insert immediately after the alloca
      B.SetInsertPoint(AI);
      B.SetInsertPoint(&*++B.GetInsertPoint());
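      // (Pointing the builder at the alloca and then advancing one step
      // makes all new instructions land directly after the alloca itself.)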
      Align ForcedAlignment;
      PointerType *AllocaTy = AI->getType();
      assert(isCheriPointer(AllocaTy, &DL));
      Type *AllocationTy = AllocaTy->getElementType();
      Value *ArraySize = AI->getArraySize();
      // For imprecise capabilities, we need to increase the alignment of
      // on-stack allocations to ensure that we can create precise bounds.
      if (!TLI->cheriCapabilityTypeHasPreciseBounds()) {
        // If not a constant then definitely a DYNAMIC_STACKALLOC; alignment
        // requirements will be added later during legalisation.
        if (ConstantInt *CI = dyn_cast<ConstantInt>(ArraySize)) {
          uint64_t AllocaSize = DL.getTypeAllocSize(AllocationTy);
          AllocaSize *= CI->getValue().getLimitedValue();
          ForcedAlignment = TLI->getAlignmentForPreciseBounds(AllocaSize);
        }
      }
      AI->setAlignment(max(MaybeAlign(AI->getAlignment()), ForcedAlignment));
      // Only set bounds for allocas that escape this function
      bool NeedBounds = true;
      // Always set bounds if the function has the optnone attribute
      SmallVector<const Use *, 32> UsesThatNeedBounds;
      // If one of the bounded alloca users is a PHI we must reuse the single
      // intrinsic since PHIs must be the first instructions in the basic block
      // and we can't insert anything before them. Theoretically we could still
      // use separate intrinsics for the other users, but if we are already
      // saving a bounded stack slot we might as well reuse it.
      bool MustUseSingleIntrinsic = false;
      if (BoundsMode == StackBoundsMethod::Never) {
        NeedBounds = false;
      } else {
        CheriNeedBoundsChecker BoundsChecker(AI, DL);
        // With -O0 or =all-uses we set bounds on every stack allocation even
        // if it is not necessary
        bool BoundAll = IsOptNone || BoundsMode == StackBoundsMethod::AllUses;
        BoundsChecker.findUsesThatNeedBounds(&UsesThatNeedBounds, BoundAll,
                                             &MustUseSingleIntrinsic);
        NeedBounds = !UsesThatNeedBounds.empty();
        NumUsesProcessed += TotalUses;
        DBG_MESSAGE(F.getName() << ": " << UsesThatNeedBounds.size() << " of "
                                << TotalUses << " users need bounds for ";
                    AI->dump());
        // TODO: remove the all-or-nothing case
        if (NeedBounds &&
            BoundsMode == StackBoundsMethod::ForAllUsesIfOneNeedsBounds) {
          // We are compiling with the all-or-nothing case and found at least
          // one use that needs bounds -> set bounds on all uses
          UsesThatNeedBounds.clear();
          LLVM_DEBUG(dbgs() << "Checking if alloca needs bounds: ";
                     AI->dump());
          BoundsChecker.findUsesThatNeedBounds(&UsesThatNeedBounds,
                                               /*BoundAllUses=*/true,
                                               &MustUseSingleIntrinsic);
        }
      }
      if (!NeedBounds) {
        NumCompletelyUnboundedAllocas++;
        DBG_MESSAGE("No need to set bounds on stack alloca"; AI->dump());
        continue;
      }
      if (!AI->isStaticAlloca()) {
        NumDynamicAllocas++;
        // TODO: skip bounds on dynamic allocas (maybe add a TLI hook to check
        // whether the backend already adds bounds to the dynamic_stackalloc)
        DBG_MESSAGE("Found dynamic alloca: must use single intrinsic and "
                    "bounds.set intrinsic");
        MustUseSingleIntrinsic = true;
        SetBoundsIntrin = Intrinsic::getDeclaration(
            M, Intrinsic::cheri_cap_bounds_set, SizeTy);
      }
      // Reuse the result of a single csetbounds intrinsic if we are at -O0 or
      // there are more than N users of this bounded stack capability.
      const bool ReuseSingleIntrinsicCall =
          MustUseSingleIntrinsic || IsOptNone ||
          UsesThatNeedBounds.size() >= SingleIntrinsicThreshold;
      NumUsesWithBounds += UsesThatNeedBounds.size();
      NumUsesWithoutBounds += TotalUses - UsesThatNeedBounds.size();
      // Get the size of the alloca
      unsigned ElementSize = DL.getTypeAllocSize(AllocationTy);
      Value *Size = ConstantInt::get(SizeTy, ElementSize);
      if (AI->isArrayAllocation())
        Size = B.CreateMul(Size, AI->getArraySize());
      if (AI->isStaticAlloca() && ForcedAlignment != Align()) {
        // Pad to ensure bounds don't overlap adjacent objects
        uint64_t AllocaSize =
            cast<ConstantInt>(Size)->getValue().getLimitedValue();
        TailPaddingAmount TailPadding =
            TLI->getTailPaddingForPreciseBounds(AllocaSize);
        if (TailPadding != TailPaddingAmount::None) {
          Type *AllocatedType =
              AI->isArrayAllocation()
                  ? ArrayType::get(
                        AI->getAllocatedType(),
                        cast<ConstantInt>(AI->getArraySize())->getZExtValue())
                  : AI->getAllocatedType();
          Type *PaddingType =
              ArrayType::get(Type::getInt8Ty(F.getContext()),
                             static_cast<uint64_t>(TailPadding));
          Type *TypeWithPadding = StructType::get(AllocatedType, PaddingType);
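          // Wrapping the allocation and an i8 array of tail padding in a
          // struct keeps the original object's layout while growing the
          // allocation, so imprecise capability bounds cannot overlap the
          // next stack object.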
          auto *NewAI =
              new AllocaInst(TypeWithPadding, AI->getType()->getAddressSpace(),
                             nullptr, "", AI);
          NewAI->takeName(AI);
          NewAI->setAlignment(MaybeAlign(AI->getAlignment()));
          NewAI->setUsedWithInAlloca(AI->isUsedWithInAlloca());
          NewAI->setSwiftError(AI->isSwiftError());
          NewAI->copyMetadata(*AI);
          auto *NewPtr = new BitCastInst(NewAI, AI->getType(), "", AI);
          AI->replaceAllUsesWith(NewPtr);
          AI->eraseFromParent();
          AI = NewAI;
          Size = ConstantInt::get(
              SizeTy, AllocaSize + static_cast<uint64_t>(TailPadding));
        }
      }
      if (cheri::ShouldCollectCSetBoundsStats) {
        cheri::addSetBoundsStats(
            AI->getAlign().valueOrOne(), Size, getPassName(),
            cheri::SetBoundsPointerSource::Stack,
            "set bounds on " + cheri::inferLocalVariableName(AI),
            cheri::inferSourceLocation(AI));
      }
      LLVM_DEBUG(auto S = cheri::inferConstantValue(Size);
                 dbgs() << AI->getFunction()->getName()
                        << ": setting bounds on stack alloca to "
                        << (S ? Twine(*S) : Twine("<unknown>"));
                 AI->dump());
      Value *SingleBoundedAlloc = nullptr;
      if (ReuseSingleIntrinsicCall) {
        NumSingleIntrin++;
        // We need to convert it to an i8* for the intrinsic:
        Instruction *AllocaI8 =
            cast<Instruction>(B.CreateBitCast(AI, I8CapTy));
        SingleBoundedAlloc = B.CreateCall(SetBoundsIntrin, {AllocaI8, Size});
        SingleBoundedAlloc = B.CreateBitCast(SingleBoundedAlloc, AllocaTy);
      }
      for (const Use *U : UsesThatNeedBounds) {
        Instruction *I = cast<Instruction>(U->getUser());
        if (ReuseSingleIntrinsicCall) {
          const_cast<Use *>(U)->set(SingleBoundedAlloc);
        } else {
          if (auto PHI = dyn_cast<PHINode>(I)) {
            // For PHI nodes we can't insert just before the PHI; instead we
            // must insert just before the terminator of the incoming block.
            auto BB = PHI->getIncomingBlock(*U);
            // LLVM_DEBUG(dbgs() << "PHI use coming from"; BB->dump());
            B.SetInsertPoint(BB->getTerminator());
          } else {
            // Insert a new intrinsic call before every use. This can avoid
            // stack spills but will result in additional instructions.
            // This should avoid spilling registers when using an alloca in a
            // different basic block.
            // TODO: there should be a MIR pass to merge unnecessary calls
            B.SetInsertPoint(I);
          }
          // We need to convert it to an i8* for the intrinsic. Note: we must
          // not reuse a bitcast since otherwise that results in spilling the
          // register that was incremented and doing a setbounds in a different
          // basic block. This is stupid and we should be either using the
          // bounded capability everywhere or be doing inc+setoffset in the
          // other block.
          Instruction *AllocaI8 =
              cast<Instruction>(B.CreateBitCast(AI, I8CapTy));
          auto WithBounds = B.CreateCall(SetBoundsIntrin, {AllocaI8, Size});
          const_cast<Use *>(U)->set(B.CreateBitCast(WithBounds, AllocaTy));
        }
      }
    }
    return true;
  }
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<TargetPassConfig>();
    AU.setPreservesCFG();
  }
};

} // anonymous namespace

char CheriBoundAllocas::ID;

INITIALIZE_PASS(CheriBoundAllocas, DEBUG_TYPE,
                "CHERI add bounds to alloca instructions", false, false)

ModulePass *llvm::createCheriBoundAllocasPass(void) {
  return new CheriBoundAllocas();
}
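Usage note: since these are plain cl::opt flags, the strategy can be switched on the llc command line in the usual way, e.g. -cheri-stack-bounds=all-uses or -cheri-stack-bounds-allow-remat=false (hypothetical invocation; assumes a purecap CHERI target where DL.isFatPointer(AllocaAS) holds).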
If you could revise the fundamental principles of computer system design to improve security... what would you change?