LLVM MPX (BOGO)


Q&A

  • How to determine the size of an object?

  • How to store the bound info in MPX?

  • How to do bound checking?

  • How to choose the instrumentation point?

    • A map from each instruction to the pointer it wants to access?

Reference 1

Overview

A module pass: class llmpx: public ModulePass

runOnModule(Module &)

Methods:

mpxPass(Module &)

harden_cfi(Module &)

create_global_constants(Module &)

collect_safe_access(Module &)

transform_functions(Module &)

transform_global(Module &)

process_each_function(Module &)

cleanup(Module &)

verify(Module &)

dead_bndstx_elimination(Module&);

dead_bndldx_elimination(Module&);

/*
 * Helper function, try to find the real value which has its id 
 * associated with it 
 */
Value* find_true_val_has_aa_id(Value*);

/*
 * remove any unused (dead) bound, return the number of bounds removed
 */
int remove_dead_bound(Module&);


/*
 * create symbols for the helper library
 */
void create_llmpx_symbols(Module&);

States:

/*
* the safe access list: accesses that we do not need to check;
* maps each instruction to the pointer it wants to access
*/
std::map<Value*, Value*> safe_access_list;

/*
* MPX Intrinsic Functions
*/
Function* mpx_bndmk;
Function* mpx_bndldx;
Function* mpx_bndstx;
Function* mpx_bndclrr;
Function* mpx_bndclrm;
Function* mpx_bndcurr;
Function* mpx_bndcurm;
Function* mpx_bndcn;

runOnModule

Steps:

  • Create global constant bound. create_global_constants(module)
  • Create symbols for:
    • the mpx intrinsics
    • the external helper library
    • the wrapper functions in mpxwrap
    • the bound cache functions
  • Call pass mpxTester (when USE_MPX_TESTER is set), or
  • call pass mpxPass (otherwise).

    // mpx/llmpx.cpp
    
    /*
     * Pass entry point (llmpx is a ModulePass).
     *
     * Remembers the module and its LLVMContext, creates the global constant
     * bound and every symbol the instrumentation refers to, then hands the
     * module to mpxTester (when USE_MPX_TESTER is set) or to mpxPass.
     * Returns whatever the selected pass returns.
     */
    bool llmpx::runOnModule(Module &module)
    {
        this->module = &module;
        ctx = &module.getContext();

        // prepare global constant bound
        create_global_constants(module);

        // create mpx intrinsic symbols
        create_mpx_intr_symbols(module);

        // create symbols for the external helper library
        create_llmpx_symbols(module);

        // create symbols for the wrapper functions in mpxwrap
        create_llmpx_wrapper_symbols(module);

        // create symbols for the bound cache functions
        create_llmpx_bnd_cache_symbols(module);

    #if USE_MPX_TESTER
        return mpxTester(module);
    #else
        return mpxPass(module);
    #endif
    }
    

mpxTester pass

  • Find the second alloca and insert mpx instructions after it.

    /*
    * This is the MPX extension tester:
    * it grabs the result of the second alloca and inserts the following code
    * to test the functionality of MPX
    *
    *   bndmk r, bnd0
    *   bndstx r, bnd0
    *   bndldx r, bnd0
    *   bndcl r, bnd0
    *   bndcu r, bnd0
    *   r+=10
    *   bndcl r, bnd0; will generate #BR exception
    *
    * ----------------------------------
    *  int main()
    *  {
    *        char p[16] = "123";
    *        printf("%s\n", p);
    *        return 0;
    *  }
    */
    

To find the alloca instruction in a module:

// mpx/llmpx.cpp

    // Walk every function in the module. Declarations (functions without a
    // body) are reported and skipped; for every other function the entry
    // block is scanned and each pointer-typed alloca is dumped to errs().
    // NOTE: this is an excerpt; the closing brace of the outer loop and the
    // code elided by "..." are not shown here.
    for (Module::iterator f_begin = module.begin(), f_end = module.end();
            f_begin != f_end; ++f_begin)
    {
        Function *func_ptr = dyn_cast<Function>(f_begin);
        errs()<<"Function : ";
        errs()<<func_ptr->getName();
        // nothing to instrument when only a declaration is available
        if (func_ptr->isDeclaration())
        {
            errs()<<" is external \n";
            continue;
        }
        errs()<<"\n";
        //find and get a pointer
        
        PointerType* Int8PtrTy = Type::getInt8PtrTy(*ctx);
        //Function::iterator bb_begin = func_ptr->begin();
        // only the entry block is scanned, not every basic block
        BasicBlock* bb_begin = & func_ptr->getEntryBlock();
        BasicBlock::iterator II = bb_begin->begin();
        // not referenced in the visible part of the loop; presumably counts
        // the allocas seen so far in the elided code -- TODO confirm
        int num_alloc = 0;
        while(II!=bb_begin->end())
        {
            Instruction *I = dyn_cast<Instruction>(II);
            if (isa<AllocaInst>(I) 
                && I->getType()->isPointerTy())
            {
              // dump the alloca itself and the type of its result
              errs()<<"Found Ptr AllocaInst\n";
              I->print(errs());
              errs()<<"\n";
              errs()<<" return type:";
              I->getType()->print(errs());
              errs()<<"\n";
              ...
            }
            ++II;
        }

To Insert a new instruction:

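  • Use IRBuilder<> builder(insertPoint). insertPoint is an instruction in the original code; new instructions created through the builder are inserted before the insertion point.
  • Prepare the arguments: std::vector<Value *> args; args.push_back(...)
  • Use CallInst::Create(func, args, "", I); to insert the call. Its last argument is the instruction to insert before, so the call is placed directly before I without going through the builder (builder.CreateCall would be the builder-based alternative).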

    // mpx/llmpx.cpp
    
    /*
    * insert instruction:
    * 
    *    bndmk bnd, m32/m64  Make lower and upper bounds from m32/m64
    *                        and store them in bound register bnd.
    */
    // srcI is the instruction the iterator currently refers to; its result is
    // the pointer the bound is made for.
    Instruction *srcI = dyn_cast<Instruction>(II);
    
    // ++II advances the iterator first, so insertPoint is the instruction that
    // follows srcI; everything the builder creates lands right after srcI.
    Instruction *insertPoint = dyn_cast<Instruction>(++II);
    IRBuilder<> builder(insertPoint);
    // II was already advanced above, so I is the same instruction as insertPoint
    Instruction *I = dyn_cast<Instruction>(II);
    
    std::vector<Value *> args;
    //args.push_back(ConstantPointerNull::get(Int8PtrTy));
    
    // first argument: srcI's result cast to i8*
    Value* ptr_arg_for_bndmk = builder.CreateBitCast(srcI,Int8PtrTy,"");
    args.push_back(ptr_arg_for_bndmk);
    // second argument: the constant 9 as an i64
    // NOTE(review): presumably the distance from the base to the upper bound
    // (size - 1) -- confirm against the bndmk intrinsic definition
    Constant* dist_arg_for_bndmk = ConstantInt::get(Type::getInt64Ty(*ctx),(9));
    args.push_back(dist_arg_for_bndmk);
    
    // declaration of the x86 bndmk intrinsic in this module
    Function *func = Intrinsic::getDeclaration(&module, 
                  Intrinsic::x86_bndmk);
    
    // II has not moved since the assignment above; I still equals insertPoint
    I = dyn_cast<Instruction>(II);
    
    errs()<<"Insert bndmk before this inst:";
    I->print(errs());
    errs()<<"\n";
    
    // create the call and insert it before I (the builder is not used here)
    Instruction* bndmkcall = CallInst::Create(func, args, "", I);
    bndmkcall->print(errs());
    errs()<<"\n";
    

mpx Pass

Whole algorithm as stated in the source code (https://github.com/lzto/bogo):

  • Gather bound information from each instruction and create the bound-make, bound-check and bound-propagation instructions (organized as a tree, or some other structure?). The result is saved to bound_checklist.
  • Optimize bound_checklist to eliminate redundant checks.
  • Insert the corresponding instructions using bound_checklist.

List of methods and call paths:

mpxPass
 -> transform_global
 -> harden_cfi
 -> return false;

mpxPass
 -> collect_safe_access
 -> transform_functions
 -> transform_global
 -> process_each_function
 -> cleanup
 -> verify
 -> return false;

// collect_safe_access
mpxPass
 -> collect_safe_access
     -> is_safe_access
        -> // update `safe_access_list[inst] = ptr_operand`

// transform_functions to pass bound information during a call/return
mpxPass
 -> transform_functions

// transform_global
mpxPass
 -> transform_global

// process_each_function
mpxPass
 -> process_each_function 
     -> process_bound_checklist 
         -> process_each_instruction

process_each_instruction
 -> handleGetElementPtr
     -> associate_meta
         -> process_each_instruction

process_each_instruction
 -> handleBitCast
     -> associate_meta
         -> process_each_instruction

process_each_instruction
  -> handleLoad()
      -> insert_check
          -> insert_bound_check
              -> process_each_instruction

process_each_instruction
  -> handleStore()
      -> insert_check
          -> insert_bound_check
              -> process_each_instruction
      -> process_each_instruction

process_each_instruction
 -> handleCall
     -> process_each_instruction
     -> transform_function_type
         -> add_transform_type_pair
             -> // add <orig_type, transformed_type> to tr_typelist.

process_each_instruction
 -> handleInvoke
     -> process_each_instruction
     -> transform_function_type
         -> add_transform_type_pair
             -> // add <orig_type, transformed_type> to tr_typelist.



process_each_instruction
 -> handleBinaryOperator (got #if 0'd)
     -> process_each_instruction

process_each_instruction
 -> handleInsertValue/PHINode/PtrToInt/Select/ 
     -> process_each_instruction

data structures:

Map/Table structure to store a list of new instructions/keys for each instruction or variable

std::map<Value*, std::list<Value*>*> bound_checklist: This stores the bound instructions that need to be inserted for each instruction.

For example:

original instruction => bndmk/bndldx instructions

original instruction => bndmov/bndstx instructions

original instruction => bndcl/bndcu instructions

key_checklist. This stores the key instructions that need to be inserted.

gv_bound_checklist. Global variable bound list, initialized only once for each application.

gv_key_checklist. Global variable key list, initialized only once for each application.

std::map<std::pair<Value*, Instruction*>, std::list<Value*>*> gv_bound_checklist_cache, gv_key_checklist_cache: global variable bound/key load cache. Each pair has a list of ???.

transform_functions

Scan each function's parameters and return type; if any of them is a pointer (or a struct containing a pointer), transform the function so that bound information is passed along with the pointers.

transform_functions
    -> function_need_to_be_transformed
        -> is_in_skip_list
        -> // check return type and parameter type; bnd needed if any ptr or struct->ptr.
    -> has_transformed_type
        -> // search tr_typelist for a type.
    -> get_transformed_type
        -> // get the transformed type, tr_typelist[orig_type].
    -> add_transform_type_pair
        -> // add <orig_type, transformed_type> to tr_typelist.
    -> Function::Create "_wbnd"
    -> // tr_flist[func_ptr] = new_func, the old func deleted
      // revtr_flist[new_func] = func_ptr
      // update flist_orig, flist_new

transform_global

Transform all global variables. Insert ctor and dtor functions, and store the bounds in the ctor.

//
// For every global variable that is defined in this module and has an
// initializer (names starting with "llvm." or "llmpx_" are skipped):
//   * create a companion global "llmpx_bnd_<name>" holding two i64 values,
//   * emit two stores that fill it with the lower and upper bound,
//   * record the stores and the companion global in gv_bound_checklist,
//   * when llmpx_enable_temporal_safety is set, also create a key via
//     _llmpx_temporal_lock_alloca, store it into "llmpx_key_<name>" and
//     record that in gv_key_checklist.
// NOTE: this is an excerpt; `builder` is declared outside the visible code
// and the closing brace of the loop is not shown.
for(GlobalVariable &gvi: module.globals())
  {
      GlobalVariable* gi = &gvi;
      if (gi->isDeclaration())
          continue;
      if (!isa<Value>(gi))
          continue;
      Value* gv = dyn_cast<Value>(gi);
      StringRef gvname = gv->getName();
      // skip LLVM-internal globals and the globals this pass creates itself
      if (gvname.startswith("llvm.") || 
          gvname.startswith("llmpx_"))
          continue;
      // not referenced anywhere else in the visible excerpt
      bool gv_use_func = false;
      
      if (!gi->hasInitializer())
      {
          continue;
      }
      Constant* initializer = gi->getInitializer();
      Type* itype = initializer->getType();
      TotalStaticBNDAdded++;
      /*
        * make bound
        * initialization of constant bound has been changed from using bndmk
        * instruction to series of store instruction in .init_array section,
        * so that it can possibly be further optimized (i.e. make it as 
        * constant bound)
        */
      PointerType* Int8PtrTy = Type::getInt8PtrTy(*ctx);
      // object size = allocation size of the initializer's type
      unsigned allocated_size = module.getDataLayout()
                      .getTypeAllocSize(itype);

      #if (DEBUG_MPX_PASS_1_5>2)
      errs()<<"bnd parm size:"<<allocated_size<<"\n";
      #endif
      /*
        * create global constant bound for it 
        */
      // [2 x i64]: element 0 receives the lower bound, element 1 the upper
      // bound; same linkage as the original global, zero-initialized
      Type* ArrayTy = ArrayType::get(IntegerType::get(*ctx, 64), 2);
      GlobalVariable* gvbnd
          = new GlobalVariable(module,
              ArrayTy,
              false,
              gi->getLinkage(),
              0, "llmpx_bnd_"+gvname);
      gvbnd->setAlignment(16);
      gvbnd->setInitializer(Constant::getNullValue(ArrayTy));

      Type* int64ty = IntegerType::get(*ctx,64);
      // not referenced in the visible excerpt
      Type* int64ptrty = Type::getInt64PtrTy(*ctx);
      /*
        * FIXME: the following instruction should be inserted into .init_array
        */
      // lower bound: the address of the global as an i64
      Constant* lb = ConstantExpr::getPtrToInt(gi, int64ty);
      std::vector<Constant*> indic_lb;
      indic_lb.push_back(ConstantInt::get(int64ty,0));
      indic_lb.push_back(ConstantInt::get(int64ty,0));
      // address of gvbnd[0]
      Constant* bnd_lb
          = ConstantExpr::getGetElementPtr(NULL, gvbnd, indic_lb);

      // upper bound value: -(address + allocated_size)
      // NOTE(review): in two's complement -(a + s) == ~(a + s - 1), i.e. the
      // one's complement of the last valid byte address -- presumably the
      // encoding MPX expects for the upper bound; confirm against the ISA docs
      Constant* ub
          = ConstantExpr::getNeg(
              ConstantExpr::getAdd(
                  ConstantExpr::getPtrToInt(gi, int64ty),
                  ConstantInt::get(int64ty, (allocated_size))));
      std::vector<Constant*> indic_ub;
      indic_ub.push_back(ConstantInt::get(int64ty,0));
      indic_ub.push_back(ConstantInt::get(int64ty,1));
      // address of gvbnd[1]
      Constant* bnd_ub
          = ConstantExpr::getGetElementPtr(
                  NULL,
                  gvbnd,
                  indic_ub);

      // emit the two stores and remember them, together with the bound
      // global, as the bound entries of this global variable
      Instruction* inslb = builder.CreateStore(lb, bnd_lb);
      Instruction* insub = builder.CreateStore(ub, bnd_ub);
      gv_bound_checklist[gv] = new std::list<Value*>;
      gv_bound_checklist[gv]->push_back(inslb);
      gv_bound_checklist[gv]->push_back(insub);
      gv_bound_checklist[gv]->push_back(gvbnd);

      /*
        * create global key and lock
        */
      if (llmpx_enable_temporal_safety)
      {
          // arguments: the global as i8* and its allocation size
          std::vector<Value*> args;
          args.push_back(ConstantExpr::getBitCast(gi,Int8PtrTy));
          args.push_back(ConstantInt::get(Type::getInt64Ty(*ctx), (allocated_size)));
          Value* key
              = builder.CreateCall(_llmpx_temporal_lock_alloca,
                          args, "llmpx_key."+gvname);

          auto* Int64Ty = Type::getInt64Ty(*ctx);

          // i64 global that holds the key, initialized to 0
          GlobalVariable* gvkey
                = new GlobalVariable(module, Int64Ty,
                      false, gi->getLinkage(),
                      0, "llmpx_key_"+gvname);
          gvkey->setInitializer(ConstantInt::get(Int64Ty, 0));

          // store the key returned by the call into the key global
          Instruction* keystore = builder.CreateStore(key,
              ConstantExpr::getPointerCast(gvkey, Type::getInt64PtrTy(*ctx)));

          gv_key_checklist[gv] = new std::list<Value*>;
          gv_key_checklist[gv]->push_back(keystore);
          gv_key_checklist[gv]->push_back(gvkey);
      }

process_each_function

get_bound

Value* get_bound(Value* v, Instruction* I): get the bound for the value v. Any instruction that needs to be inserted is inserted before I.

  • If v is a ConstantExpr:
    • search gv_bound_checklist_cache, a list of (v, I) pairs for a function;
    • return the cached entry if found;
    • insert a new pair if not found.
  • search bound_checklist
Created Jul 10, 2020 // Last Updated Feb 8, 2023

If you could revise
the fundamental principles of
computer system design
to improve security...

... what would you change?