FreeBSD


Q&A

  • How to load a section from file and read its contents?
    • See the example code in parse_notes() in sys/kern/imgact_elf.c.
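    A minimal sketch of the read itself, modeled on the vn_rdwr() call inside
    parse_notes() (this assumes imgp->vp is already locked and buf has been
    allocated; error handling is omitted):

        /* Read p_filesz bytes of the segment, starting at p_offset in the
         * backing file, into the kernel buffer buf. */
        error = vn_rdwr(UIO_READ, imgp->vp, buf, pnote->p_filesz,
            pnote->p_offset, UIO_SYSSPACE, IO_NODELOCKED,
            curthread->td_ucred, NOCRED, NULL, curthread);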


An overview of how the execve system call loads a binary and starts executing it was discussed previously for Linux. This post tracks in more detail how the ELF file is parsed and mapped into the address space in FreeBSD.

do_execve

The implementation is similar to the Linux one.

FreeBSD passes around a struct image_params imgp, which stores information similar to struct linux_binprm in Linux.
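For orientation, a few of its fields, paraphrased from sys/sys/imgact.h (the real struct has many more members, and names/types may differ slightly between releases):

/* sketch only; see sys/sys/imgact.h for the real struct image_params */
struct image_params_sketch {
	struct proc	 *proc;		/* the process being exec'd */
	struct vnode	 *vp;		/* vnode of the image file */
	struct vm_object *object;	/* VM object backing the file */
	const char	 *image_header;	/* first page of the image */
	unsigned long	  entry_addr;	/* entry point of the executable */
	char		 *interpreter_name; /* path from PT_INTERP, if any */
	vm_prot_t	  stack_prot;	/* stack protection (PT_GNU_STACK) */
	u_long		  stack_sz;	/* stack size (PT_GNU_STACK) */
	u_int		  map_flags;	/* vm_map flags, e.g. for ASLR */
};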

Calling ELF Image Activator

do_execve() tries each registered image activator (the execsw table) in turn until one of them recognizes the image; the ELF activator is one of these.

Loading ELF file


The ELF binary is loaded by the function __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp). The function name is built with macros and expands to, for example, exec_elf64_imgact().
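A sketch of how that name is built (macros paraphrased from sys/cdefs.h and sys/sys/imgact_elf.h, shown only to illustrate the expansion):

#define __CONCAT1(x, y)	x ## y
#define __CONCAT(x, y)	__CONCAT1(x, y)	/* expand arguments, then paste */

#define __ELF_WORD_SIZE	64		/* 32 or 64, depending on the ABI */
#define __elfN(x)	__CONCAT(__CONCAT(__CONCAT(elf, __ELF_WORD_SIZE), _), x)

/* __CONCAT(exec_, __elfN(imgact))  ==>  exec_elf64_imgact */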

  • Walk the program headers and handle PT_LOAD, PT_INTERP, PT_GNU_STACK, and PT_PHDR entries. Update image_params accordingly.
  • Detect the ELF binary brand/(sub)type with __elfN(get_brandinfo)(), and whether the binary is dynamic (a DSO) or static.
  • Decide whether to enable randomization of user mappings.
    • update imgp->map_flags accordingly;
  • call exec_new_vmspace(imgp, sv): clear the old virtual address space and set up the new stack.
  • call __elfN(load_sections): load the contents of the PT_LOAD segments.
  • call __elfN(enforce_limits): check the segment sizes against the process resource limits.
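Restated as a comment-only outline (not the kernel's code, just the steps above in order; the real function lives in sys/kern/imgact_elf.c):

struct image_params;			/* defined in sys/sys/imgact.h */

static int
exec_elf64_imgact_outline(struct image_params *imgp)
{
	/* 1. Walk the program headers: PT_LOAD, PT_INTERP, PT_GNU_STACK, PT_PHDR. */
	/* 2. __elfN(get_brandinfo)(): detect the ELF brand / (sub)type. */
	/* 3. Decide on randomization of user mappings; update imgp->map_flags. */
	/* 4. exec_new_vmspace(imgp, sv): tear down the old VM space, new stack. */
	/* 5. __elfN(load_sections)(): map the PT_LOAD segments. */
	/* 6. __elfN(enforce_limits)(): check sizes against resource limits. */
	(void)imgp;
	return (0);
}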

Reading program headers

// sys/kern/imgact_elf.c
// http://fxr.watson.org/fxr/source/kern/imgact_elf.c?v=FREEBSD-12-STABLE#L1106

static int
__CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
{
  /// ...
	for (i = 0; i < hdr->e_phnum; i++) {
		switch (phdr[i].p_type) {
		case PT_LOAD:
			if (n == 0)
				baddr = phdr[i].p_vaddr;
			if (phdr[i].p_align > maxalign)
				maxalign = phdr[i].p_align;
			mapsz += phdr[i].p_memsz;
			n++;

			/*
			 * If this segment contains the program headers,
			 * remember their virtual address for the AT_PHDR
			 * aux entry. Static binaries don't usually include
			 * a PT_PHDR entry.
			 */
			if (phdr[i].p_offset == 0 &&
			    hdr->e_phoff + hdr->e_phnum * hdr->e_phentsize
				<= phdr[i].p_filesz)
				proghdr = phdr[i].p_vaddr + hdr->e_phoff;
			break;
		case PT_INTERP:
			/* Path to interpreter */
			if (interp != NULL) {
				uprintf("Multiple PT_INTERP headers\n");
				error = ENOEXEC;
				goto ret;
			}
			error = __elfN(get_interp)(imgp, &phdr[i], &interp,
			    &free_interp);
			if (error != 0)
				goto ret;
			break;
		case PT_GNU_STACK:
			if (__elfN(nxstack))
				imgp->stack_prot =
				    __elfN(trans_prot)(phdr[i].p_flags);
			imgp->stack_sz = phdr[i].p_memsz;
			break;
		case PT_PHDR: 	/* Program header table info */
			proghdr = phdr[i].p_vaddr;
			break;
		}
	}
  /// ...
}

Reading brandinfo from ELF binary

get_brandinfo() => parse_notes():

Call path:

__CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
=> brand_info = __elfN(get_brandinfo)(imgp, interp, &osrel, &fctl0);

   static Elf_Brandinfo *
   __elfN(get_brandinfo)(struct image_params *imgp, const char *interp,
       int32_t *osrel, uint32_t *fctl0)
   => ret = __elfN(check_note)(imgp, bi->brand_note, osrel, fctl0);

      static boolean_t
      __elfN(check_note)(struct image_params *imgp, Elf_Brandnote *brandnote,
          int32_t *osrel, uint32_t *fctl0)
      {
          for (i = 0; i < hdr->e_phnum; i++) {
              if (phdr[i].p_type == PT_NOTE && __elfN(parse_notes)(imgp,
                  &brandnote->hdr, brandnote->vendor, &phdr[i], brandnote_cb,
                  &b_arg)) {
                  for (j = 0; j < hdr->e_phnum; j++) {
                      if (phdr[j].p_type == PT_NOTE &&
                          __elfN(parse_notes)(imgp, &fctl_note,
                          FREEBSD_ABI_VENDOR, &phdr[j],
                          note_fctl_cb, &f_arg))
                          break;
                  }
                  return (TRUE);
              }
          }
      }
      => __elfN(parse_notes)(imgp, &fctl_note,
             FREEBSD_ABI_VENDOR, &phdr[j],
             note_fctl_cb, &f_arg);
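Each entry in a PT_NOTE segment has the standard ELF note layout (paraphrased from sys/sys/elf_common.h); this is what the namesz/descsz pointer arithmetic in parse_notes() below steps over:

typedef struct {
	uint32_t n_namesz;	/* length of the name string, incl. NUL */
	uint32_t n_descsz;	/* length of the descriptor */
	uint32_t n_type;	/* note type, e.g. the ABI tag note */
} Elf_Note_sketch;
/* Followed in the file by the name (padded to 4 bytes) and the
 * descriptor (padded to 4 bytes), then the next note entry. */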

parse_notes() reads the PT_NOTE segment (either from the already-mapped first page or via vn_rdwr()) and walks the notes, calling the supplied callback when a note with a matching vendor and type is found:

// sys/kern/imgact_elf.c

static boolean_t
__elfN(parse_notes)(struct image_params *imgp, Elf_Note *checknote,
    const char *note_vendor, const Elf_Phdr *pnote,
    boolean_t (*cb)(const Elf_Note *, void *, boolean_t *), void *cb_arg)
{
	const Elf_Note *note, *note0, *note_end;
	const char *note_name;
	char *buf;
	int i, error;
	boolean_t res;
	
	/* We need some limit, might as well use PAGE_SIZE. */
	if (pnote == NULL || pnote->p_filesz > PAGE_SIZE)
		return (FALSE);
	ASSERT_VOP_LOCKED(imgp->vp, "parse_notes");
	if (pnote->p_offset > PAGE_SIZE ||
	    pnote->p_filesz > PAGE_SIZE - pnote->p_offset) {
		buf = malloc(pnote->p_filesz, M_TEMP, M_NOWAIT);
		if (buf == NULL) {
			VOP_UNLOCK(imgp->vp);
			buf = malloc(pnote->p_filesz, M_TEMP, M_WAITOK);
			vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
		}
		error = vn_rdwr(UIO_READ, imgp->vp, buf, pnote->p_filesz,
		    pnote->p_offset, UIO_SYSSPACE, IO_NODELOCKED,
		    curthread->td_ucred, NOCRED, NULL, curthread);
		if (error != 0) {
			uprintf("i/o error PT_NOTE\n");
			goto retf;
		}
		note = note0 = (const Elf_Note *)buf;
		note_end = (const Elf_Note *)(buf + pnote->p_filesz);
	} else {
		note = note0 = (const Elf_Note *)(imgp->image_header +
		    pnote->p_offset);
		note_end = (const Elf_Note *)(imgp->image_header +
		    pnote->p_offset + pnote->p_filesz);
		buf = NULL;
	}
	for (i = 0; i < 100 && note >= note0 && note < note_end; i++) {
		if (!aligned(note, Elf32_Addr) || (const char *)note_end -
		    (const char *)note < sizeof(Elf_Note)) {
			goto retf;
		}
		if (note->n_namesz != checknote->n_namesz ||
		    note->n_descsz != checknote->n_descsz ||
		    note->n_type != checknote->n_type)
			goto nextnote;
		note_name = (const char *)(note + 1);
		if (note_name + checknote->n_namesz >=
		    (const char *)note_end || strncmp(note_vendor,
		    note_name, checknote->n_namesz) != 0)
			goto nextnote;

		if (cb(note, cb_arg, &res))
			goto ret;
nextnote:
		note = (const Elf_Note *)((const char *)(note + 1) +
		    roundup2(note->n_namesz, ELF_NOTE_ROUNDSIZE) +
		    roundup2(note->n_descsz, ELF_NOTE_ROUNDSIZE));
	}
retf:
	res = FALSE;
ret:
	free(buf, M_TEMP);
	return (res);
}

Set up stack

exec_new_vmspace(): clear old address space, and set up a new stack.

Call path:

do_execve()
=> __CONCAT(exec_, __elfN(imgact))  // sys/kern/imgact_elf.c
   => exec_new_vmspace(imgp, sv)  // sys/kern/kern_exec.c

The inputs are imgp and struct sysentvec *sv. struct sysentvec holds, among other things, a pointer to the system call table and a function that translates traps to signals, …
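A rough sketch of a few of its fields (paraphrased from sys/sys/sysent.h; the real struct has many more members):

struct sysentvec_sketch {
	int		  sv_size;	/* number of entries in the syscall table */
	struct sysent	 *sv_table;	/* the system call table itself */
	int		(*sv_transtrap)(int, int); /* translate trap to signal */
	char		 *sv_name;	/* ABI name, e.g. "FreeBSD ELF64" */
	u_long		  sv_minuser;	/* lowest user address */
	u_long		  sv_maxuser;	/* highest user address */
	u_long		  sv_usrstack;	/* top of the user stack */
	int		  sv_stackprot;	/* protection for the stack */
};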

Steps in exec_new_vmspace():

  • Blow away the entire process VM.
    • If the vmspace is shared, create a new one instead.
  • Map shared page if any;
  • Allocate stack.

    // sys/kern/kern_exec.c
    
    /*
    * Destroy old address space, and allocate a new stack.
    *	The new stack is only sgrowsiz large because it is grown
    *	automatically on a page fault.
    */
    int
    exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv)
    {
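    	/*
    	 * Sketch of the main steps (see the full body in kern_exec.c):
    	 *  - tear down the old address space, or get a fresh vmspace via
    	 *    vmspace_exec() if the current one is shared;
    	 *  - map the ABI's shared page, if the sysentvec provides one;
    	 *  - reserve the new stack just below sv->sv_usrstack; only
    	 *    sgrowsiz of it is populated, the rest grows on page faults.
    	 */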
    ..
    }
    

Loading sections

Call path:

load_sections 
=> load_section
   => map_insert
// sys/kern/imgact_elf.c

// load_sections => load_section
static int
__elfN(load_sections)(struct image_params *imgp, const Elf_Ehdr *hdr,
    const Elf_Phdr *phdr, u_long rbase, u_long *base_addrp, u_long *max_addrp)
{
	
	for (i = 0; i < hdr->e_phnum; i++) {
		if (phdr[i].p_type != PT_LOAD || phdr[i].p_memsz == 0)
			continue;

		/* Loadable segment */
		prot = __elfN(trans_prot)(phdr[i].p_flags);
		error = __elfN(load_section)(imgp, phdr[i].p_offset,
		    (caddr_t)(uintptr_t)phdr[i].p_vaddr + rbase,
		    phdr[i].p_memsz, phdr[i].p_filesz, prot);
		if (error != 0)
			return (error);
    ...
}
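The prot passed to load_section() comes from __elfN(trans_prot)(), which maps the ELF segment flag bits onto VM protections. Simplified (the real function in imgact_elf.c also has an i386 read-implies-exec special case):

static vm_prot_t
__elfN(trans_prot)(Elf_Word flags)
{
	vm_prot_t prot;

	prot = 0;
	if (flags & PF_X)		/* executable segment */
		prot |= VM_PROT_EXECUTE;
	if (flags & PF_W)		/* writable segment */
		prot |= VM_PROT_WRITE;
	if (flags & PF_R)		/* readable segment */
		prot |= VM_PROT_READ;
	return (prot);
}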

load_section => map_insert

  • Prepare page-aligned values for the mapped address, the file offset, and the map length (a worked example of the arithmetic follows the listing).

    static int
    __elfN(load_section)(struct image_params *imgp, vm_ooffset_t offset,
    caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot)
    {
    	object = imgp->object;
    	map = &imgp->proc->p_vmspace->vm_map;
    	map_addr = trunc_page((vm_offset_t)vmaddr);
    	file_addr = trunc_page(offset);
    
    	/*
    	 * We have two choices.  We can either clear the data in the last page
    	 * of an oversized mapping, or we can start the anon mapping a page
    	 * early and copy the initialized data into that first page.  We
    	 * choose the second.
    	 */
    	if (filsz == 0)
    		map_len = 0;
    	else if (memsz > filsz)
    		map_len = trunc_page(offset + filsz) - file_addr;
    	else
    		map_len = round_page(offset + filsz) - file_addr;
    
    	...
    
    	if (map_len != 0) {
    		...
    		rv = __elfN(map_insert)(imgp, map, object, file_addr,
    		    map_addr, map_addr + map_len, prot, cow);
    		...
    	}
    	...
    }
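As a concrete illustration of the trunc_page()/round_page() arithmetic above (hypothetical numbers, assuming 4 KiB pages):

/*
 * Hypothetical PT_LOAD segment:
 *   offset = 0x1234, vmaddr = 0x401234, filsz = 0x2000, memsz = 0x5000
 *
 *   map_addr  = trunc_page(0x401234)           = 0x401000
 *   file_addr = trunc_page(0x1234)             = 0x1000
 *   memsz > filsz, so
 *   map_len   = trunc_page(0x1234 + 0x2000) - 0x1000
 *             = 0x3000 - 0x1000                = 0x2000
 *
 * The remaining bytes up to memsz come from an anonymous mapping that
 * load_section() sets up afterwards; the partially initialized last file
 * page is copied into its first page (the "second choice" in the comment
 * quoted above).
 */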
    

map_insert => { map_partial; vm_map_fixed; copyout_implicit_cap; vm_imgact_map_page; vm_imgact_unmap_page; }

static int
__elfN(map_insert)(struct image_params *imgp, vm_map_t map, vm_object_t object,
    vm_ooffset_t offset, vm_offset_t start, vm_offset_t end, vm_prot_t prot,
    int cow)
{
	rv = __elfN(map_partial)(map, object, offset, start,
                         round_page(start), prot);
	
	...
	if ((offset & PAGE_MASK) != 0) {
		/*
		 * The mapping is not page aligned.  This means that we have
		 * to copy the data.
		 */
		rv = vm_map_fixed(map, NULL, 0, start, end - start,
		    prot | VM_PROT_WRITE, VM_PROT_ALL, MAP_CHECK_EXCL);
		if (rv != KERN_SUCCESS)
			return (rv);
		if (object == NULL)
			return (KERN_SUCCESS);
		for (; start < end; start += sz) {
			sf = vm_imgact_map_page(object, offset);
			if (sf == NULL)
				return (KERN_FAILURE);
			off = offset - trunc_page(offset);
			sz = end - start;
			if (sz > PAGE_SIZE - off)
				sz = PAGE_SIZE - off;
			error = copyout_implicit_cap((caddr_t)sf_buf_kva(sf) + off,
			    (caddr_t)start, sz);
			vm_imgact_unmap_page(sf);
			if (error != 0)
				return (KERN_FAILURE);
			offset += sz;
		}
	} else {
		vm_object_reference(object);
		rv = vm_map_fixed(map, object, offset, start, end - start,
		    prot, VM_PROT_ALL, cow | MAP_CHECK_EXCL |
		    (object != NULL ? MAP_VN_EXEC : 0));
		if (rv != KERN_SUCCESS) {
			locked = VOP_ISLOCKED(imgp->vp);
			VOP_UNLOCK(imgp->vp);
			vm_object_deallocate(object);
			vn_lock(imgp->vp, locked | LK_RETRY);
			return (rv);
		} else if (object != NULL) {
			MPASS(imgp->vp->v_object == object);
			VOP_SET_TEXT_CHECKED(imgp->vp);
		}
	}
	return (KERN_SUCCESS);
}

More

Created Jul 30, 2020 // Last Updated Aug 9, 2020
