diff options
Diffstat (limited to 'tools/apriori')
-rw-r--r-- | tools/apriori/Android.mk | 54 | ||||
-rw-r--r-- | tools/apriori/apriori.c | 2601 | ||||
-rw-r--r-- | tools/apriori/apriori.h | 14 | ||||
-rw-r--r-- | tools/apriori/cmdline.c | 186 | ||||
-rw-r--r-- | tools/apriori/cmdline.h | 21 | ||||
-rw-r--r-- | tools/apriori/common.h | 28 | ||||
-rw-r--r-- | tools/apriori/debug.c | 38 | ||||
-rw-r--r-- | tools/apriori/debug.h | 88 | ||||
-rw-r--r-- | tools/apriori/hash.c | 27 | ||||
-rw-r--r-- | tools/apriori/hash.h | 14 | ||||
-rw-r--r-- | tools/apriori/main.c | 229 | ||||
-rw-r--r-- | tools/apriori/prelink_info.c | 106 | ||||
-rw-r--r-- | tools/apriori/prelink_info.h | 9 | ||||
-rw-r--r-- | tools/apriori/prelinkmap.c | 139 | ||||
-rw-r--r-- | tools/apriori/prelinkmap.h | 10 | ||||
-rw-r--r-- | tools/apriori/rangesort.c | 317 | ||||
-rw-r--r-- | tools/apriori/rangesort.h | 105 | ||||
-rw-r--r-- | tools/apriori/source.c | 18 | ||||
-rw-r--r-- | tools/apriori/source.h | 121 | ||||
-rwxr-xr-x | tools/apriori/tweak.h | 15 |
20 files changed, 4140 insertions, 0 deletions
diff --git a/tools/apriori/Android.mk b/tools/apriori/Android.mk new file mode 100644 index 0000000..71e4f4a --- /dev/null +++ b/tools/apriori/Android.mk @@ -0,0 +1,54 @@ +# Copyright 2005 The Android Open Source Project +# +# Android.mk for apriori +# + +LOCAL_PATH:= $(call my-dir) + +ifeq ($(TARGET_ARCH),arm) +include $(CLEAR_VARS) + +LOCAL_LDLIBS += -ldl +LOCAL_CFLAGS += -O2 -g +LOCAL_CFLAGS += -fno-function-sections -fno-data-sections -fno-inline +LOCAL_CFLAGS += -Wall -Wno-unused-function #-Werror +LOCAL_CFLAGS += -DBIG_ENDIAN=1 +LOCAL_CFLAGS += -DARM_SPECIFIC_HACKS +LOCAL_CFLAGS += -DSUPPORT_ANDROID_PRELINK_TAGS +LOCAL_CFLAGS += -DDEBUG +LOCAL_CFLAGS += -DADJUST_ELF=1 + +ifeq ($(HOST_OS),darwin) +LOCAL_CFLAGS += -DFSCANF_IS_BROKEN +endif +ifeq ($(HOST_OS),windows) +LOCAL_CFLAGS += -DFSCANF_IS_BROKEN +LOCAL_LDLIBS += -lintl +endif + + + +LOCAL_SRC_FILES := \ + apriori.c \ + cmdline.c \ + debug.c \ + hash.c \ + main.c \ + prelink_info.c \ + rangesort.c \ + source.c \ + prelinkmap.c + +LOCAL_C_INCLUDES:= \ + $(LOCAL_PATH)/ \ + external/elfutils/lib/ \ + external/elfutils/libelf/ \ + external/elfutils/libebl/ \ + external/elfcopy/ + +LOCAL_STATIC_LIBRARIES := libelfcopy libelf libebl libebl_arm #dl + +LOCAL_MODULE := apriori + +include $(BUILD_HOST_EXECUTABLE) +endif #TARGET_ARCH==arm diff --git a/tools/apriori/apriori.c b/tools/apriori/apriori.c new file mode 100644 index 0000000..d1807b3 --- /dev/null +++ b/tools/apriori/apriori.c @@ -0,0 +1,2601 @@ +#include <stdio.h> +#include <common.h> +#include <debug.h> +#include <libelf.h> +#include <libebl.h> +#ifdef ARM_SPECIFIC_HACKS + #include <libebl_arm.h> +#endif/*ARM_SPECIFIC_HACKS*/ +#include <elf.h> +#include <gelf.h> +#include <string.h> +#include <errno.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <hash.h> +#include <apriori.h> +#include <source.h> +#include <tweak.h> +#include <rangesort.h> +#include <prelink_info.h> +#include 
<prelinkmap.h> +#include <libgen.h> + +#ifndef ADJUST_ELF +#error "ADJUST_ELF must be defined!" +#endif + +/* When this macro is defined, apriori sets to ZERO those relocation values for + which it cannot find the appropriate referent. +*/ +#define PERMISSIVE +#define COPY_SECTION_DATA_BUFFER (0) +/* When ELF_STRPTR_IS_BROKEN (below) is set to a nonzero value, we replace calls to elf_strptr() + on the target ELF handle with code that extracts the strings directly from + the data buffers of that ELF handle. In this case, elf_strptr() does not + work as expected, as it tries to read the data buffer of the associated + string section directly from the file, and that buffer does not exist yet + in the file, since we haven't committed our changes yet. +*/ +#define ELF_STRPTR_IS_BROKEN (1) + +/* When the macro below is defined, apriori does not mark for removal those + relocation sections that it fully handles. Instead, apriori just sets their + sizes to zero. This is more for debugging than of any actual use. + + This macro is meaningful only when ADJUST_ELF!=0 +*/ +#define REMOVE_HANDLED_SECTIONS + +extern int verbose_flag; + /* Head of the singly-linked list of source_t records; add_to_sources() pushes onto it and find_source() walks it. */ +static source_t *sources = NULL; + +#if defined(DEBUG) && 0 + /* The "&& 0" in the guard keeps the section-dump helpers below compiled out even when DEBUG is defined. */ +static void print_shdr(source_t *source, Elf_Scn *scn) +{ + GElf_Shdr shdr_mem, *shdr; + shdr = gelf_getshdr(scn, &shdr_mem); + Elf_Data *data = elf_getdata(scn, NULL); + INFO("\t%02d: data = %p, hdr = { offset = %8lld, size = %lld }, " + "data->d_buf = %p data->d_off = %lld, data->d_size = %d\n", + elf_ndxscn(scn), + data, + shdr->sh_offset, shdr->sh_size, + data->d_buf, data->d_off, data->d_size); +} + +static void print_shdr_idx(source_t *source, Elf *elf, int idx) +{ + print_shdr(source, elf_getscn(elf, idx)); +} + +static void print_shdrs(source_t *source) { + Elf_Scn *scn = NULL; + INFO("section offset dump for new ELF\n"); + while ((scn = elf_nextscn (source->elf, scn)) != NULL) + print_shdr(source, scn); + + INFO("\nsection offset dump for original ELF\n"); + while ((scn = elf_nextscn 
(source->oldelf, scn)) != NULL) + print_shdr(source, scn); + +#if 0 + { + INFO("section offset dump for new ELF\n"); + int i = 0; + for (i = 0; i < source->shnum; i++) { + scn = elf_getscn(source->elf, i); + print_shdr(source, scn); + } + } +#endif +} + +#endif /* DEBUG */ + +static char * find_file(const char *libname, + char **lib_lookup_dirs, + int num_lib_lookup_dirs); + /* Resolves 'name' through find_file() and returns the entry of the global 'sources' list whose name equals the resolved path, or NULL when find_file() returns NULL or no entry matches. The resolved path is freed before returning. */ +static inline source_t* find_source(const char *name, + char **lib_lookup_dirs, + int num_lib_lookup_dirs) { + char *full = find_file(name, lib_lookup_dirs, num_lib_lookup_dirs); + if (full) { + source_t *trav = sources; + while (trav) { + if (!strcmp(trav->name, full)) + break; + trav = trav->next; + } + free(full); + return trav; + } + return NULL; +} + /* Pushes 'src' onto the head of the global 'sources' list. */ +static inline void add_to_sources(source_t *src) { + src->next = sources; + sources = src; +} + /* Range-list error callback: reports through ERROR() which two ranges contain or intersect one another, then raises FAILIF(1, ...) unconditionally. */ +static void handle_range_error(range_error_t err, + range_t *left, range_t *right) { + switch (err) { + case ERROR_CONTAINS: + ERROR("ERROR: section (%lld, %lld bytes) contains " + "section (%lld, %lld bytes)\n", + left->start, left->length, + right->start, right->length); + break; + case ERROR_OVERLAPS: + ERROR("ERROR: Section (%lld, %lld bytes) intersects " + "section (%lld, %lld bytes)\n", + left->start, left->length, + right->start, right->length); + break; + default: + ASSERT(!"Unknown range error code!"); + } + + FAILIF(1, "Range error.\n"); +} + /* Creates in 'elf' (or in source->elf when 'elf' is NULL) one new section per section of source->oldelf, copying the section header and the Elf_Data descriptor. The descriptor is copied by value, so d_buf still points at the old section's buffer unless COPY_SECTION_DATA_BUFFER is nonzero. When ADJUST_ELF is nonzero the new section is also recorded in source->shdr_info[]. */ +static void create_elf_sections(source_t *source, Elf *elf) +{ + INFO("Creating new ELF sections.\n"); + ASSERT(elf == NULL || source->elf == NULL || source->elf == elf); + if (elf == NULL) { + ASSERT(source->elf != NULL); + elf = source->elf; + } + + int cnt = 1; + Elf_Scn *oldscn = NULL, *scn; + while ((oldscn = elf_nextscn (source->oldelf, oldscn)) != NULL) { + GElf_Shdr *oldshdr, oldshdr_mem; + + scn = elf_newscn(elf); + FAILIF_LIBELF(NULL == scn, elf_newscn); + + oldshdr = gelf_getshdr(oldscn, &oldshdr_mem); + FAILIF_LIBELF(NULL == oldshdr, gelf_getshdr); + /* Set the section header of the 
new section to be the same as the + header of the old section by default. */ + gelf_update_shdr(scn, oldshdr); + + /* Copy the section data */ + Elf_Data *olddata = elf_getdata(oldscn, NULL); + FAILIF_LIBELF(NULL == olddata, elf_getdata); + + Elf_Data *data = elf_newdata(scn); + FAILIF_LIBELF(NULL == data, elf_newdata); + *data = *olddata; +#if COPY_SECTION_DATA_BUFFER + if (olddata->d_buf != NULL) { + data->d_buf = MALLOC(data->d_size); + memcpy(data->d_buf, olddata->d_buf, olddata->d_size); + } +#endif + + INFO("\tsection %02d: [%-30s] created\n", + cnt, + elf_strptr(source->oldelf, + source->shstrndx, + oldshdr->sh_name)); + + if (ADJUST_ELF) { + ASSERT(source->shdr_info != NULL); + /* Record the new section in its shdr_info[] entry. */ + source->shdr_info[cnt].idx = cnt; + source->shdr_info[cnt].newscn = scn; + source->shdr_info[cnt].data = data; + source->shdr_info[cnt]. + use_old_shdr_for_relocation_calculations = 1; + INFO("\tsection [%s] (old offset %lld, old size %lld) " + "will have index %d (was %d).\n", + source->shdr_info[cnt].name, + source->shdr_info[cnt].old_shdr.sh_offset, + source->shdr_info[cnt].old_shdr.sh_size, + source->shdr_info[cnt].idx, + elf_ndxscn(source->shdr_info[cnt].scn)); + /* Same as the next assert */ + ASSERT(elf_ndxscn (source->shdr_info[cnt].newscn) == + source->shdr_info[cnt].idx); + } + + ASSERT(elf_ndxscn(scn) == (size_t)cnt); + cnt++; + } +} + +/* This function sets up the shdr_info[] array of a source_t. We call it only + when ADJUST_ELF is non-zero (i.e., support for adjusting an ELF file for + changes in sizes and numbers of relocation sections is compiled in). Note + that setup_shdr_info() depends only on the information in source->oldelf, + not on source->elf. +*/ + +static void setup_shdr_info(source_t *source) +{ + if (ADJUST_ELF) + { + /* Allocate the section-header-info buffer. 
*/ + INFO("Allocating section-header info structure (%d) bytes...\n", + source->shnum * sizeof (shdr_info_t)); + + source->shdr_info = (shdr_info_t *)CALLOC(source->shnum, + sizeof (shdr_info_t)); + + /* Mark the SHT_NULL section as handled. */ + source->shdr_info[0].idx = 2; + + int cnt = 1; + Elf_Scn *oldscn = NULL; + while ((oldscn = elf_nextscn (source->oldelf, oldscn)) != NULL) { + /* Sanity check: sections are enumerated in index order, starting at 1. */ + ASSERT(elf_ndxscn(oldscn) == (size_t)cnt); + + /* Initialize the corresponding shdr_info entry */ + { + /* Mark the section with a non-zero index. Later, when we + decide to drop a section, we will set its idx to zero, and + assign section numbers to the remaining sections. + */ + source->shdr_info[cnt].idx = 1; + + source->shdr_info[cnt].scn = oldscn; + + /* NOTE: Here we populate the section-header struct with the + same values as the original section's. After the + first run of prelink(), we will update the sh_size + fields of those sections that need resizing. + */ + FAILIF_LIBELF(NULL == + gelf_getshdr(oldscn, + &source->shdr_info[cnt].shdr), + gelf_getshdr); + + /* Get the name of the section. */ + source->shdr_info[cnt].name = + elf_strptr (source->oldelf, source->shstrndx, + source->shdr_info[cnt].shdr.sh_name); + + INFO("\tname: %s\n", source->shdr_info[cnt].name); + FAILIF(source->shdr_info[cnt].name == NULL, + "Malformed file: section %d name is null\n", + cnt); + + /* Remember the shdr.sh_link value. We need to remember this + value for those sections that refer to other sections. For + example, we need to remember it for relocation-entry + sections, because if we modify the symbol table that a + relocation-entry section is relative to, then we need to + patch the relocation section. By the time we get to + deciding whether we need to patch the relocation section, we + will have overwritten its header's sh_link field with a new + value. 
+ */ + source->shdr_info[cnt].old_shdr = source->shdr_info[cnt].shdr; + INFO("\t\toriginal sh_link: %08d\n", + source->shdr_info[cnt].old_shdr.sh_link); + INFO("\t\toriginal sh_addr: %lld\n", + source->shdr_info[cnt].old_shdr.sh_addr); + INFO("\t\toriginal sh_offset: %lld\n", + source->shdr_info[cnt].old_shdr.sh_offset); + INFO("\t\toriginal sh_size: %lld\n", + source->shdr_info[cnt].old_shdr.sh_size); + + FAILIF(source->shdr_info[cnt].shdr.sh_type == SHT_SYMTAB_SHNDX, + "Cannot handle sh_type SHT_SYMTAB_SHNDX!\n"); + FAILIF(source->shdr_info[cnt].shdr.sh_type == SHT_GROUP, + "Cannot handle sh_type SHT_GROUP!\n"); + FAILIF(source->shdr_info[cnt].shdr.sh_type == SHT_GNU_versym, + "Cannot handle sh_type SHT_GNU_versym!\n"); + } + + cnt++; + } /* for each section */ + } /* if (ADJUST_ELF) */ +} + +static Elf * init_elf(source_t *source, bool create_new_sections) +{ + Elf *elf; + if (source->output != NULL) { + if (source->output_is_dir) { + source->output_is_dir++; + char *dir = source->output; + int dirlen = strlen(dir); + /* The main() function maintains a pointer to source->output; it + frees the buffer after apriori() returns. 
+ */ + source->output = MALLOC(dirlen + + 1 + /* slash */ + strlen(source->name) + + 1); /* null terminator */ + strcpy(source->output, dir); + source->output[dirlen] = '/'; + strcpy(source->output + dirlen + 1, + basename(source->name)); + } + + source->newelf_fd = open(source->output, + O_RDWR | O_CREAT, + 0666); + FAILIF(source->newelf_fd < 0, "open(%s): %s (%d)\n", + source->output, + strerror(errno), + errno); + elf = elf_begin(source->newelf_fd, ELF_C_WRITE, NULL); + FAILIF_LIBELF(elf == NULL, elf_begin); + } else { + elf = elf_clone(source->oldelf, ELF_C_EMPTY); + FAILIF_LIBELF(elf == NULL, elf_clone); + } + + GElf_Ehdr *oldehdr = gelf_getehdr(source->oldelf, &source->old_ehdr_mem); + FAILIF_LIBELF(NULL == oldehdr, gelf_getehdr); + + /* Create new ELF and program headers for the elf file */ + INFO("Creating empty ELF and program headers...\n"); + FAILIF_LIBELF(gelf_newehdr (elf, gelf_getclass (source->oldelf)) == 0, + gelf_newehdr); + FAILIF_LIBELF(oldehdr->e_type != ET_REL + && gelf_newphdr (elf, + oldehdr->e_phnum) == 0, + gelf_newphdr); + + /* Copy the elf header */ + INFO("Copying ELF header...\n"); + GElf_Ehdr *ehdr = gelf_getehdr(elf, &source->ehdr_mem); + FAILIF_LIBELF(NULL == ehdr, gelf_getehdr); + memcpy(ehdr, oldehdr, sizeof(GElf_Ehdr)); + FAILIF_LIBELF(!gelf_update_ehdr(elf, ehdr), gelf_update_ehdr); + + /* Copy out the old program header: notice that if the ELF file does not + have a program header, this loop won't execute. + */ + INFO("Copying ELF program header...\n"); + { + int cnt; + source->phdr_info = (GElf_Phdr *)CALLOC(ehdr->e_phnum, + sizeof(GElf_Phdr)); + for (cnt = 0; cnt < ehdr->e_phnum; ++cnt) { + INFO("\tRetrieving entry %d\n", cnt); + FAILIF_LIBELF(NULL == + gelf_getphdr(source->oldelf, cnt, + source->phdr_info + cnt), + gelf_getphdr); + FAILIF_LIBELF(gelf_update_phdr (elf, cnt, + source->phdr_info + cnt) == 0, + gelf_update_phdr); + } + } + + /* Copy the sections and the section headers. 
*/ + if (create_new_sections) + { + create_elf_sections(source, elf); + } + + /* The ELF library better follows our layout when this is not a + relocatable object file. */ + elf_flagelf (elf, ELF_C_SET, (ehdr->e_type != ET_REL ? ELF_F_LAYOUT : 0)); + + return elf; +} + +static shdr_info_t *lookup_shdr_info_by_new_section( + source_t *source, + const char *sname, + Elf_Scn *newscn) +{ + if (source->shdr_info == NULL) return NULL; + int cnt; + for (cnt = 0; cnt < source->shnum; cnt++) { + if (source->shdr_info[cnt].newscn == newscn) { + INFO("\t\tnew section at %p matches shdr_info[%d], " + "section [%s]!\n", + newscn, + cnt, + source->shdr_info[cnt].name); + FAILIF(strcmp(sname, source->shdr_info[cnt].name), + "Matched section's name [%s] does not match " + "looked-up section's name [%s]!\n", + source->shdr_info[cnt].name, + sname); + return source->shdr_info + cnt; + } + } + return NULL; +} + +static bool do_init_source(source_t *source, unsigned base) +{ + /* Find various sections. */ + size_t scnidx; + Elf_Scn *scn; + GElf_Shdr *shdr, shdr_mem; + source->sorted_sections = init_range_list(); + INFO("Processing [%s]'s sections...\n", source->name); + for (scnidx = 1; scnidx < (size_t)source->shnum; scnidx++) { + INFO("\tGetting section index %d...\n", scnidx); + scn = elf_getscn(source->elf, scnidx); + if (NULL == scn) { + /* If we get an error from elf_getscn(), it means that a section + at the requested index does not exist. This may happen when + we remove sections. Since we do not update source->shnum + (we can't, since we need to know the original number of sections + to know source->shdr_info[]'s length), we will attempt to + retrieve a section for an index that no longer exists in the + new ELF file. 
*/ + INFO("\tThere is no section at index %d anymore, continuing.\n", + scnidx); + continue; + } + shdr = gelf_getshdr(scn, &shdr_mem); + FAILIF_LIBELF(NULL == shdr, gelf_getshdr); + + /* We haven't modified the shstrtab section, and so shdr->sh_name + has the same value as before. Thus we look up the name based + on the old ELF handle. We cannot use shstrndx on the new ELF + handle because the index of the shstrtab section may have + changed (and calling elf_getshstrndx() returns the same section + index, so libelf can't handle this either). + */ + const char *sname = + elf_strptr(source->oldelf, source->shstrndx, shdr->sh_name); + ASSERT(sname); + + INFO("\tAdding [%s] (%lld, %lld)...\n", + sname, + shdr->sh_addr, + shdr->sh_addr + shdr->sh_size); + /* Only SHF_ALLOC sections are recorded in the address-range list; it is sorted once, after the loop. */ if ((shdr->sh_flags & SHF_ALLOC) == SHF_ALLOC) { + add_unique_range_nosort(source->sorted_sections, + shdr->sh_addr, + shdr->sh_size, + scn, + handle_range_error, + NULL); /* no user-data destructor */ + } + + if (shdr->sh_type == SHT_DYNSYM) { + source->symtab.scn = scn; + source->symtab.data = elf_getdata(scn, NULL); + FAILIF_LIBELF(NULL == source->symtab.data, elf_getdata); + memcpy(&source->symtab.shdr, shdr, sizeof(GElf_Shdr)); + source->symtab.info = lookup_shdr_info_by_new_section( + source, sname, scn); + ASSERT(source->shdr_info == NULL || source->symtab.info != NULL); + + /* The sh_link field of the section header of the symbol table + contains the index of the associated string table. 
*/ + source->strtab.scn = elf_getscn(source->elf, + source->symtab.shdr.sh_link); + FAILIF_LIBELF(NULL == source->strtab.scn, elf_getscn); + FAILIF_LIBELF(NULL == gelf_getshdr(source->strtab.scn, + &source->strtab.shdr), + gelf_getshdr); + source->strtab.data = elf_getdata(source->strtab.scn, NULL); + FAILIF_LIBELF(NULL == source->strtab.data, elf_getdata); + source->strtab.info = lookup_shdr_info_by_new_section( + source, + elf_strptr(source->oldelf, source->shstrndx, + source->strtab.shdr.sh_name), + source->strtab.scn); + ASSERT(source->shdr_info == NULL || source->strtab.info != NULL); + } else if (shdr->sh_type == SHT_DYNAMIC) { + source->dynamic.scn = scn; + source->dynamic.data = elf_getdata(scn, NULL); + FAILIF_LIBELF(NULL == source->dynamic.data, elf_getdata); + memcpy(&source->dynamic.shdr, shdr, sizeof(GElf_Shdr)); + source->dynamic.info = lookup_shdr_info_by_new_section( + source, sname, scn); + ASSERT(source->shdr_info == NULL || source->dynamic.info != NULL); + } else if (shdr->sh_type == SHT_HASH) { + source->hash.scn = scn; + source->hash.data = elf_getdata(scn, NULL); + FAILIF_LIBELF(NULL == source->hash.data, elf_getdata); + memcpy(&source->hash.shdr, shdr, sizeof(GElf_Shdr)); + source->hash.info = lookup_shdr_info_by_new_section( + source, sname, scn); + ASSERT(source->shdr_info == NULL || source->hash.info != NULL); + } else if (shdr->sh_type == SHT_REL || shdr->sh_type == SHT_RELA) { + if (source->num_relocation_sections == + source->relocation_sections_size) { + source->relocation_sections_size += 5; + source->relocation_sections = + (section_info_t *)REALLOC(source->relocation_sections, + source->relocation_sections_size * + sizeof(section_info_t)); + } + section_info_t *reloc = + source->relocation_sections + source->num_relocation_sections; + reloc->scn = scn; + reloc->info = lookup_shdr_info_by_new_section(source, sname, scn); + ASSERT(source->shdr_info == NULL || reloc->info != NULL); + reloc->data = elf_getdata(scn, NULL); + 
FAILIF_LIBELF(NULL == reloc->data, elf_getdata); + memcpy(&reloc->shdr, shdr, sizeof(GElf_Shdr)); + source->num_relocation_sections++; + } else if (!strcmp(sname, ".bss")) { + source->bss.scn = scn; + source->bss.data = elf_getdata(scn, NULL); + source->bss.info = lookup_shdr_info_by_new_section( + source, sname, scn); + ASSERT(source->shdr_info == NULL || source->bss.info != NULL); + /* The BSS section occupies no space in the ELF file. */ + FAILIF_LIBELF(NULL == source->bss.data, elf_getdata) + FAILIF(NULL != source->bss.data->d_buf, + "Enexpected: section [%s] has data!", + sname); + memcpy(&source->bss.shdr, shdr, sizeof(GElf_Shdr)); + } + } + sort_ranges(source->sorted_sections); + + source->unfinished = + (unfinished_relocation_t *)CALLOC(source->num_relocation_sections, + sizeof(unfinished_relocation_t)); + + if (source->dynamic.scn == NULL) { + INFO("File [%s] does not have a dynamic section!\n", source->name); + /* If this is a static executable, we won't update anything. */ + source->dry_run = 1; + return false; + } + + FAILIF(source->symtab.scn == NULL, + "File [%s] does not have a dynamic symbol table!\n", + source->name); + FAILIF(source->hash.scn == NULL, + "File [%s] does not have a hash table!\n", + source->name); + FAILIF(source->hash.shdr.sh_link != elf_ndxscn(source->symtab.scn), + "Hash points to section %d, not to %d as expected!\n", + source->hash.shdr.sh_link, + elf_ndxscn(source->symtab.scn)); + + /* Now, find out how many symbols we have and allocate the array of + satisfied symbols. + + NOTE: We don't count the number of undefined symbols here; we will + iterate over the symbol table later, and count them then, when it is + more convenient. + */ + size_t symsize = gelf_fsize (source->elf, + ELF_T_SYM, + 1, source->elf_hdr.e_version); + ASSERT(symsize); + + source->num_syms = source->symtab.data->d_size / symsize; + source->base = (source->oldelf_hdr.e_type == ET_DYN) ? 
base : 0; + INFO("Relink base for [%s]: 0x%lx\n", source->name, source->base); + FAILIF(source->base == -1, + "Can't prelink [%s]: it's a shared library and you did not " + "provide a prelink address!\n", + source->name); +#ifdef SUPPORT_ANDROID_PRELINK_TAGS + FAILIF(source->prelinked && source->base != source->prelink_base, + "ERROR: file [%s] has already been prelinked for 0x%08lx. " + "Cannot change to 0x%08lx!\n", + source->name, + source->prelink_base, + source->base); +#endif/*SUPPORT_ANDROID_PRELINK_TAGS*/ + + return true; +} + +static source_t* init_source(const char *full_path, + const char *output, int is_file, + int base, int dry_run) +{ + source_t *source = (source_t *)CALLOC(1, sizeof(source_t)); + + ASSERT(full_path); + source->name = full_path; + source->output = output; + source->output_is_dir = !is_file; + + source->newelf_fd = -1; + source->elf_fd = -1; + INFO("Opening %s...\n", full_path); + source->elf_fd = + open(full_path, ((dry_run || output != NULL) ? O_RDONLY : O_RDWR)); + FAILIF(source->elf_fd < 0, "open(%s): %s (%d)\n", + full_path, + strerror(errno), + errno); + + FAILIF(fstat(source->elf_fd, &source->elf_file_info) < 0, + "fstat(%s(fd %d)): %s (%d)\n", + source->name, + source->elf_fd, + strerror(errno), + errno); + INFO("File [%s]'s size is %lld bytes!\n", + source->name, + source->elf_file_info.st_size); + + INFO("Calling elf_begin(%s)...\n", full_path); + + source->oldelf = + elf_begin(source->elf_fd, + (dry_run || output != NULL) ? ELF_C_READ : ELF_C_RDWR, + NULL); + FAILIF_LIBELF(source->oldelf == NULL, elf_begin); + + /* libelf can recognize COFF and A.OUT formats, but we handle only ELF. */ + if(elf_kind(source->oldelf) != ELF_K_ELF) { + ERROR("Input file %s is not in ELF format!\n", full_path); + return NULL; + } + + /* Make sure this is a shared library or an executable. 
*/ + { + INFO("Making sure %s is a shared library or an executable...\n", + full_path); + FAILIF_LIBELF(0 == gelf_getehdr(source->oldelf, &source->oldelf_hdr), + gelf_getehdr); + FAILIF(source->oldelf_hdr.e_type != ET_DYN && + source->oldelf_hdr.e_type != ET_EXEC, + "%s must be a shared library (elf type is %d, expecting %d).\n", + full_path, + source->oldelf_hdr.e_type, + ET_DYN); + } + +#ifdef SUPPORT_ANDROID_PRELINK_TAGS + /* First, check to see if the file has been prelinked. */ + source->prelinked = + check_prelinked(source->name, + source->oldelf_hdr.e_ident[EI_DATA] == ELFDATA2LSB, + &source->prelink_base); + /* Note that in the INFO() below we need to use oldelf_hdr because we + haven't cloned the ELF file yet, and source->elf_hdr is not defined. */ + if (source->prelinked) { + PRINT("%s [%s] is already prelinked at 0x%08lx!\n", + (source->oldelf_hdr.e_type == ET_EXEC ? + "Executable" : "Shared library"), + source->name, + source->prelink_base); + /* Force a dry run when the file has already been prelinked */ + source->dry_run = dry_run = 1; + } + else { + INFO("%s [%s] is not prelinked!\n", + (source->oldelf_hdr.e_type == ET_EXEC ? + "Executable" : "Shared library"), + source->name); + source->dry_run = dry_run; + } +#endif/*SUPPORT_ANDROID_PRELINK_TAGS*/ + + /* Get the index of the section-header-strings-table section. */ + FAILIF_LIBELF(elf_getshstrndx (source->oldelf, &source->shstrndx) < 0, + elf_getshstrndx); + + FAILIF_LIBELF(elf_getshnum (source->oldelf, (size_t *)&source->shnum) < 0, + elf_getshnum); + + /* When we have a dry run, or when ADJUST_ELF is enabled, we use + source->oldelf for source->elf, because the former is mmapped privately, + so changes to it have no effect. With ADJUST_ELF, the first run of + prelink() is a dry run. We will reopen the elf file for write access + after that dry run, before we call adjust_elf. */ + + source->elf = (ADJUST_ELF || source->dry_run) ? 
+ source->oldelf : init_elf(source, ADJUST_ELF == 0); + + FAILIF_LIBELF(0 == gelf_getehdr(source->elf, &source->elf_hdr), + gelf_getehdr); +#ifdef DEBUG + ASSERT(!memcmp(&source->oldelf_hdr, + &source->elf_hdr, + sizeof(source->elf_hdr))); +#endif + + /* Get the EBL handling. The -g option is currently the only reason + we need EBL so dont open the backend unless necessary. */ + source->ebl = ebl_openbackend (source->elf); + FAILIF_LIBELF(NULL == source->ebl, ebl_openbackend); +#ifdef ARM_SPECIFIC_HACKS + FAILIF_LIBELF(0 != arm_init(source->elf, source->elf_hdr.e_machine, + source->ebl, sizeof(Ebl)), + arm_init); +#endif/*ARM_SPECIFIC_HACKS*/ + + add_to_sources(source); + if (do_init_source(source, base) == false) return NULL; + return source; +} + +/* complements do_init_source() */ +static void do_destroy_source(source_t *source) +{ + int cnt; + destroy_range_list(source->sorted_sections); + source->sorted_sections = NULL; + for (cnt = 0; cnt < source->num_relocation_sections; cnt++) { + FREEIF(source->unfinished[cnt].rels); + source->unfinished[cnt].rels = NULL; + source->unfinished[cnt].num_rels = 0; + source->unfinished[cnt].rels_size = 0; + } + if (source->jmprel.sections != NULL) { + destroy_range_list(source->jmprel.sections); + source->jmprel.sections = NULL; + } + if (source->rel.sections != NULL) { + destroy_range_list(source->rel.sections); + source->rel.sections = NULL; + } + FREE(source->unfinished); /* do_init_source() */ + source->unfinished = NULL; + FREE(source->relocation_sections); /* do_init_source() */ + source->relocation_sections = NULL; + source->num_relocation_sections = source->relocation_sections_size = 0; +} + +static void destroy_source(source_t *source) +{ + /* Is this a little-endian ELF file? */ + if (source->oldelf != source->elf) { + /* If it's a dynamic executable, this must not be a dry run. 
*/ + if (!source->dry_run && source->dynamic.scn != NULL) + { + FAILIF_LIBELF(elf_update(source->elf, ELF_C_WRITE) == -1, + elf_update); + } + FAILIF_LIBELF(elf_end(source->oldelf), elf_end); + } + ebl_closebackend(source->ebl); + FAILIF_LIBELF(elf_end(source->elf), elf_end); + FAILIF(close(source->elf_fd) < 0, "Could not close file %s: %s (%d)!\n", + source->name, strerror(errno), errno); + FAILIF((source->newelf_fd >= 0) && (close(source->newelf_fd) < 0), + "Could not close output file: %s (%d)!\n", strerror(errno), errno); + +#ifdef SUPPORT_ANDROID_PRELINK_TAGS + if (!source->dry_run) { + if (source->dynamic.scn != NULL && + source->elf_hdr.e_type != ET_EXEC) + { + /* For some reason, trying to write directly to source->elf_fd + causes a "bad file descriptor" error because of something libelf + does. We just close the file descriptor and open a new one in + function setup_prelink_info() below. */ + INFO("%s: setting up prelink tag at end of file.\n", + source->output ? source->output : source->name); + setup_prelink_info(source->output ? source->output : source->name, + source->elf_hdr.e_ident[EI_DATA] == ELFDATA2LSB, + source->base); + } + else INFO("%s: executable, NOT setting up prelink tag.\n", + source->name); + } +#endif/*SUPPORT_ANDROID_PRELINK_TAGS*/ + + do_destroy_source(source); + + if (source->shstrtab_data != NULL) + FREEIF(source->shstrtab_data->d_buf); /* adjust_elf */ + + FREE(source->lib_deps); /* list of library dependencies (process_file()) */ + FREEIF(source->shdr_info); /* setup_shdr_info() */ + FREEIF(source->phdr_info); /* init_elf() */ + FREE(source->name); /* assigned to by init_source() */ + /* If the output is a directory, in init_elf() we allocate a buffer where + we copy the directory, a slash, and the file name. Here we free that + buffer. 
+ */ + if (source->output_is_dir > 1) { + FREE(source->output); + } + FREE(source); /* init_source() */ +} + +static void reinit_source(source_t *source) +{ + do_destroy_source(source); + do_init_source(source, source->base); + + { + /* We've gathered all the DT_DYNAMIC entries; now we need to figure + out which relocation sections fit in which range as described by + the entries. Before we do so, however, we will populate the + jmprel and rel members of source, as well as their sizes. + */ + + size_t dynidx, numdyn; + GElf_Dyn *dyn, dyn_mem; + + numdyn = source->dynamic.shdr.sh_size / + source->dynamic.shdr.sh_entsize; + + source->rel.idx = source->rel.sz_idx = -1; + source->jmprel.idx = source->jmprel.sz_idx = -1; + for (dynidx = 0; dynidx < numdyn; dynidx++) { + dyn = gelf_getdyn (source->dynamic.data, + dynidx, + &dyn_mem); + FAILIF_LIBELF(NULL == dyn, gelf_getdyn); + switch (dyn->d_tag) + { + case DT_NEEDED: + break; + case DT_JMPREL: + INFO("reinit_source: DT_JMPREL is at index %d, 0x%08llx.\n", + dynidx, dyn->d_un.d_ptr); + source->jmprel.idx = dynidx; + source->jmprel.addr = dyn->d_un.d_ptr; + break; + case DT_PLTRELSZ: + INFO("reinit_source: DT_PLTRELSZ is at index %d, 0x%08llx.\n", + dynidx, dyn->d_un.d_val); + source->jmprel.sz_idx = dynidx; + source->jmprel.size = dyn->d_un.d_val; + break; + case DT_REL: + INFO("reinit_source: DT_REL is at index %d, 0x%08llx.\n", + dynidx, dyn->d_un.d_ptr); + source->rel.idx = dynidx; + source->rel.addr = dyn->d_un.d_ptr; + break; + case DT_RELSZ: + INFO("reinit_source: DT_RELSZ is at index %d, 0x%08llx.\n", + dynidx, dyn->d_un.d_val); + source->rel.sz_idx = dynidx; + source->rel.size = dyn->d_un.d_val; + break; + case DT_RELA: + case DT_RELASZ: + FAILIF(1, "Can't handle DT_RELA and DT_RELASZ entries!\n"); + break; + } /* switch */ + } /* for each dynamic entry... 
*/ + } +} + +static GElf_Sym *hash_lookup_global_or_weak_symbol(source_t *lib, + const char *symname, + GElf_Sym *lib_sym_mem) +{ + int lib_symidx = hash_lookup(lib->elf, + lib->hash.data, + lib->symtab.data, + lib->strtab.data, + symname); + + GElf_Sym sym_mem; + if (SHN_UNDEF != lib_symidx) { + /* We found the symbol--now check to see if it is global + or weak. If this is the case, then the symbol satisfies + the dependency. */ + GElf_Sym *lib_sym = gelf_getsymshndx(lib->symtab.data, + NULL, + lib_symidx, + &sym_mem, + NULL); + FAILIF_LIBELF(NULL == lib_sym, gelf_getsymshndx); +#if ELF_STRPTR_IS_BROKEN + ASSERT(!strcmp( + symname, + ((char *)elf_getdata(elf_getscn(lib->elf, + lib->symtab.shdr.sh_link), + NULL)->d_buf) + + lib_sym->st_name)); +#else + ASSERT(!strcmp( + symname, + elf_strptr(lib->elf, lib->symtab.shdr.sh_link, + lib_sym->st_name))); +#endif + if (lib_sym->st_shndx != SHN_UNDEF && + (GELF_ST_BIND(lib_sym->st_info) == STB_GLOBAL || + GELF_ST_BIND(lib_sym->st_info) == STB_WEAK)) { + memcpy(lib_sym_mem, &sym_mem, sizeof(GElf_Sym)); + return lib_sym; + } + } + + return NULL; +} + +static source_t *lookup_symbol_in_dependencies(source_t *source, + const char *symname, + GElf_Sym *found_sym) +{ + source_t *sym_source = NULL; /* return value */ + + /* This is an undefined symbol. Go over the list of libraries + and look it up. 
*/ + size_t libidx; + int found = 0; + source_t *last_found = NULL; + for (libidx = 0; libidx < (size_t)source->num_lib_deps; libidx++) { + source_t *lib = source->lib_deps[libidx]; + if (hash_lookup_global_or_weak_symbol(lib, symname, found_sym) != NULL) + { + sym_source = lib; + if (found) { + if (found == 1) { + found++; + ERROR("ERROR: multiple definitions found for [%s:%s]!\n", + source->name, symname); + ERROR("\tthis definition [%s]\n", lib->name); + } + ERROR("\tprevious definition [%s]\n", last_found->name); + } + last_found = lib; + if (!found) found = 1; + } + } + +#if ELF_STRPTR_IS_BROKEN + ASSERT(!sym_source || + !strcmp(symname, + (char *)(elf_getdata(elf_getscn( + sym_source->elf, + sym_source->symtab.shdr.sh_link), + NULL)->d_buf) + + found_sym->st_name)); +#else + ASSERT(!sym_source || + !strcmp(symname, + elf_strptr(sym_source->elf, + sym_source->symtab.shdr.sh_link, + found_sym->st_name))); +#endif + + return sym_source; +} + +static int do_prelink(source_t *source, + Elf_Data *reloc_scn_data, + int reloc_scn_entry_size, + unfinished_relocation_t *unfinished, + int locals_only, + bool dry_run, + char **lib_lookup_dirs, int num_lib_lookup_dirs, + char **default_libs, int num_default_libs, + int *num_unfinished_relocs) +{ + int num_relocations = 0; + + size_t num_rels; + num_rels = reloc_scn_data->d_size / reloc_scn_entry_size; + + INFO("\tThere are %d relocations.\n", num_rels); + + int rel_idx; + for (rel_idx = 0; rel_idx < (size_t)num_rels; rel_idx++) { + GElf_Rel *rel, rel_mem; + + //INFO("\tHandling relocation %d/%d\n", rel_idx, num_rels); + + rel = gelf_getrel(reloc_scn_data, rel_idx, &rel_mem); + FAILIF_LIBELF(rel == NULL, gelf_getrel); + GElf_Sym *sym = NULL, sym_mem; + unsigned sym_idx = GELF_R_SYM(rel->r_info); + source_t *sym_source = NULL; + /* found_sym points to found_sym_mem, when sym_source != NULL, and + to sym, when the sybmol is locally defined. 
If the symbol is + not locally defined and sym_source == NULL, then sym is not + defined either. */ + GElf_Sym *found_sym = NULL, found_sym_mem; + const char *symname = NULL; + int sym_is_local = 1; + if (sym_idx) { + sym = gelf_getsymshndx(source->symtab.data, + NULL, + sym_idx, + &sym_mem, + NULL); + FAILIF_LIBELF(NULL == sym, gelf_getsymshndx); +#if ELF_STRPTR_IS_BROKEN + symname = + ((char *)source->strtab.data->d_buf) + + sym->st_name; +#else + symname = elf_strptr(source->elf, + elf_ndxscn(source->strtab.scn), + sym->st_name); +#endif + + /* If the symbol is defined and is either not in the BSS + section, or if it is in the BSS then the relocation is + not a copy relocation, then the symbol's source is this + library (i.e., it is locally-defined). Otherwise, the + symbol is imported. + */ + + sym_is_local = 0; + if (sym->st_shndx != SHN_UNDEF && + (source->bss.scn == NULL || + sym->st_shndx != elf_ndxscn(source->bss.scn) || +#ifdef ARM_SPECIFIC_HACKS + GELF_R_TYPE(rel->r_info) != R_ARM_COPY +#else + 1 +#endif + )) + { + sym_is_local = 1; + } + + if (sym_is_local) { + INFO("\t\tSymbol [%s:%s] is defined locally.\n", + source->name, + symname); + sym_source = source; + found_sym = sym; + } + else if (!locals_only) { + sym_source = lookup_symbol_in_dependencies(source, + symname, + &found_sym_mem); + + /* The symbol was not in the list of dependencies, which by + itself is an error: it means either that the symbol does + not exist anywhere, or that the library which has the symbol + has not been listed as a dependency in this library or + executable. It could also mean (for a library) that the + symbol is defined in the executable that links against it, + which is obviously not a good thing. These are bad things, + but they do happen, which is why we have the ability to + provide a list of default dependencies, including + executables. Here we check to see if the symbol has been + defined in any of them. 
+ */ + if (NULL == sym_source) { + INFO("\t\tChecking default dependencies...\n"); + int i; + source_t *lib, *old_sym_source = NULL; + int printed_initial_error = 0; + for (i = 0; i < num_default_libs; i++) { + INFO("\tChecking in [%s].\n", default_libs[i]); + lib = find_source(default_libs[i], + lib_lookup_dirs, + num_lib_lookup_dirs); + FAILIF(NULL == lib, + "Can't find default library [%s]!\n", + default_libs[i]); + if (hash_lookup_global_or_weak_symbol(lib, + symname, + &found_sym_mem)) { + found_sym = &found_sym_mem; + sym_source = lib; +#if ELF_STRPTR_IS_BROKEN + ASSERT(!strcmp(symname, + (char *)(elf_getdata( + elf_getscn( + sym_source->elf, + sym_source->symtab. + shdr.sh_link), + NULL)->d_buf) + + found_sym->st_name)); +#else + ASSERT(!strcmp(symname, + elf_strptr(sym_source->elf, + sym_source->symtab.shdr.sh_link, + found_sym->st_name))); + +#endif + INFO("\tFound symbol [%s] in [%s]!\n", + symname, lib->name); + if (old_sym_source) { + if (printed_initial_error == 0) { + printed_initial_error = 1; + ERROR("Multiple definition of [%s]:\n" + "\t[%s]\n", + symname, + old_sym_source->name); + } + ERROR("\t[%s]\n", sym_source->name); + } + old_sym_source = sym_source; + } else { + INFO("\tCould not find symbol [%s] in default " + "lib [%s]!\n", symname, lib->name); + } + } + if (sym_source) { + ERROR("ERROR: Could not find [%s:%s] in dependent " + "libraries (but found in default [%s])!\n", + source->name, + symname, + sym_source->name); + } + } else { + found_sym = &found_sym_mem; + /* We found the symbol in a dependency library. */ + INFO("\t\tSymbol [%s:%s, value %lld] is imported from [%s]\n", + source->name, + symname, + found_sym->st_value, + sym_source->name); + } + } /* if symbol is defined in this library... */ + + if (!locals_only) { + /* If a symbol is weak and we haven't found it, then report + an error. We really need to find a way to set its value + to zero. The problem is that it needs to refer to some + section. 
*/ + + FAILIF(NULL == sym_source && + GELF_ST_BIND(sym->st_info) == STB_WEAK, + "Cannot handle weak symbols yet (%s:%s <- %s).\n", + source->name, + symname, + sym_source->name); +#ifdef PERMISSIVE + if (GELF_ST_BIND(sym->st_info) != STB_WEAK && + NULL == sym_source) { + ERROR("ERROR: Can't find symbol [%s:%s] in dependent or " + "default libraries!\n", source->name, symname); + } +#else + FAILIF(GELF_ST_BIND(sym->st_info) != STB_WEAK && + NULL == sym_source, + "Can't find symbol [%s:%s] in dependent or default " + "libraries!\n", + source->name, + symname); +#endif + } /* if (!locals_only) */ + } +#if 0 // too chatty + else + INFO("\t\tno symbol is associated with this relocation\n"); +#endif + + + // We prelink only local symbols when locals_only == 1. + + bool can_relocate = true; + if (!sym_is_local && + (symname[0] == 'd' && symname[1] == 'l' && symname[2] != '\0' && + (!strcmp(symname + 2, "open") || + !strcmp(symname + 2, "close") || + !strcmp(symname + 2, "sym") || + !strcmp(symname + 2, "error")))) { + INFO("********* NOT RELOCATING LIBDL SYMBOL [%s]\n", symname); + can_relocate = false; + } + + if (can_relocate && (sym_is_local || !locals_only)) + { + GElf_Shdr shdr_mem; Elf_Scn *scn; Elf_Data *data; + find_section(source, rel->r_offset, &scn, &shdr_mem, &data); + unsigned *dest = + (unsigned*)(((char *)data->d_buf) + + (rel->r_offset - shdr_mem.sh_addr)); + unsigned rel_type = GELF_R_TYPE(rel->r_info); + char buf[64]; + INFO("\t\t%-15s ", + ebl_reloc_type_name(source->ebl, + GELF_R_TYPE(rel->r_info), + buf, + sizeof(buf))); + + /* Section-name offsets do not change, so we use oldelf to get the + strings. This makes a difference in the second pass of the + prelinker, after the call to adjust_elf, because + source->shstrndx no longer contains the index of the + section-header-strings table. 
+ */ + const char *sname = elf_strptr( + source->oldelf, source->shstrndx, shdr_mem.sh_name); + + switch (rel_type) { + case R_ARM_JUMP_SLOT: + case R_ARM_GLOB_DAT: + case R_ARM_ABS32: + ASSERT(data->d_buf != NULL); + ASSERT(data->d_size >= rel->r_offset - shdr_mem.sh_addr); +#ifdef PERMISSIVE + if (sym_source == NULL) { + ERROR("ERROR: Permissive relocation " + "[%-15s] [%s:%s]: [0x%llx] = ZERO\n", + ebl_reloc_type_name(source->ebl, + GELF_R_TYPE(rel->r_info), + buf, + sizeof(buf)), + sname, + symname, + rel->r_offset); + if (!dry_run) + *dest = 0; + } else +#endif + { + ASSERT(sym_source); + INFO("[%s:%s]: [0x%llx] = 0x%llx + 0x%lx\n", + sname, + symname, + rel->r_offset, + found_sym->st_value, + sym_source->base); + if (!dry_run) + *dest = found_sym->st_value + sym_source->base; + } + num_relocations++; + break; + case R_ARM_RELATIVE: + ASSERT(data->d_buf != NULL); + ASSERT(data->d_size >= rel->r_offset - shdr_mem.sh_addr); + FAILIF(sym != NULL, + "Unsupported RELATIVE form (symbol != 0)...\n"); + INFO("[%s:%s]: [0x%llx] = 0x%x + 0x%lx\n", + sname, + symname ?: "(symbol has no name)", + rel->r_offset, *dest, source->base); + if (!dry_run) + *dest += source->base; + num_relocations++; + break; + case R_ARM_COPY: +#ifdef PERMISSIVE + if (sym_source == NULL) { + ERROR("ERROR: Permissive relocation " + "[%-15s] [%s:%s]: NOT PERFORMING\n", + ebl_reloc_type_name(source->ebl, + GELF_R_TYPE(rel->r_info), + buf, + sizeof(buf)), + sname, + symname); + } else +#endif + { + ASSERT(sym); + ASSERT(sym_source); + GElf_Shdr src_shdr_mem; + Elf_Scn *src_scn; + Elf_Data *src_data; + find_section(sym_source, found_sym->st_value, + &src_scn, + &src_shdr_mem, + &src_data); + INFO("Found [%s:%s (%lld)] in section [%s] .\n", + sym_source->name, + symname, + found_sym->st_value, +#if ELF_STRPTR_IS_BROKEN + (((char *)elf_getdata( + elf_getscn(sym_source->elf, + sym_source->shstrndx), + NULL)->d_buf) + src_shdr_mem.sh_name) +#else + elf_strptr(sym_source->elf, + sym_source->shstrndx, + 
src_shdr_mem.sh_name) +#endif + ); + + unsigned *src = NULL; + if (src_data->d_buf == NULL) + { +#ifdef PERMISSIVE + if (sym_source->bss.scn == NULL || + elf_ndxscn(src_scn) != + elf_ndxscn(sym_source->bss.scn)) { + ERROR("ERROR: Permissive relocation (NULL source " + "not from .bss) [%-15s] [%s:%s]: " + "NOT PERFORMING\n", + ebl_reloc_type_name(source->ebl, + GELF_R_TYPE(rel->r_info), + buf, + sizeof(buf)), + sname, + symname); + } +#endif + } + else { + ASSERT(src_data->d_size >= + found_sym->st_value - src_shdr_mem.sh_addr); + src = (unsigned*)(((char *)src_data->d_buf) + + (found_sym->st_value - + src_shdr_mem.sh_addr)); + } + ASSERT(symname); + INFO("[%s:%s]: [0x%llx] <- [0x%llx] size %lld\n", + sname, + symname, rel->r_offset, + found_sym->st_value, + found_sym->st_size); + +#ifdef PERMISSIVE + if (src_data->d_buf != NULL || + (sym_source->bss.scn != NULL && + elf_ndxscn(src_scn) == + elf_ndxscn(sym_source->bss.scn))) +#endif/*PERMISSIVE*/ + { + if (data->d_buf == NULL) { + INFO("Incomplete relocation [%-15s] of [%s:%s].\n", + ebl_reloc_type_name(source->ebl, + GELF_R_TYPE(rel->r_info), + buf, + sizeof(buf)), + sname, + symname); + FAILIF(unfinished == NULL, + "You passed unfinished as NULL expecting " + "to handle all relocations, " + "but at least one cannot be handled!\n"); + if (unfinished->num_rels == unfinished->rels_size) { + unfinished->rels_size += 10; + unfinished->rels = (GElf_Rel *)REALLOC( + unfinished->rels, + unfinished->rels_size * + sizeof(GElf_Rel)); + } + unfinished->rels[unfinished->num_rels++] = *rel; + num_relocations--; + (*num_unfinished_relocs)++; + } + else { + if (src_data->d_buf != NULL) + { + ASSERT(data->d_buf != NULL); + ASSERT(data->d_size >= rel->r_offset - + shdr_mem.sh_addr); + if (!dry_run) + memcpy(dest, src, found_sym->st_size); + } + else { + ASSERT(src == NULL); + ASSERT(elf_ndxscn(src_scn) == + elf_ndxscn(sym_source->bss.scn)); + if (!dry_run) + memset(dest, 0, found_sym->st_size); + } + } + } + num_relocations++; + } 
+ break; + default: + FAILIF(1, "Unknown relocation type %d!\n", rel_type); + } // switch + } // relocate + else { + INFO("\t\tNot relocating symbol [%s]%s\n", + symname, + (can_relocate ? ", relocating only locals" : + ", which is a libdl symbol")); + FAILIF(unfinished == NULL, + "You passed unfinished as NULL expecting to handle all " + "relocations, but at least one cannot be handled!\n"); + if (unfinished->num_rels == unfinished->rels_size) { + unfinished->rels_size += 10; + unfinished->rels = (GElf_Rel *)REALLOC( + unfinished->rels, + unfinished->rels_size * + sizeof(GElf_Rel)); + } + unfinished->rels[unfinished->num_rels++] = *rel; + (*num_unfinished_relocs)++; + } + } // for each relocation entry + + return num_relocations; +} + +static int prelink(source_t *source, + int locals_only, + bool dry_run, + char **lib_lookup_dirs, int num_lib_lookup_dirs, + char **default_libs, int num_default_libs, + int *num_unfinished_relocs) +{ + INFO("Prelinking [%s] (number of relocation sections: %d)%s...\n", + source->name, source->num_relocation_sections, + (dry_run ? " (dry run)" : "")); + int num_relocations = 0; + int rel_scn_idx; + for (rel_scn_idx = 0; rel_scn_idx < source->num_relocation_sections; + rel_scn_idx++) + { + section_info_t *reloc_scn = source->relocation_sections + rel_scn_idx; + unfinished_relocation_t *unfinished = source->unfinished + rel_scn_idx; + + /* We haven't modified the shstrtab section, and so shdr->sh_name has + the same value as before. Thus we look up the name based on the old + ELF handle. We cannot use shstrndx on the new ELF handle because + the index of the shstrtab section may have changed (and calling + elf_getshstrndx() returns the same section index, so libelf can't + handle this either). + + If reloc_scn->info is available, we can assert that the + section-name has not changed. If this assertion fails, + then we cannot use the elf_strptr() trick below to get + the section name. 
One solution would be to save it in + the section_info_t structure. + */ + ASSERT(reloc_scn->info == NULL || + reloc_scn->shdr.sh_name == reloc_scn->info->old_shdr.sh_name); + const char *sname = + elf_strptr(source->oldelf, + source->shstrndx, + reloc_scn->shdr.sh_name); + ASSERT(sname != NULL); + + INFO("\n\tIterating relocation section [%s]...\n", sname); + + /* In general, the new size of the section differs from the original + size of the section, because we can handle some of the relocations. + This was communicated to adjust_elf, which modified the ELF file + according to the new section sizes. Now, when prelink() does the + actual work of prelinking, it needs to know the original size of the + relocation section so that it can see all of the original relocation + entries! + */ + size_t d_size = reloc_scn->data->d_size; + if (reloc_scn->info != NULL && + reloc_scn->data->d_size != reloc_scn->info->old_shdr.sh_size) + { + INFO("Setting size of section [%s] to from new size %d to old " + "size %lld temporarily (so prelinker can see all " + "relocations).\n", + reloc_scn->info->name, + d_size, + reloc_scn->info->old_shdr.sh_size); + reloc_scn->data->d_size = reloc_scn->info->old_shdr.sh_size; + } + + num_relocations += + do_prelink(source, + reloc_scn->data, reloc_scn->shdr.sh_entsize, + unfinished, + locals_only, dry_run, + lib_lookup_dirs, num_lib_lookup_dirs, + default_libs, num_default_libs, + num_unfinished_relocs); + + if (reloc_scn->data->d_size != d_size) + { + ASSERT(reloc_scn->info != NULL); + INFO("Resetting size of section [%s] to %d\n", + reloc_scn->info->name, + d_size); + reloc_scn->data->d_size = d_size; + } + } + + /* Now prelink those relocation sections which were fully handled, and + therefore removed. They are not a part of the + source->relocation_sections[] array anymore, but we can find them by + scanning source->shdr_info[] and looking for sections with idx == 0. 
+ */ + + if (ADJUST_ELF && source->shdr_info != NULL) { + /* Walk over the shdr_info[] array to see if we've removed any + relocation sections. prelink() those sections as well. + */ + int i; + for (i = 0; i < source->shnum; i++) { + shdr_info_t *info = source->shdr_info + i; + if (info->idx == 0 && + (info->shdr.sh_type == SHT_REL || + info->shdr.sh_type == SHT_RELA)) { + + Elf_Data *data = elf_getdata(info->scn, NULL); + ASSERT(data->d_size == 0); + data->d_size = info->old_shdr.sh_size; + + INFO("\n\tIterating relocation section [%s], which was " + "discarded (size %d, entry size %lld).\n", + info->name, + data->d_size, + info->old_shdr.sh_entsize); + + num_relocations += + do_prelink(source, + data, info->old_shdr.sh_entsize, + NULL, /* the section was fully handled */ + locals_only, dry_run, + lib_lookup_dirs, num_lib_lookup_dirs, + default_libs, num_default_libs, + num_unfinished_relocs); + + data->d_size = 0; + } + } + } + return num_relocations; +} + +static char * find_file(const char *libname, + char **lib_lookup_dirs, + int num_lib_lookup_dirs) { + if (libname[0] == '/') { + /* This is an absolute path name--just return it. */ + /* INFO("ABSOLUTE PATH: [%s].\n", libname); */ + return strdup(libname); + } else { + /* First try the working directory. */ + int fd; + if ((fd = open(libname, O_RDONLY)) > 0) { + close(fd); + /* INFO("FOUND IN CURRENT DIR: [%s].\n", libname); */ + return strdup(libname); + } else { + /* Iterate over all library paths. For each path, append the file + name and see if there is a file at that place. If that fails, + bail out. */ + + char *name; + while (num_lib_lookup_dirs--) { + size_t lib_len = strlen(*lib_lookup_dirs); + /* one extra character for the slash, and another for the + terminating NULL. 
*/ + name = (char *)MALLOC(lib_len + strlen(libname) + 2); + strcpy(name, *lib_lookup_dirs); + name[lib_len] = '/'; + strcpy(name + lib_len + 1, libname); + if ((fd = open(name, O_RDONLY)) > 0) { + close(fd); + /* INFO("FOUND: [%s] in [%s].\n", libname, name); */ + return name; + } + INFO("NOT FOUND: [%s] in [%s].\n", libname, name); + free(name); + } + } + } + return NULL; +} + +static void adjust_dynamic_segment_entry_size(source_t *source, + dt_rel_info_t *dyn) +{ + /* Update the size entry in the DT_DYNAMIC segment. */ + GElf_Dyn *dyn_entry, dyn_entry_mem; + dyn_entry = gelf_getdyn(source->dynamic.data, + dyn->sz_idx, + &dyn_entry_mem); + FAILIF_LIBELF(NULL == dyn_entry, gelf_getdyn); + /* If we are calling this function to adjust the size of the dynamic entry, + then there should be some unfinished relocations remaining. If there + are none, then we should remove the entry from the dynamic section + altogether. + */ + ASSERT(dyn->num_unfinished_relocs); + + size_t relsize = gelf_fsize(source->elf, + ELF_T_REL, + 1, + source->elf_hdr.e_version); + + if (unlikely(verbose_flag)) { + char buf[64]; + INFO("Updating entry %d: [%-10s], %08llx --> %08x\n", + dyn->sz_idx, + ebl_dynamic_tag_name (source->ebl, dyn_entry->d_tag, + buf, sizeof (buf)), + dyn_entry->d_un.d_val, + dyn->num_unfinished_relocs * relsize); + } + + dyn_entry->d_un.d_val = dyn->num_unfinished_relocs * relsize; + + FAILIF_LIBELF(!gelf_update_dyn(source->dynamic.data, + dyn->sz_idx, + dyn_entry), + gelf_update_dyn); +} + +static void adjust_dynamic_segment_entries(source_t *source) +{ + /* This function may remove entries from the dynamic segment, but it won't + resize the relevant section. It'll just fill the remainder with empty + DT entries. + + FIXME: This is not guaranteed right now. If a dynamic segment does not + end with null DT entries, I think this will break. 
+ */ + FAILIF(source->rel.processed, + "More than one section matches DT_REL entry in dynamic segment!\n"); + FAILIF(source->jmprel.processed, + "More than one section matches DT_JMPREL entry in " + "dynamic segment!\n"); + source->rel.processed = + source->jmprel.processed = 1; + + if (source->rel.num_unfinished_relocs > 0) + adjust_dynamic_segment_entry_size(source, &source->rel); + + if (source->jmprel.num_unfinished_relocs > 0) + adjust_dynamic_segment_entry_size(source, &source->jmprel); + + /* If at least one of the entries is empty, then we need to remove it. We + have already adjusted the size of the other. + */ + if (source->rel.num_unfinished_relocs == 0 || + source->jmprel.num_unfinished_relocs == 0) + { + /* We need to delete the DT_REL/DT_RELSZ and DT_PLTREL/DT_PLTRELSZ + entries from the dynamic segment. */ + + GElf_Dyn *dyn_entry, dyn_entry_mem; + size_t dynidx, updateidx; + + size_t numdyn = + source->dynamic.shdr.sh_size / + source->dynamic.shdr.sh_entsize; + + for (updateidx = dynidx = 0; dynidx < numdyn; dynidx++) + { + dyn_entry = gelf_getdyn(source->dynamic.data, + dynidx, + &dyn_entry_mem); + FAILIF_LIBELF(NULL == dyn_entry, gelf_getdyn); + if ((source->rel.num_unfinished_relocs == 0 && + (dynidx == source->rel.idx || + dynidx == source->rel.sz_idx)) || + (source->jmprel.num_unfinished_relocs == 0 && + (dynidx == source->jmprel.idx || + dynidx == source->jmprel.sz_idx))) + { + if (unlikely(verbose_flag)) { + char buf[64]; + INFO("\t(!)\tRemoving entry %02d: [%-10s], %08llx\n", + dynidx, + ebl_dynamic_tag_name (source->ebl, dyn_entry->d_tag, + buf, sizeof (buf)), + dyn_entry->d_un.d_val); + } + continue; + } + + if (unlikely(verbose_flag)) { + char buf[64]; + INFO("\t\tKeeping entry %02d: [%-10s], %08llx\n", + dynidx, + ebl_dynamic_tag_name (source->ebl, dyn_entry->d_tag, + buf, sizeof (buf)), + dyn_entry->d_un.d_val); + } + + gelf_update_dyn(source->dynamic.data, + updateidx, + &dyn_entry_mem); + updateidx++; + } + } +} /* 
adjust_dynamic_segment_entries */ + +static bool adjust_dynamic_segment_for(source_t *source, + dt_rel_info_t *dyn, + bool adjust_section_size_only) +{ + bool dropped_sections = false; + + /* Go over the sections that belong to this dynamic range. */ + dyn->num_unfinished_relocs = 0; + if (dyn->sections) { + int num_scns, idx; + range_t *scns = get_sorted_ranges(dyn->sections, &num_scns); + + INFO("\tdynamic range %s:[%lld, %lld) contains %d sections.\n", + source->name, + dyn->addr, + dyn->addr + dyn->size, + num_scns); + + ASSERT(scns); + int next_idx = 0, next_rel_off = 0; + /* The total number of unfinished relocations for this dynamic + * entry. */ + section_info_t *next = (section_info_t *)scns[next_idx].user; + section_info_t *first = next; + ASSERT(first); + for (idx = 0; idx < num_scns; idx++) { + section_info_t *reloc_scn = (section_info_t *)scns[idx].user; + size_t rel_scn_idx = reloc_scn - source->relocation_sections; + ASSERT(rel_scn_idx < (size_t)source->num_relocation_sections); + unfinished_relocation_t *unfinished = + &source->unfinished[rel_scn_idx]; + int unf_idx; + + ASSERT(reloc_scn->info == NULL || + reloc_scn->shdr.sh_name == + reloc_scn->info->old_shdr.sh_name); + const char *sname = + elf_strptr(source->oldelf, + source->shstrndx, + reloc_scn->shdr.sh_name); + + INFO("\tsection [%s] contains %d unfinished relocs.\n", + sname, + unfinished->num_rels); + + for (unf_idx = 0; unf_idx < unfinished->num_rels; unf_idx++) + { + /* There are unfinished relocations. Copy them forward to the + lowest section we can. 
*/ + + while (next_rel_off == + (int)(next->shdr.sh_size/next->shdr.sh_entsize)) + { + INFO("\tsection [%s] has filled up with %d unfinished " + "relocs.\n", + sname, + next_rel_off); + + next_idx++; + ASSERT(next_idx <= idx); + next = (section_info_t *)scns[next_idx].user; + next_rel_off = 0; + } + + if (!adjust_section_size_only) { + INFO("\t\tmoving unfinished relocation %2d to [%s:%d]\n", + unf_idx, + sname, + next_rel_off); + FAILIF_LIBELF(0 == + gelf_update_rel(next->data, + next_rel_off, + &unfinished->rels[unf_idx]), + gelf_update_rel); + } + + next_rel_off++; + dyn->num_unfinished_relocs++; + } + } /* for */ + + /* Set the size of the last section, and mark all subsequent + sections for removal. At this point, next is the section + to which we last wrote data, next_rel_off is the offset before + which we wrote the last relocation, and so next_rel_off * + relsize is the new size of the section. + */ + + bool adjust_file = ADJUST_ELF && source->elf_hdr.e_type != ET_EXEC; + if (adjust_file && !source->dry_run) + { + size_t relsize = gelf_fsize(source->elf, + ELF_T_REL, + 1, + source->elf_hdr.e_version); + + ASSERT(next->info == NULL || + next->shdr.sh_name == next->info->old_shdr.sh_name); + const char *sname = + elf_strptr(source->oldelf, + source->shstrndx, + next->shdr.sh_name); + + INFO("\tsection [%s] (index %d) has %d unfinished relocs, " + "changing its size to %ld bytes (from %ld bytes).\n", + sname, + elf_ndxscn(next->scn), + next_rel_off, + (long)(next_rel_off * relsize), + (long)(next->shdr.sh_size)); + + /* source->shdr_info[] must be allocated prior to calling this + function. This is in fact done in process_file(), by calling + setup_shdr_info() just before we call adjust_dynamic_segment(). + */ + ASSERT(source->shdr_info != NULL); + + /* We do not update the data field of shdr_info[], because it does + not exist yet (with ADJUST_ELF != 0). We create the new section + and section data after the first call to prelink(). 
For now, we + save the results of our analysis by modifying the sh_size field + of the section header. When we create the new sections' data, + we set the size of the data from the sh_size fields of the + section headers. + + NOTE: The assertion applies only to the first call of + adjust_dynamic_segment (which calls this function). By + the second call, we've already created the data for the + new sections. The only sections for which we haven't + created data are the relocation sections we are removing. + */ +#ifdef DEBUG + ASSERT((!adjust_section_size_only && + (source->shdr_info[elf_ndxscn(next->scn)].idx > 0)) || + source->shdr_info[elf_ndxscn(next->scn)].data == NULL); +#endif + + //FIXME: what else do we need to do here? Do we need to update + // another copy of the shdr so that it's picked up when we + // commit the file? + next->shdr.sh_size = next_rel_off * relsize; + source->shdr_info[elf_ndxscn(next->scn)].shdr.sh_size = + next->shdr.sh_size; + if (next_rel_off * relsize == 0) { +#ifdef REMOVE_HANDLED_SECTIONS + INFO("\tsection [%s] (index %d) is now empty, marking for " + "removal.\n", + sname, + elf_ndxscn(next->scn)); + source->shdr_info[elf_ndxscn(next->scn)].idx = 0; + dropped_sections = true; +#endif + } + + while (++next_idx < num_scns) { + next = (section_info_t *)scns[next_idx].user; +#ifdef REMOVE_HANDLED_SECTIONS + ASSERT(next->info == NULL || + next->shdr.sh_name == next->info->old_shdr.sh_name); + const char *sname = + elf_strptr(source->oldelf, + source->shstrndx, + next->shdr.sh_name); + INFO("\tsection [%s] (index %d) is now empty, marking for " + "removal.\n", + sname, + elf_ndxscn(next->scn)); + /* mark for removal */ + source->shdr_info[elf_ndxscn(next->scn)].idx = 0; + dropped_sections = true; +#endif + } + } + + } /* if (dyn->sections) */ + else { + /* The dynamic entry won't have any sections when it itself doesn't + exist. 
This could happen when we remove all relocation sections + from a dynamic entry because we have managed to handle all + relocations in them. + */ + INFO("\tNo section for dynamic entry!\n"); + } + + return dropped_sections; +} + +static bool adjust_dynamic_segment(source_t *source, + bool adjust_section_size_only) +{ + bool dropped_section; + INFO("Adjusting dynamic segment%s.\n", + (adjust_section_size_only ? " (section sizes only)" : "")); + INFO("\tadjusting dynamic segment REL.\n"); + dropped_section = + adjust_dynamic_segment_for(source, &source->rel, + adjust_section_size_only); + INFO("\tadjusting dynamic segment JMPREL.\n"); + dropped_section = + adjust_dynamic_segment_for(source, &source->jmprel, + adjust_section_size_only) || + dropped_section; + if (!adjust_section_size_only) + adjust_dynamic_segment_entries(source); + return dropped_section; +} + +static void match_relocation_sections_to_dynamic_ranges(source_t *source) +{ + /* We've gathered all the DT_DYNAMIC entries; now we need to figure out + which relocation sections fit in which range as described by the + entries. + */ + + int relidx; + for (relidx = 0; relidx < source->num_relocation_sections; relidx++) { + section_info_t *reloc_scn = &source->relocation_sections[relidx]; + + int index = elf_ndxscn(reloc_scn->scn); + + ASSERT(reloc_scn->info == NULL || + reloc_scn->shdr.sh_name == reloc_scn->info->old_shdr.sh_name); + const char *sname = + elf_strptr(source->oldelf, + source->shstrndx, + reloc_scn->shdr.sh_name); + + INFO("Checking section [%s], index %d, for match to dynamic ranges\n", + sname, index); + if (source->shdr_info == NULL || reloc_scn->info->idx > 0) { + if (source->rel.addr && + source->rel.addr <= reloc_scn->shdr.sh_addr && + reloc_scn->shdr.sh_addr < source->rel.addr + source->rel.size) + { + /* The entire section must fit in the dynamic range. 
*/ + if((reloc_scn->shdr.sh_addr + reloc_scn->shdr.sh_size) > + (source->rel.addr + source->rel.size)) + { + PRINT("WARNING: In [%s], section %s:[%lld,%lld) " + "is not fully contained in dynamic range " + "[%lld,%lld)!\n", + source->name, + sname, + reloc_scn->shdr.sh_addr, + reloc_scn->shdr.sh_addr + + reloc_scn->shdr.sh_size, + source->rel.addr, + source->rel.addr + source->rel.size); + } + + if (NULL == source->rel.sections) { + source->rel.sections = init_range_list(); + ASSERT(source->rel.sections); + } + add_unique_range_nosort(source->rel.sections, + reloc_scn->shdr.sh_addr, + reloc_scn->shdr.sh_size, + reloc_scn, + NULL, + NULL); + INFO("\tSection [%s] matches dynamic range REL.\n", + sname); + } + else if (source->jmprel.addr && + source->jmprel.addr <= reloc_scn->shdr.sh_addr && + reloc_scn->shdr.sh_addr <= source->jmprel.addr + + source->jmprel.size) + { + if((reloc_scn->shdr.sh_addr + reloc_scn->shdr.sh_size) > + (source->jmprel.addr + source->jmprel.size)) + { + PRINT("WARNING: In [%s], section %s:[%lld,%lld) " + "is not fully " + "contained in dynamic range [%lld,%lld)!\n", + source->name, + sname, + reloc_scn->shdr.sh_addr, + reloc_scn->shdr.sh_addr + + reloc_scn->shdr.sh_size, + source->jmprel.addr, + source->jmprel.addr + source->jmprel.size); + } + + if (NULL == source->jmprel.sections) { + source->jmprel.sections = init_range_list(); + ASSERT(source->jmprel.sections); + } + add_unique_range_nosort(source->jmprel.sections, + reloc_scn->shdr.sh_addr, + reloc_scn->shdr.sh_size, + reloc_scn, + NULL, + NULL); + INFO("\tSection [%s] matches dynamic range JMPREL.\n", + sname); + } + else + PRINT("WARNING: Relocation section [%s:%s] does not match " + "any DT_ entry.\n", + source->name, + sname); + } + else { + INFO("Section [%s] was removed, not matching it to dynamic " + "ranges.\n", + sname); + } + } /* for ... 
*/ + + if (source->rel.sections) sort_ranges(source->rel.sections); + if (source->jmprel.sections) sort_ranges(source->jmprel.sections); +} + +static void drop_sections(source_t *source) +{ + INFO("We are dropping some sections from [%s]--creating section entries " + "only for remaining sections.\n", + source->name); + /* Renumber the sections. The numbers for the sections after those we are + dropping will be shifted back by the number of dropped sections. */ + int cnt, idx; + for (cnt = idx = 1; cnt < source->shnum; ++cnt) { + if (source->shdr_info[cnt].idx > 0) { + source->shdr_info[cnt].idx = idx++; + + /* Create a new section. */ + FAILIF_LIBELF((source->shdr_info[cnt].newscn = + elf_newscn(source->elf)) == NULL, elf_newscn); + ASSERT(elf_ndxscn (source->shdr_info[cnt].newscn) == + source->shdr_info[cnt].idx); + + /* Copy the section data */ + Elf_Data *olddata = + elf_getdata(source->shdr_info[cnt].scn, // old section + NULL); + FAILIF_LIBELF(NULL == olddata, elf_getdata); + Elf_Data *data = + elf_newdata(source->shdr_info[cnt].newscn); + FAILIF_LIBELF(NULL == data, elf_newdata); + *data = *olddata; +#if COPY_SECTION_DATA_BUFFER + if (olddata->d_buf != NULL) { + data->d_buf = MALLOC(data->d_size); + memcpy(data->d_buf, olddata->d_buf, olddata->d_size); + } +#endif + source->shdr_info[cnt].data = data; + + if (data->d_size != + source->shdr_info[cnt].shdr.sh_size) { + INFO("Trimming new-section data from %d to %lld bytes " + "(as calculated by adjust_dynamic_segment()).\n", + data->d_size, + source->shdr_info[cnt].shdr.sh_size); + data->d_size = + source->shdr_info[cnt].shdr.sh_size; + } + + INFO("\tsection [%s] (old offset %lld, old size %lld) " + "will have index %d (was %d), new size %d\n", + source->shdr_info[cnt].name, + source->shdr_info[cnt].old_shdr.sh_offset, + source->shdr_info[cnt].old_shdr.sh_size, + source->shdr_info[cnt].idx, + elf_ndxscn(source->shdr_info[cnt].scn), + data->d_size); + } else { + INFO("\tIgnoring section [%s] (offset %lld, size 
%lld, index %d), " + "it will be discarded.\n", + source->shdr_info[cnt].name, + source->shdr_info[cnt].shdr.sh_offset, + source->shdr_info[cnt].shdr.sh_size, + elf_ndxscn(source->shdr_info[cnt].scn)); + } + + /* NOTE: We mark use_old_shdr_for_relocation_calculations even for the + sections we are removing. adjust_elf has an assertion that makes + sure that if the values for the size of a section according to its + header and its data structure differ, then we are using explicitly + the old section header for calculations, and that the section in + question is a relocation section. + */ + source->shdr_info[cnt].use_old_shdr_for_relocation_calculations = true; + } /* for */ +} + +static source_t* process_file(const char *filename, + const char *output, int is_file, + void (*report_library_size_in_memory)( + const char *name, off_t fsize), + unsigned (*get_next_link_address)( + const char *name), + int locals_only, + char **lib_lookup_dirs, + int num_lib_lookup_dirs, + char **default_libs, + int num_default_libs, + int dry_run, + int *total_num_handled_relocs, + int *total_num_unhandled_relocs) +{ + /* Look up the file in the list of already-handled files, which are + represented by source_t structs. If we do not find the file, then we + haven't prelinked it yet. If we find it, then we have, so we do + nothing. Keep in mind that apriori operates on an entire collection + of files, and if application A used library L, and so does application + B, if we process A first, then by the time we get to B we will have + prelinked L already; that's why we check first to see if a library has + been prelinked. + */ + source_t *source = + find_source(filename, lib_lookup_dirs, num_lib_lookup_dirs); + if (NULL == source) { + /* If we could not find the source, then it hasn't been processed yet, + so we go ahead and process it! 
*/ + INFO("Processing [%s].\n", filename); + char *full = find_file(filename, lib_lookup_dirs, num_lib_lookup_dirs); + FAILIF(NULL == full, + "Could not find [%s] in the current directory or in any of " + "the search paths!\n", filename); + + unsigned base = get_next_link_address(full); + + source = init_source(full, output, is_file, base, dry_run); + + if (source == NULL) { + INFO("File [%s] is a static executable.\n", filename); + return NULL; + } + ASSERT(source->dynamic.scn != NULL); + + /* We need to increment the next prelink address only when the file we + are currently handing is a shared library. Executables do not need + to be prelinked at a different address, they are always at address + zero. + + Also, if we are prelinking locals only, then we are handling a + single file per invokation of apriori, so there is no need to + increment the prelink address unless there is a global prelink map, + in which case we do need to check to see if the library isn't + running into its neighbouts in the prelink map. + */ + if (source->oldelf_hdr.e_type != ET_EXEC && + (!locals_only || + report_library_size_in_memory == + pm_report_library_size_in_memory)) { + /* This sets the next link address only if an increment was not + specified by the user. If an address increment was specified, + then we just check to make sure that the file size is less than + the increment. + + NOTE: The file size is the absolute highest number of bytes that + the file may occupy in memory, if the entire file is loaded, but + this is almost next the case. A file will often have sections + which are not loaded, which could add a lot of size. That's why + we start off with the file size and then subtract the size of + the biggest sections that will not get loaded, which are the + varios DWARF sections, all of which of which are named starting + with ".debug_". + + We could do better than this (by caculating exactly how many + bytes from that file will be loaded), but that's an overkill. 
+ Unless the prelink-address increment becomes too small, the file + size after subtracting the sizes of the DWARF section will be a + good-enough upper bound. + */ + + unsigned long fsize = source->elf_file_info.st_size; + INFO("Calculating loadable file size for next link address. " + "Starting with %ld.\n", fsize); + if (true) { + Elf_Scn *scn = NULL; + GElf_Shdr shdr_mem, *shdr; + const char *scn_name; + while ((scn = elf_nextscn (source->oldelf, scn)) != NULL) { + shdr = gelf_getshdr(scn, &shdr_mem); + FAILIF_LIBELF(NULL == shdr, gelf_getshdr); + scn_name = elf_strptr (source->oldelf, + source->shstrndx, shdr->sh_name); + ASSERT(scn_name != NULL); + + if (!(shdr->sh_flags & SHF_ALLOC)) { + INFO("\tDecrementing by %lld on account of section " + "[%s].\n", + shdr->sh_size, + scn_name); + fsize -= shdr->sh_size; + } + } + } + INFO("Done calculating loadable file size for next link address: " + "Final value is %ld.\n", fsize); + report_library_size_in_memory(source->name, fsize); + } + + /* Identify the dynamic segment and process it. Specifically, we find + out what dependencies, if any, this file has. Whenever we encounter + such a dependency, we process it recursively; we find out where the + various relocation information sections are stored. */ + + size_t dynidx; + GElf_Dyn *dyn, dyn_mem; + size_t numdyn = + source->dynamic.shdr.sh_size / + source->dynamic.shdr.sh_entsize; + ASSERT(source->dynamic.shdr.sh_size == source->dynamic.data->d_size); + + source->rel.idx = source->rel.sz_idx = -1; + source->jmprel.idx = source->jmprel.sz_idx = -1; + + for (dynidx = 0; dynidx < numdyn; dynidx++) { + dyn = gelf_getdyn (source->dynamic.data, + dynidx, + &dyn_mem); + FAILIF_LIBELF(NULL == dyn, gelf_getdyn); + /* When we are processing only the local relocations in a file, + we don't need to handle any of the dependencies. It won't + hurt if we do, but we will be doing unnecessary work. 
+ */ + switch (dyn->d_tag) + { + case DT_NEEDED: + if (!locals_only) { + /* Process the needed library recursively. + */ + const char *dep_lib = +#if ELF_STRPTR_IS_BROKEN + (((char *)elf_getdata( + elf_getscn(source->elf, + source->dynamic.shdr.sh_link), + NULL)->d_buf) + dyn->d_un.d_val); +#else + elf_strptr (source->elf, + source->dynamic.shdr.sh_link, + dyn->d_un.d_val); +#endif + ASSERT(dep_lib != NULL); + INFO("[%s] depends on [%s].\n", filename, dep_lib); + ASSERT(output == NULL || is_file == 0); + source_t *dep = process_file(dep_lib, + output, is_file, + report_library_size_in_memory, + get_next_link_address, + locals_only, + lib_lookup_dirs, + num_lib_lookup_dirs, + default_libs, + num_default_libs, + dry_run, + total_num_handled_relocs, + total_num_unhandled_relocs); + + /* Add the library to the dependency list. */ + if (source->num_lib_deps == source->lib_deps_size) { + source->lib_deps_size += 10; + source->lib_deps = REALLOC(source->lib_deps, + source->lib_deps_size * + sizeof(source_t *)); + } + source->lib_deps[source->num_lib_deps++] = dep; + } + break; + case DT_JMPREL: + source->jmprel.idx = dynidx; + source->jmprel.addr = dyn->d_un.d_ptr; + break; + case DT_PLTRELSZ: + source->jmprel.sz_idx = dynidx; + source->jmprel.size = dyn->d_un.d_val; + break; + case DT_REL: + source->rel.idx = dynidx; + source->rel.addr = dyn->d_un.d_ptr; + break; + case DT_RELSZ: + source->rel.sz_idx = dynidx; + source->rel.size = dyn->d_un.d_val; + break; + case DT_RELA: + case DT_RELASZ: + FAILIF(1, "Can't handle DT_RELA and DT_RELASZ entries!\n"); + break; + } /* switch */ + } /* for each dynamic entry... */ + + INFO("Handling [%s].\n", filename); + +#ifdef SUPPORT_ANDROID_PRELINK_TAGS + if (!source->prelinked) +#endif + { + /* When ADJUST_ELF is defined, this call to prelink is a dry run + intended to calculate the number of relocations that could not + be handled. 
This, in turn, allows us to calculate the amount by + which we can shrink the various relocation sections before we + call adjust_elf. After we've adjusted the sections, we will + call prelink() one more time to do the actual work. + + NOTE: Even when ADJUST_ELF != 0, we cannot adjust an ELF file + that is an executabe, because an executable is not PIC. + */ + + int num_unfinished_relocs = 0; + bool adjust_file = ADJUST_ELF && source->elf_hdr.e_type != ET_EXEC; + INFO("\n\n\tPRELINKING %s\n\n", + adjust_file ? + "(CALCULATE NUMBER OF HANDLED RELOCATIONS)" : + "(ACTUAL)"); + int num_relocs = prelink(source, locals_only, + adjust_file || dry_run, + lib_lookup_dirs, num_lib_lookup_dirs, + default_libs, num_default_libs, + &num_unfinished_relocs); + INFO("[%s]: (calculate changes) handled %d, could not handle %d " + "relocations.\n", + source->name, + num_relocs, + num_unfinished_relocs); + + if (adjust_file && !dry_run) + { + /* Find out the new section sizes of the relocation sections, + but do not move any relocations around, because adjust_elf + needs to know about all relocations in order to adjust the + file correctly. + */ + match_relocation_sections_to_dynamic_ranges(source); + + /* We haven't set up source->shdr_info[] yet, so we do it now. + + NOTE: setup_shdr_info() depends only on source->oldelf, not + on source->elf! source->elf is not even defined yet. We + initialize source->shdr_info[] based on the section + information of the unmodified ELF file, and then make our + modifications in the call to adjust_dynamic_segment() based + on this information. adjust_dynamic_segment() will + rearrange the unhandled relocations in the beginning of + their relocation sections, and adjust the size of those + relocation sections. In the case when a relocation section + is completely handled, adjust_dynamic_segment() will mark it + for removal by function adjust_elf. 
+ */ + + ASSERT(source->elf == source->oldelf); + ASSERT(source->shdr_info == NULL); + setup_shdr_info(source); + ASSERT(source->shdr_info != NULL); + + INFO("\n\n\tADJUSTING DYNAMIC SEGMENT " + "(CALCULATE CHANGES)\n\n"); + bool drop_some_sections = adjust_dynamic_segment(source, true); + + /* Reopen the elf file! Note that we are not doing a dry run + (the if statement above makes sure of that.) + + NOTE: We call init_elf() after we called + adjust_dynamic_segment() in order to have + adjust_dynamic_segment() refer to source->oldelf when + it refers to source->elf. Since + adjust_dynamic_segment doesn't actually write to the + ELF file, this is OK. adjust_dynamic_segment() + updates the sh_size fields of saved section headers + and optionally marks sections for removal. + + Having adjust_dynamic_segment() refer to + source->oldelf means that we'll have access to + section-name strings so we can print them out in our + logging and debug output. + */ + source->elf = init_elf(source, false); + + /* This is the same code as in init_source() after the call to + * init_elf(). 
*/ + ASSERT(source->elf != source->oldelf); + ebl_closebackend(source->ebl); + source->ebl = ebl_openbackend (source->elf); + FAILIF_LIBELF(NULL == source->ebl, ebl_openbackend); +#ifdef ARM_SPECIFIC_HACKS + FAILIF_LIBELF(0 != arm_init(source->elf, + source->elf_hdr.e_machine, + source->ebl, sizeof(Ebl)), + arm_init); +#endif/*ARM_SPECIFIC_HACKS*/ + + if (drop_some_sections) + drop_sections(source); + else { + INFO("All sections remain in [%s]--we are changing at " + "most section sizes.\n", source->name); + create_elf_sections(source, NULL); + int cnt, idx; + for (cnt = idx = 1; cnt < source->shnum; ++cnt) { + Elf_Data *data = elf_getdata( + source->shdr_info[cnt].newscn, // new section + NULL); + if (data->d_size != + source->shdr_info[cnt].shdr.sh_size) { + INFO("Trimming new-section data from %d to %lld " + "bytes (as calculated by " + "adjust_dynamic_segment()).\n", + data->d_size, + source->shdr_info[cnt].shdr.sh_size); + data->d_size = source->shdr_info[cnt].shdr.sh_size; + } + } + } + + /* Shrink it! 
*/ + INFO("\n\n\tADJUSTING ELF\n\n"); + adjust_elf( + source->oldelf, source->name, + source->elf, source->name, + source->ebl, + &source->old_ehdr_mem, + NULL, 0, // no symbol filter + source->shdr_info, // information on how to adjust the ELF + source->shnum, // length of source->shdr_info[] + source->phdr_info, // program-header info + source->shnum, // irrelevant--we're not rebuilding shstrtab + source->shnum, // number of sections in file + source->shstrndx, // index of shstrtab (both in + // shdr_info[] and as a section index) + NULL, // irrelevant, since we are not rebuilding shstrtab + drop_some_sections, // some sections are being dropped + elf_ndxscn(source->dynamic.scn), // index of .dynamic + elf_ndxscn(source->symtab.scn), // index of .dynsym + 1, // allow shady business + &source->shstrtab_data, + true, + false); // do not rebuild shstrtab + + INFO("\n\n\tREINITIALIZING STRUCTURES " + "(TO CONTAIN ADJUSTMENTS)\n\n"); + reinit_source(source); + + INFO("\n\n\tPRELINKING (ACTUAL)\n\n"); +#ifdef DEBUG + int old_num_unfinished_relocs = num_unfinished_relocs; +#endif + num_unfinished_relocs = 0; +#ifdef DEBUG + int num_relocs_take_two = +#endif + prelink(source, locals_only, + false, /* not a dry run */ + lib_lookup_dirs, num_lib_lookup_dirs, + default_libs, num_default_libs, + &num_unfinished_relocs); + + /* The numbers for the total number of relocations and the + number of unhandled relocations between the first and second + invokationof prelink() must be the same! The first time we + ran prelink() just to calculate the numbers so that we could + calculate the adjustments to pass to adjust_elf, and the + second time we actually carry out the prelinking; the + numbers must stay the same! 
+ */ + ASSERT(num_relocs == num_relocs_take_two); + ASSERT(old_num_unfinished_relocs == num_unfinished_relocs); + + INFO("[%s]: (actual prelink) handled %d, could not " + "handle %d relocations.\n", + source->name, + num_relocs, + num_unfinished_relocs); + } /* if (adjust_elf && !dry_run) */ + + *total_num_handled_relocs += num_relocs; + *total_num_unhandled_relocs += num_unfinished_relocs; + + if(num_unfinished_relocs != 0 && + source->elf_hdr.e_type != ET_EXEC && + !locals_only) + { + /* One reason you could have unfinished relocations in an + executable file is if this file used dlopen() and friends. + We do not adjust relocation entries to those symbols, + because libdl is a dummy only--the real functions are + provided for by the dynamic linker itsef. + + NOTE FIXME HACK: This is specific to the Android dynamic + linker, and may not be true in other cases. + */ + PRINT("WARNING: Expecting to have unhandled relocations only " + "for executables (%s is not an executable)!\n", + source->name); + } + + match_relocation_sections_to_dynamic_ranges(source); + + /* Now, for each relocation section, check to see if its address + matches one of the DT_DYNAMIC relocation pointers. If so, then + if the section has no unhandled relocations, simply set the + associated DT_DYNAMIC entry's size to zero. If the section does + have unhandled entries, then lump them all together at the front + of the respective section and update the size of the respective + DT_DYNAMIC entry to the new size of the section. A better + approach would be do delete a relocation section if it has been + fully relocated and to remove its entry from the DT_DYNAMIC + array, and for relocation entries that still have some + relocations in them, we should shrink the section if that won't + violate relative offsets. This is more work, however, and for + the speed improvement we expect from a prelinker, just patching + up DT_DYNAMIC will suffice. 
+ + Note: adjust_dynamic_segment() will modify source->shdr_info[] + to denote any change in a relocation section's size. This + will be picked up by adjust_elf, which will rearrange the + file to eliminate the gap created by the decrease in size + of the relocation section. We do not need to do this, but + the relocation section could be large, and reduced + drastically by the prelinking process, so it pays to + adjust the file. + */ + + INFO("\n\n\tADJUSTING DYNAMIC SEGMENT (ACTUAL)\n\n"); + adjust_dynamic_segment(source, false); + } +#ifdef SUPPORT_ANDROID_PRELINK_TAGS + else INFO("[%s] is already prelinked at 0x%08lx.\n", + filename, + source->prelink_base); +#endif + } else INFO("[%s] has been processed already.\n", filename); + + return source; +} + +void apriori(char **execs, int num_execs, + char *output, + void (*report_library_size_in_memory)( + const char *name, off_t fsize), + int (*get_next_link_address)(const char *name), + int locals_only, + int dry_run, + char **lib_lookup_dirs, int num_lib_lookup_dirs, + char **default_libs, int num_default_libs, + char *mapfile) +{ + source_t *source; /* for general usage */ + int input_idx; + + ASSERT(report_library_size_in_memory != NULL); + ASSERT(get_next_link_address != NULL); + + /* Process and prelink each executable and object file. Function + process_file() is called for each executable in the loop below. + It calls itself recursively for each library. We prelink each library + after prelinking its dependencies. */ + int total_num_handled_relocs = 0, total_num_unhandled_relocs = 0; + for (input_idx = 0; input_idx < num_execs; input_idx++) { + INFO("executable: [%s]\n", execs[input_idx]); + /* Here process_file() is actually processing the top-level + executable files. 
*/ + process_file(execs[input_idx], output, num_execs == 1, + report_library_size_in_memory, + get_next_link_address, /* executables get a link address + of zero, regardless of this + value */ + locals_only, + lib_lookup_dirs, num_lib_lookup_dirs, + default_libs, num_default_libs, + dry_run, + &total_num_handled_relocs, + &total_num_unhandled_relocs); + /* if source is NULL, then the respective executable is static */ + /* Mark the source as an executable */ + } /* for each input executable... */ + + PRINT("Handled %d relocations.\n", total_num_handled_relocs); + PRINT("Could not handle %d relocations.\n", total_num_unhandled_relocs); + + /* We are done! Since the end result of our calculations is a set of + symbols for each library that other libraries or executables link + against, we iterate over the set of libraries one last time, and for + each symbol that is marked as satisfying some dependence, we emit + a line with the symbol's name to a text file derived from the library's + name by appending the suffix .syms to it. */ + + if (mapfile != NULL) { + const char *mapfile_name = mapfile; + FILE *fp; + if (*mapfile == '+') { + mapfile_name = mapfile + 1; + INFO("Opening map file %s for append/write.\n", + mapfile_name); + fp = fopen(mapfile_name, "a"); + } + else fp = fopen(mapfile_name, "w"); + + FAILIF(fp == NULL, "Cannot open file [%s]: %s (%d)!\n", + mapfile_name, + strerror(errno), + errno); + source = sources; + while (source) { + /* If it's a library, print the results. */ + if (source->elf_hdr.e_type == ET_DYN) { + /* Add to the memory map file. */ + fprintf(fp, "%s 0x%08lx %lld\n", + basename(source->name), + source->base, + source->elf_file_info.st_size); + } + source = source->next; + } + fclose(fp); + } + + /* Free the resources--you can't do it in the loop above because function + print_symbol_references() accesses nodes other than the one being + iterated over. 
+ */ + source = sources; + while (source) { + source_t *old = source; + source = source->next; + /* Destroy the evidence. */ + destroy_source(old); + } +} diff --git a/tools/apriori/apriori.h b/tools/apriori/apriori.h new file mode 100644 index 0000000..5e396fd --- /dev/null +++ b/tools/apriori/apriori.h @@ -0,0 +1,14 @@ +#ifndef LSD_H +#define LSD_H + +void apriori(char **execs, int num_execs, + char *output, + void (*set_next_link_address)(const char *name, off_t fsize), + int (*get_next_link_address)(const char *name), + int locals_only, + int dry_run, + char **lib_lookup_dirs, int num_lib_lookup_dirs, + char **default_libs, int num_default_libs, + char *mapfile); + +#endif diff --git a/tools/apriori/cmdline.c b/tools/apriori/cmdline.c new file mode 100644 index 0000000..95f112a --- /dev/null +++ b/tools/apriori/cmdline.c @@ -0,0 +1,186 @@ +#include <debug.h> +#include <cmdline.h> +#include <stdio.h> +#include <stdlib.h> +#include <getopt.h> +#include <string.h> +#include <ctype.h> + +extern char *optarg; +extern int optind, opterr, optopt; + +static struct option long_options[] = { + {"start-address", required_argument, 0, 's'}, + {"inc-address", required_argument, 0, 'i'}, + {"locals-only", no_argument, 0, 'l'}, + {"quiet", no_argument, 0, 'Q'}, + {"noupdate", no_argument, 0, 'n'}, + {"lookup", required_argument, 0, 'L'}, + {"default", required_argument, 0, 'D'}, + {"verbose", no_argument, 0, 'V'}, + {"help", no_argument, 0, 'h'}, + {"mapfile", required_argument, 0, 'M'}, + {"output", required_argument, 0, 'o'}, + {"prelinkmap", required_argument, 0, 'p'}, + {0, 0, 0, 0}, +}; + +/* This array must parallel long_options[] */ +static const char *descriptions[] = { + "start address to prelink libraries to", + "address increment for each library", + "prelink local relocations only", + "suppress informational and non-fatal error messages", + "do a dry run--calculate the prelink info but do not update any files", + "provide a directory for library lookup", + 
"provide a default library or executable for symbol lookup", + "print verbose output", + "print help screen", + "print a list of prelink addresses to file (prefix filename with + to append instead of overwrite)", + "specify an output directory (if multiple inputs) or file (is single input)", + "specify a file with prelink addresses instead of a --start-address/--inc-address combination", +}; + +void print_help(const char *name) { + fprintf(stdout, + "invokation:\n" + "\t%s file1 [file2 file3 ...] -Ldir1 [-Ldir2 ...] -saddr -iinc [-Vqn] [-M<logfile>]\n" + "\t%s -l file [-Vqn] [-M<logfile>]\n" + "\t%s -h\n\n", name, name, name); + fprintf(stdout, "options:\n"); + struct option *opt = long_options; + const char **desc = descriptions; + while (opt->name) { + fprintf(stdout, "\t-%c/--%s%s: %s\n", + opt->val, + opt->name, + (opt->has_arg ? " (argument)" : ""), + *desc); + opt++; + desc++; + } +} + +int get_options(int argc, char **argv, + int *start_addr, + int *inc_addr, + int *locals_only, + int *quiet, + int *dry_run, + char ***dirs, + int *num_dirs, + char ***defaults, + int *num_defaults, + int *verbose, + char **mapfile, + char **output, + char **prelinkmap) { + int c; + + ASSERT(dry_run); *dry_run = 0; + ASSERT(quiet); *quiet = 0; + ASSERT(verbose); *verbose = 0; + ASSERT(dirs); *dirs = NULL; + ASSERT(num_dirs); *num_dirs = 0; + ASSERT(defaults); *defaults = NULL; + ASSERT(num_defaults); *num_defaults = 0; + ASSERT(start_addr); *start_addr = -1; + ASSERT(inc_addr); *inc_addr = -1; + ASSERT(locals_only); *locals_only = 0; + ASSERT(mapfile); *mapfile = NULL; + ASSERT(output); *output = NULL; + ASSERT(prelinkmap); *prelinkmap = NULL; + int dirs_size = 0; + int defaults_size = 0; + + while (1) { + /* getopt_long stores the option index here. */ + int option_index = 0; + + c = getopt_long (argc, argv, + "VhnQlL:D:s:i:M:o:p:", + long_options, + &option_index); + /* Detect the end of the options. 
*/ + if (c == -1) break; + + if (isgraph(c)) { + INFO ("option -%c with value `%s'\n", c, (optarg ?: "(null)")); + } + +#define SET_STRING_OPTION(name) do { \ + ASSERT(optarg); \ + (*name) = strdup(optarg); \ +} while(0) + +#define SET_REPEATED_STRING_OPTION(arr, num, size) do { \ + if (*num == size) { \ + size += 10; \ + *arr = (char **)REALLOC(*arr, size * sizeof(char *)); \ + } \ + SET_STRING_OPTION(((*arr) + *num)); \ + (*num)++; \ +} while(0) + +#define SET_INT_OPTION(val) do { \ + ASSERT(optarg); \ + if (strlen(optarg) >= 2 && optarg[0] == '0' && optarg[1] == 'x') { \ + FAILIF(1 != sscanf(optarg+2, "%x", val), \ + "Expecting a hexadecimal argument!\n"); \ + } else { \ + FAILIF(1 != sscanf(optarg, "%d", val), \ + "Expecting a decimal argument!\n"); \ + } \ +} while(0) + + switch (c) { + case 0: + /* If this option set a flag, do nothing else now. */ + if (long_options[option_index].flag != 0) + break; + INFO ("option %s", long_options[option_index].name); + if (optarg) + INFO (" with arg %s", optarg); + INFO ("\n"); + break; + case 'Q': *quiet = 1; break; + case 'n': *dry_run = 1; break; + case 'M': + SET_STRING_OPTION(mapfile); + break; + case 'o': + SET_STRING_OPTION(output); + break; + case 'p': + SET_STRING_OPTION(prelinkmap); + break; + case 's': + SET_INT_OPTION(start_addr); + break; + case 'i': + SET_INT_OPTION(inc_addr); + break; + case 'L': + SET_REPEATED_STRING_OPTION(dirs, num_dirs, dirs_size); + break; + case 'D': + SET_REPEATED_STRING_OPTION(defaults, num_defaults, defaults_size); + break; + case 'l': *locals_only = 1; break; + case 'h': print_help(argv[0]); exit(1); break; + case 'V': *verbose = 1; break; + case '?': + /* getopt_long already printed an error message. 
*/ + break; + +#undef SET_STRING_OPTION +#undef SET_REPEATED_STRING_OPTION +#undef SET_INT_OPTION + + default: + FAILIF(1, "Unknown option"); + } + } + + return optind; +} diff --git a/tools/apriori/cmdline.h b/tools/apriori/cmdline.h new file mode 100644 index 0000000..8f7f394 --- /dev/null +++ b/tools/apriori/cmdline.h @@ -0,0 +1,21 @@ +#ifndef CMDLINE_H +#define CMDLINE_H + +void print_help(const char *executable_name); + +int get_options(int argc, char **argv, + int *start_addr, + int *addr_increment, + int *locals_only, + int *quiet, + int *dry_run, + char ***dirs, + int *num_dirs, + char ***defaults, + int *num_defaults, + int *verbose, + char **mapfile, + char **output, + char **prelinkmap); + +#endif/*CMDLINE_H*/ diff --git a/tools/apriori/common.h b/tools/apriori/common.h new file mode 100644 index 0000000..f5d9d2e --- /dev/null +++ b/tools/apriori/common.h @@ -0,0 +1,28 @@ +#ifndef COMMON_H +#define COMMON_H + +#include <libelf.h> +#include <elf.h> + +#define unlikely(expr) __builtin_expect (expr, 0) +#define likely(expr) __builtin_expect (expr, 1) + +#define MIN(a,b) ((a)<(b)?(a):(b)) /* no side effects in arguments allowed! 
#define NUM_COLS (32)

/* Write the ASCII rendering of count bytes starting at bytes to stream s,
   printing '.' for every non-printable byte.  Returns the number of
   non-printable bytes. */
static int dump_ascii_run(FILE *s, const unsigned char *bytes, size_t count)
{
    int num_nonprintable = 0;
    size_t k;
    for (k = 0; k < count; k++) {
        /* bytes[] is unsigned char, so the value is always in the range
           isprint() accepts. */
        if (isprint(bytes[k]))
            fputc(bytes[k], s);
        else {
            fputc('.', s);
            num_nonprintable++;
        }
    }
    return num_nonprintable;
}

/* Print a hex and ASCII dump of the len bytes at b to stream s.

   Each row holds up to NUM_COLS bytes and has the form
       <address of the row's first byte>: <hex bytes> <ASCII> (<end offset>)
   If elsize is zero, the hex bytes are separated by one space every 4 bytes
   and a second one every 8 bytes; otherwise one space is printed every elsize
   bytes.  The final row is flushed even when it is shorter than NUM_COLS.

   Returns the number of non-printable bytes in the whole buffer.  Nothing
   but a leading newline and the base address is printed when len is zero. */
int dump_hex_buffer(FILE *s, void *b, size_t len, size_t elsize) {
    const unsigned char *bytes = (const unsigned char *)b;
    int num_nonprintable = 0;
    size_t i, last = 0;

    fputc('\n', s);
    fprintf(s, "%p: ", b);
    for (i = 0; i < len; i++) {
        if (i) {
            if (!elsize) {
                if (!(i % 4)) fprintf(s, " ");
                if (!(i % 8)) fprintf(s, " ");
            } else {
                if (!(i % elsize)) fprintf(s, " ");
            }

            if (!(i % NUM_COLS)) {
                /* A full row of hex is out: append its ASCII column and
                   start the next row, labelled with that row's address. */
                num_nonprintable += dump_ascii_run(s, bytes + last, i - last);
                last = i;
                fprintf(s, " (%zu)\n%p: ", i, (const void *)(bytes + i));
            }
        }
        fprintf(s, "%02x", bytes[i]);
    }
    if (len) {
        /* Flush the ASCII column of the last (possibly partial) row. */
        fputc(' ', s);
        num_nonprintable += dump_ascii_run(s, bytes + last, len - last);
        fprintf(s, " (%zu)\n", len);
    }
    return num_nonprintable;
}
/* realloc() wrapper that terminates the program (through FAILIF) when the
   allocation fails, so callers never have to check the result.

   ptr is the block to resize (or NULL), size is the new size in bytes.
   Returns the resized block. */
static inline void *REALLOC(void *ptr, unsigned int size) {
    void *m = realloc(ptr, size);
    /* realloc() may return NULL for a zero-byte request without that being
       an out-of-memory condition, so only a non-empty request is fatal.
       size is unsigned, hence %u rather than %d. */
    FAILIF(NULL == m && size != 0, "realloc(%p, %u) failed!\n", ptr, size);
    return m;
}
*/ +int dump_hex_buffer(FILE *s, void *b, size_t l, size_t elsize); + +#endif/*DEBUG_H*/ diff --git a/tools/apriori/hash.c b/tools/apriori/hash.c new file mode 100644 index 0000000..9f1a614 --- /dev/null +++ b/tools/apriori/hash.c @@ -0,0 +1,27 @@ +#include <common.h> +#include <debug.h> +#include <libelf.h> +#include <hash.h> +#include <string.h> + +int hash_lookup(Elf *elf, + Elf_Data *hash, + Elf_Data *symtab, + Elf_Data *symstr, + const char *symname) { + Elf32_Word *hash_data = (Elf32_Word *)hash->d_buf; + Elf32_Word index; + Elf32_Word nbuckets = *hash_data++; + Elf32_Word *buckets = ++hash_data; + Elf32_Word *chains = hash_data + nbuckets; + + index = buckets[elf_hash(symname) % nbuckets]; + while (index != STN_UNDEF && + strcmp((char *)symstr->d_buf + + ((Elf32_Sym *)symtab->d_buf)[index].st_name, + symname)) { + index = chains[index]; + } + + return index; +} diff --git a/tools/apriori/hash.h b/tools/apriori/hash.h new file mode 100644 index 0000000..af29b9e --- /dev/null +++ b/tools/apriori/hash.h @@ -0,0 +1,14 @@ +#ifndef HASH_H +#define HASH_H + +#include <common.h> +#include <libelf.h> +#include <gelf.h> + +int hash_lookup(Elf *elf, + Elf_Data *hash, + Elf_Data *symtab, + Elf_Data *symstr, + const char *symname); + +#endif/*HASH_H*/ diff --git a/tools/apriori/main.c b/tools/apriori/main.c new file mode 100644 index 0000000..552392a --- /dev/null +++ b/tools/apriori/main.c @@ -0,0 +1,229 @@ +/* TODO: + 1. check the ARM EABI version--this works for versions 1 and 2. + 2. use a more-intelligent approach to finding the symbol table, + symbol-string table, and the .dynamic section. + 3. fix the determination of the host and ELF-file endianness + 4. 
write the help screen +*/ + +#include <stdio.h> +#include <common.h> +#include <debug.h> +#include <libelf.h> +#include <elf.h> +#include <gelf.h> +#include <cmdline.h> +#include <string.h> +#include <errno.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <apriori.h> +#include <prelinkmap.h> + +/* Flag set by --verbose. This variable is global as it is accessed by the + macro INFO() in multiple compilation unites. */ +int verbose_flag = 0; +/* Flag set by --quiet. This variable is global as it is accessed by the + macro PRINT() in multiple compilation unites. */ +int quiet_flag = 0; +static void print_dynamic_symbols(Elf *elf, const char *symtab_name); + +static unsigned s_next_link_addr; +static off_t s_addr_increment; + +static void report_library_size_in_memory(const char *name, off_t fsize) +{ + ASSERT(s_next_link_addr != -1UL); + INFO("Setting next link address (current is at 0x%08x):\n", + s_next_link_addr); + if (s_addr_increment) { + FAILIF(s_addr_increment < fsize, + "Command-line-specified address increment of 0x%08llx (%lld) " + "less than file [%s]'s size of %lld bytes!\n", + s_addr_increment, s_addr_increment, name, fsize); + FAILIF(s_next_link_addr % 4096, + "User-provided address increment 0x%08lx " + "is not page-aligned!\n", + s_addr_increment); + INFO("\tignoring file size, adjusting by address increment.\n"); + s_next_link_addr += s_addr_increment; + } + else { + INFO("\tuser address increment is zero, adjusting by file size.\n"); + s_next_link_addr += fsize; + s_next_link_addr &= ~(4096 - 1); + } + INFO("\t[%s] file size 0x%08lx\n", + name, + fsize); + INFO("\tnext prelink address: 0x%08x\n", s_next_link_addr); + ASSERT(!(s_next_link_addr % 4096)); /* New address must be page-aligned */ +} + +static unsigned get_next_link_address(const char *name) { + return s_next_link_addr; +} + +int main(int argc, char **argv) { + /* Do not issue INFO() statements before you call 
get_options() to set + the verbose flag as necessary. + */ + + char **lookup_dirs, **default_libs; + char *mapfile, *output, *prelinkmap; + int start_addr, inc_addr, locals_only, num_lookup_dirs, + num_default_libs, dry_run; + int first = get_options(argc, argv, + &start_addr, &inc_addr, &locals_only, + &quiet_flag, + &dry_run, + &lookup_dirs, &num_lookup_dirs, + &default_libs, &num_default_libs, + &verbose_flag, + &mapfile, + &output, + &prelinkmap); + + /* Perform some command-line-parameter checks. */ + int cmdline_err = 0; + if (first == argc) { + ERROR("You must specify at least one input ELF file!\n"); + cmdline_err++; + } + /* We complain when the user does not specify a start address for + prelinking when the user does not pass the locals_only switch. The + reason is that we will have a collection of executables, which we always + prelink to zero, and shared libraries, which we prelink at the specified + prelink address. When the user passes the locals_only switch, we do not + fail if the user does not specify start_addr, because the file to + prelink may be an executable, and not a shared library. At this moment, + we do not know what the case is. We find that out when we call function + init_source(). 
+ */ + if (!locals_only && start_addr == -1) { + ERROR("You must specify --start-addr!\n"); + cmdline_err++; + } + if (start_addr == -1 && inc_addr != -1) { + ERROR("You must provide a start address if you provide an " + "address increment!\n"); + cmdline_err++; + } + if (prelinkmap != NULL && start_addr != -1) { + ERROR("You may not provide a prelink-map file (-p) and use -s/-i " + "at the same time!\n"); + cmdline_err++; + } + if (inc_addr == 0) { + ERROR("You may not specify a link-address increment of zero!\n"); + cmdline_err++; + } + if (locals_only) { + if (argc - first == 1) { + if (inc_addr != -1) { + ERROR("You are prelinking a single file; there is no point in " + "specifying a prelink-address increment!\n"); + /* This is nonfatal error, but paranoia is healthy. */ + cmdline_err++; + } + } + if (lookup_dirs != NULL || default_libs != NULL) { + ERROR("You are prelinking local relocations only; there is " + "no point in specifying lookup directories!\n"); + /* This is nonfatal error, but paranoia is healthy. */ + cmdline_err++; + } + } + + /* If there is an output option, then that must specify a file, if there is + a single input file, or a directory, if there are multiple input + files. 
*/ + if (output != NULL) { + struct stat output_st; + FAILIF(stat(output, &output_st) < 0 && errno != ENOENT, + "stat(%s): %s (%d)\n", + output, + strerror(errno), + errno); + + if (argc - first == 1) { + FAILIF(!errno && !S_ISREG(output_st.st_mode), + "you have a single input file: -o must specify a " + "file name!\n"); + } + else { + FAILIF(errno == ENOENT, + "you have multiple input files: -o must specify a " + "directory name, but %s does not exist!\n", + output); + FAILIF(!S_ISDIR(output_st.st_mode), + "you have multiple input files: -o must specify a " + "directory name, but %s is not a directory!\n", + output); + } + } + + if (cmdline_err) { + print_help(argv[0]); + FAILIF(1, "There are command-line-option errors.\n"); + } + + /* Check to see whether the ELF library is current. */ + FAILIF (elf_version(EV_CURRENT) == EV_NONE, "libelf is out of date!\n"); + + if (inc_addr < 0) { + if (!locals_only) + PRINT("User has not provided an increment address, " + "will use library size to calculate successive " + "prelink addresses.\n"); + inc_addr = 0; + } + + void (*func_report_library_size_in_memory)(const char *name, off_t fsize); + unsigned (*func_get_next_link_address)(const char *name); + + if (prelinkmap != NULL) { + INFO("Reading prelink addresses from prelink-map file [%s].\n", + prelinkmap); + pm_init(prelinkmap); + func_report_library_size_in_memory = pm_report_library_size_in_memory; + func_get_next_link_address = pm_get_next_link_address; + } + else { + INFO("Start address: 0x%x\n", start_addr); + INFO("Increment address: 0x%x\n", inc_addr); + s_next_link_addr = start_addr; + s_addr_increment = inc_addr; + func_report_library_size_in_memory = report_library_size_in_memory; + func_get_next_link_address = get_next_link_address; + } + + /* Prelink... 
*/ + apriori(&argv[first], argc - first, output, + func_report_library_size_in_memory, func_get_next_link_address, + locals_only, + dry_run, + lookup_dirs, num_lookup_dirs, + default_libs, num_default_libs, + mapfile); + + FREEIF(mapfile); + FREEIF(output); + if (lookup_dirs) { + ASSERT(num_lookup_dirs); + while (num_lookup_dirs--) + FREE(lookup_dirs[num_lookup_dirs]); + FREE(lookup_dirs); + } + if (default_libs) { + ASSERT(num_default_libs); + while (num_default_libs--) + FREE(default_libs[num_default_libs]); + FREE(default_libs); + } + + return 0; +} diff --git a/tools/apriori/prelink_info.c b/tools/apriori/prelink_info.c new file mode 100644 index 0000000..da7ca05 --- /dev/null +++ b/tools/apriori/prelink_info.c @@ -0,0 +1,106 @@ +#ifdef SUPPORT_ANDROID_PRELINK_TAGS + +#include <sys/types.h> +#include <fcntl.h> +#include <sys/types.h> +#include <unistd.h> +#include <string.h> +#include <errno.h> + +#include <prelink_info.h> +#include <debug.h> +#include <common.h> + +typedef struct { + int32_t mmap_addr; + char tag[4]; /* 'P', 'R', 'E', ' ' */ +} prelink_info_t __attribute__((packed)); + +static inline void set_prelink(long *prelink_addr, + int elf_little, + prelink_info_t *info) +{ + FAILIF(sizeof(prelink_info_t) != 8, "Unexpected sizeof(prelink_info_t) == %d!\n", sizeof(prelink_info_t)); + if (prelink_addr) { + if (!(elf_little ^ is_host_little())) { + /* Same endianness */ + *prelink_addr = info->mmap_addr; + } + else { + /* Different endianness */ + *prelink_addr = switch_endianness(info->mmap_addr); + } + } +} + +int check_prelinked(const char *fname, int elf_little, long *prelink_addr) +{ + FAILIF(sizeof(prelink_info_t) != 8, "Unexpected sizeof(prelink_info_t) == %d!\n", sizeof(prelink_info_t)); + int fd = open(fname, O_RDONLY); + FAILIF(fd < 0, "open(%s, O_RDONLY): %s (%d)!\n", + fname, strerror(errno), errno); + off_t end = lseek(fd, 0, SEEK_END); + + int nr = sizeof(prelink_info_t); + + off_t sz = lseek(fd, -nr, SEEK_CUR); + ASSERT((long)(end - sz) == 
(long)nr); + FAILIF(sz == (off_t)-1, + "lseek(%d, 0, SEEK_END): %s (%d)!\n", + fd, strerror(errno), errno); + + prelink_info_t info; + int num_read = read(fd, &info, nr); + FAILIF(num_read < 0, + "read(%d, &info, sizeof(prelink_info_t)): %s (%d)!\n", + fd, strerror(errno), errno); + FAILIF(num_read != sizeof(info), + "read(%d, &info, sizeof(prelink_info_t)): did not read %d bytes as " + "expected (read %d)!\n", + fd, sizeof(info), num_read); + + int prelinked = 0; + if (!strncmp(info.tag, "PRE ", 4)) { + set_prelink(prelink_addr, elf_little, &info); + prelinked = 1; + } + FAILIF(close(fd) < 0, "close(%d): %s (%d)!\n", fd, strerror(errno), errno); + return prelinked; +} + +void setup_prelink_info(const char *fname, int elf_little, long base) +{ + FAILIF(sizeof(prelink_info_t) != 8, "Unexpected sizeof(prelink_info_t) == %d!\n", sizeof(prelink_info_t)); + int fd = open(fname, O_WRONLY); + FAILIF(fd < 0, + "open(%s, O_WRONLY): %s (%d)\n" , + fname, strerror(errno), errno); + prelink_info_t info; + off_t sz = lseek(fd, 0, SEEK_END); + FAILIF(sz == (off_t)-1, + "lseek(%d, 0, SEEK_END): %s (%d)!\n", + fd, strerror(errno), errno); + + if (!(elf_little ^ is_host_little())) { + /* Same endianness */ + INFO("Host and ELF file [%s] have same endianness.\n", fname); + info.mmap_addr = base; + } + else { + /* Different endianness */ + INFO("Host and ELF file [%s] have different endianness.\n", fname); + info.mmap_addr = switch_endianness(base); + } + strncpy(info.tag, "PRE ", 4); + + int num_written = write(fd, &info, sizeof(info)); + FAILIF(num_written < 0, + "write(%d, &info, sizeof(info)): %s (%d)\n", + fd, strerror(errno), errno); + FAILIF(sizeof(info) != num_written, + "Could not write %d bytes (wrote only %d bytes) as expected!\n", + sizeof(info), num_written); + FAILIF(close(fd) < 0, "close(%d): %s (%d)!\n", fd, strerror(errno), errno); +} + +#endif /*SUPPORT_ANDROID_PRELINK_TAGS*/ diff --git a/tools/apriori/prelink_info.h b/tools/apriori/prelink_info.h new file mode 
100644 index 0000000..e2787cb --- /dev/null +++ b/tools/apriori/prelink_info.h @@ -0,0 +1,9 @@ +#ifndef PRELINK_INFO_H +#define PRELINK_INFO_H +#ifdef SUPPORT_ANDROID_PRELINK_TAGS + +int check_prelinked(const char *fname, int elf_little, long *prelink_addr); +void setup_prelink_info(const char *fname, int elf_little, long base); + +#endif +#endif/*PRELINK_INFO_H*/ diff --git a/tools/apriori/prelinkmap.c b/tools/apriori/prelinkmap.c new file mode 100644 index 0000000..739c181 --- /dev/null +++ b/tools/apriori/prelinkmap.c @@ -0,0 +1,139 @@ +#include <prelinkmap.h> +#include <debug.h> +#include <errno.h> +#include <string.h> +#include <libgen.h> +#include <ctype.h> + +typedef struct mapentry mapentry; + +struct mapentry +{ + mapentry *next; + unsigned base; + char name[0]; +}; + +static mapentry *maplist = 0; + +/* These values limit the address range within which we prelinked libraries + reside. The limit is not set in stone, but should be observed in the + prelink map, or the prelink step will fail. +*/ + +#define PRELINK_MIN 0x90000000 +#define PRELINK_MAX 0xB0000000 + +void pm_init(const char *file) +{ + unsigned line = 0; + char buf[256]; + char *x; + unsigned n; + FILE *fp; + mapentry *me; + unsigned last = -1UL; + + fp = fopen(file, "r"); + FAILIF(fp == NULL, "Error opening file %s: %s (%d)\n", + file, strerror(errno), errno); + + while(fgets(buf, 256, fp)){ + x = buf; + line++; + + /* eat leading whitespace */ + while(isspace(*x)) x++; + + /* comment or blank line? skip! */ + if(*x == '#') continue; + if(*x == 0) continue; + + /* skip name */ + while(*x && !isspace(*x)) x++; + + if(*x) { + *x++ = 0; + /* skip space before address */ + while(*x && isspace(*x)) x++; + } + + /* no address? complain. */ + if(*x == 0) { + fprintf(stderr,"warning: %s:%d no base address specified\n", + file, line); + continue; + } + + n = strtoul(x, 0, 16); + /* Note that this is not the only bounds check. 
If a library's size + exceeds its slot as defined in the prelink map, the prelinker will + exit with an error. See pm_report_library_size_in_memory(). + */ + FAILIF((n < PRELINK_MIN) || (n > PRELINK_MAX), + "%s:%d base 0x%08x out of range.\n", + file, line, n); + + me = malloc(sizeof(mapentry) + strlen(buf) + 1); + FAILIF(me == NULL, "Out of memory parsing %s\n", file); + + FAILIF(last <= n, "The prelink map is not in descending order " + "at entry %s (%08x)!\n", buf, n); + last = n; + + me->base = n; + strcpy(me->name, buf); + me->next = maplist; + maplist = me; + } + + fclose(fp); +} + +/* apriori() calls this function when it determine the size of a library + in memory. pm_report_library_size_in_memory() makes sure that the library + fits in the slot provided by the prelink map. +*/ +void pm_report_library_size_in_memory(const char *name, + off_t fsize) +{ + char *x; + mapentry *me; + + x = strrchr(name,'/'); + if(x) name = x+1; + + for(me = maplist; me; me = me->next){ + if(!strcmp(name, me->name)) { + off_t slot = me->next ? 
me->next->base : PRELINK_MAX; + slot -= me->base; + FAILIF(fsize > slot, + "prelink map error: library %s@0x%08x is too big " + "at %lld bytes, it runs %lld bytes into " + "library %s@0x%08x!\n", + me->name, me->base, fsize, fsize - slot, + me->next->name, me->next->base); + break; + } + } + + FAILIF(!me,"library '%s' not in prelink map\n", name); +} + +unsigned pm_get_next_link_address(const char *lookup_name) +{ + char *x; + mapentry *me; + + x = strrchr(lookup_name,'/'); + if(x) lookup_name = x+1; + + for(me = maplist; me; me = me->next){ + if(!strcmp(lookup_name, me->name)) { + return me->base; + } + } + + FAILIF(1==1,"library '%s' not in prelink map\n", lookup_name); + return 0; +} diff --git a/tools/apriori/prelinkmap.h b/tools/apriori/prelinkmap.h new file mode 100644 index 0000000..17f7660 --- /dev/null +++ b/tools/apriori/prelinkmap.h @@ -0,0 +1,10 @@ +#ifndef PRELINKMAP_H +#define PRELINKMAP_H + +#include <sys/types.h> + +extern void pm_init(const char *file); +extern void pm_report_library_size_in_memory(const char *name, off_t fsize); +extern unsigned pm_get_next_link_address(const char *name); + +#endif/*PRELINKMAP_H*/ diff --git a/tools/apriori/rangesort.c b/tools/apriori/rangesort.c new file mode 100644 index 0000000..b0295e8 --- /dev/null +++ b/tools/apriori/rangesort.c @@ -0,0 +1,317 @@ +#include <common.h> +#include <debug.h> +#include <rangesort.h> + +#define PARALLEL_ARRAY_SIZE (5) + +struct range_list_t { + range_t *array; +#ifdef DEBUG + int is_sorted; +#endif + int array_length; + int num_ranges; +}; + +range_list_t* init_range_list(void) { + range_list_t *ranges = (range_list_t *)MALLOC(sizeof(range_list_t)); + + ranges->array = (range_t *)MALLOC(PARALLEL_ARRAY_SIZE*sizeof(range_t)); + ranges->array_length = PARALLEL_ARRAY_SIZE; + ranges->num_ranges = 0; +#ifdef DEBUG + ranges->is_sorted = 0; +#endif + return ranges; +} + +void destroy_range_list(range_list_t *ranges) { + int idx; + for (idx = 0; idx < ranges->num_ranges; idx++) { + if 
(ranges->array[idx].user_dtor) { + ASSERT(ranges->array[idx].user); + ranges->array[idx].user_dtor(ranges->array[idx].user); + } + } + FREE(ranges->array); + FREE(ranges); +} + +static inline int CONTAINS(range_t *container, range_t *contained) { + return container->start <= contained->start && contained->length && + (container->start + container->length > + contained->start + contained->length); +} + +static inline int IN_RANGE(range_t *range, GElf_Off point) { + return + range->start <= point && + point < (range->start + range->length); +} + +static inline int INTERSECT(range_t *left, range_t *right) { + return + (IN_RANGE(left, right->start) && + IN_RANGE(right, left->start + left->length)) || + (IN_RANGE(right, left->start) && + IN_RANGE(left, right->start + right->length)); +} + +static int range_cmp_for_search(const void *l, const void *r) { + range_t *left = (range_t *)l, *right = (range_t *)r; + if (INTERSECT(left, right) || + CONTAINS(left, right) || + CONTAINS(right, left)) { + return 0; + } + return left->start - right->start; +} + +static inline void run_checks(const void *l, const void *r) { + range_t *left = (range_t *)l, *right = (range_t *)r; + if (CONTAINS(left, right)) { + if (left->err_fn) + left->err_fn(ERROR_CONTAINS, left, right); + FAILIF(1, "Range sorting error: [%lld, %lld) contains [%lld, %lld)!\n", + left->start, left->start + left->length, + right->start, right->start + right->length); + } + if (CONTAINS(right, left)) { + if (right->err_fn) + right->err_fn(ERROR_CONTAINS, left, right); + FAILIF(1, "Range sorting error: [%lld, %lld) contains [%lld, %lld)!\n", + right->start, right->start + right->length, + left->start, left->start + left->length); + } + if (INTERSECT(left, right)) { + if (left->err_fn) + left->err_fn(ERROR_OVERLAPS, left, right); + FAILIF(1, "Range sorting error: [%lld, %lld)and [%lld, %lld) intersect!\n", + left->start, left->start + left->length, + right->start, right->start + right->length); + } +} + +static int 
range_cmp(const void *l, const void *r) { + run_checks(l, r); + range_t *left = (range_t *)l, *right = (range_t *)r; + return left->start - right->start; +} + +void add_unique_range_nosort( + range_list_t *ranges, + GElf_Off start, + GElf_Off length, + void *user, + void (*err_fn)(range_error_t, range_t *, range_t *), + void (*user_dtor)(void * )) +{ + if (ranges->num_ranges == ranges->array_length) { + ranges->array_length += PARALLEL_ARRAY_SIZE; + ranges->array = REALLOC(ranges->array, + ranges->array_length*sizeof(range_t)); + } + ranges->array[ranges->num_ranges].start = start; + ranges->array[ranges->num_ranges].length = length; + ranges->array[ranges->num_ranges].user = user; + ranges->array[ranges->num_ranges].err_fn = err_fn; + ranges->array[ranges->num_ranges].user_dtor = user_dtor; + ranges->num_ranges++; +} + +range_list_t *sort_ranges(range_list_t *ranges) { + if (ranges->num_ranges > 1) + qsort(ranges->array, ranges->num_ranges, sizeof(range_t), range_cmp); + ranges->is_sorted = 1; + return ranges; +} + +range_t *find_range(range_list_t *ranges, GElf_Off value) { +#if 1 + int i; + for (i = 0; i < ranges->num_ranges; i++) { + if (ranges->array[i].start <= value && + value < ranges->array[i].start + ranges->array[i].length) + return ranges->array + i; + } + return NULL; +#else + ASSERT(ranges->is_sorted); /* The range list must be sorted */ + range_t lookup; + lookup.start = value; + lookup.length = 0; + return + (range_t *)bsearch(&lookup, + ranges->array, ranges->num_ranges, sizeof(range_t), + range_cmp_for_search); +#endif +} + +int get_num_ranges(const range_list_t *ranges) +{ + return ranges->num_ranges; +} + +range_t *get_sorted_ranges(const range_list_t *ranges, int *num_ranges) { + ASSERT(ranges->is_sorted); /* The range list must be sorted */ + if (num_ranges) { + *num_ranges = ranges->num_ranges; + } + return ranges->array; +} + +GElf_Off get_last_address(const range_list_t *ranges) { + ASSERT(ranges->num_ranges); + return + 
ranges->array[ranges->num_ranges-1].start + + ranges->array[ranges->num_ranges-1].length; +} + +static void handle_range_error(range_error_t err, + range_t *left, range_t *right) { + switch (err) { + case ERROR_CONTAINS: + ERROR("ERROR: section (%lld, %lld bytes) contains " + "section (%lld, %lld bytes)\n", + left->start, left->length, + right->start, right->length); + break; + case ERROR_OVERLAPS: + ERROR("ERROR: Section (%lld, %lld bytes) intersects " + "section (%lld, %lld bytes)\n", + left->start, left->length, + right->start, right->length); + break; + default: + ASSERT(!"Unknown range error code!"); + } + + FAILIF(1, "Range error.\n"); +} + +static void destroy_contiguous_range_info(void *user) { + contiguous_range_info_t *info = (contiguous_range_info_t *)user; + FREE(info->ranges); + FREE(info); +} + +static void handle_contiguous_range_error(range_error_t err, + range_t *left, + range_t *right) +{ + contiguous_range_info_t *left_data = + (contiguous_range_info_t *)left->user; + ASSERT(left_data); + contiguous_range_info_t *right_data = + (contiguous_range_info_t *)right->user; + ASSERT(right_data); + + PRINT("Contiguous-range overlap error. 
Printing contained ranges:\n"); + int cnt; + PRINT("\tLeft ranges:\n"); + for (cnt = 0; cnt < left_data->num_ranges; cnt++) { + PRINT("\t\t[%lld, %lld)\n", + left_data->ranges[cnt].start, + left_data->ranges[cnt].start + left_data->ranges[cnt].length); + } + PRINT("\tRight ranges:\n"); + for (cnt = 0; cnt < right_data->num_ranges; cnt++) { + PRINT("\t\t[%lld, %lld)\n", + right_data->ranges[cnt].start, + right_data->ranges[cnt].start + right_data->ranges[cnt].length); + } + + handle_range_error(err, left, right); +} + +range_list_t* get_contiguous_ranges(const range_list_t *input) +{ + ASSERT(input); + FAILIF(!input->is_sorted, + "get_contiguous_ranges(): input range list is not sorted!\n"); + + range_list_t* ret = init_range_list(); + int num_ranges; + range_t *ranges = get_sorted_ranges(input, &num_ranges); + + int end_idx = 0; + while (end_idx < num_ranges) { + int start_idx = end_idx++; + int old_end_idx = start_idx; + int total_length = ranges[start_idx].length; + while (end_idx < num_ranges) { + if (ranges[old_end_idx].start + ranges[old_end_idx].length != + ranges[end_idx].start) + break; + old_end_idx = end_idx++; + total_length += ranges[old_end_idx].length; + } + + contiguous_range_info_t *user = + (contiguous_range_info_t *)MALLOC(sizeof(contiguous_range_info_t)); + user->num_ranges = end_idx - start_idx; + user->ranges = (range_t *)MALLOC(user->num_ranges * sizeof(range_t)); + int i; + for (i = 0; i < end_idx - start_idx; i++) + user->ranges[i] = ranges[start_idx + i]; + add_unique_range_nosort(ret, + ranges[start_idx].start, + total_length, + user, + handle_contiguous_range_error, + destroy_contiguous_range_info); + } + + return ret; +} + +range_list_t* subtract_ranges(const range_list_t *r, const range_list_t *s) +{ + ASSERT(r); ASSERT(r->is_sorted); + ASSERT(s); ASSERT(s->is_sorted); + + range_list_t *result = init_range_list(); + + int r_num_ranges, r_idx; + range_t *r_ranges = get_sorted_ranges(r, &r_num_ranges); + ASSERT(r_ranges); + + int 
s_num_ranges, s_idx; + range_t *s_ranges = get_sorted_ranges(s, &s_num_ranges); + ASSERT(s_ranges); + + s_idx = 0; + for (r_idx = 0; r_idx < r_num_ranges; r_idx++) { + GElf_Off last_start = r_ranges[r_idx].start; + for (; s_idx < s_num_ranges; s_idx++) { + if (CONTAINS(&r_ranges[r_idx], &s_ranges[s_idx])) { + if (last_start == + r_ranges[r_idx].start + r_ranges[r_idx].length) { + break; + } + if (last_start == s_ranges[s_idx].start) { + last_start += s_ranges[s_idx].length; + continue; + } + INFO("Adding subtracted range [%lld, %lld)\n", + last_start, + s_ranges[s_idx].start); + add_unique_range_nosort( + result, + last_start, + s_ranges[s_idx].start - last_start, + NULL, + NULL, + NULL); + last_start = s_ranges[s_idx].start + s_ranges[s_idx].length; + } else { + ASSERT(!INTERSECT(&r_ranges[r_idx], &s_ranges[s_idx])); + break; + } + } /* while (s_idx < s_num_ranges) */ + } /* for (r_idx = 0; r_idx < r_num_ranges; r_idx++) */ + + return result; +} + + diff --git a/tools/apriori/rangesort.h b/tools/apriori/rangesort.h new file mode 100644 index 0000000..21db357 --- /dev/null +++ b/tools/apriori/rangesort.h @@ -0,0 +1,105 @@ +#ifndef RANGESORT_H +#define RANGESORT_H + +/* This implements a simple sorted list of non-overlapping ranges. */ + +#include <debug.h> +#include <common.h> +#include <gelf.h> + +typedef enum range_error_t { + ERROR_CONTAINS, + ERROR_OVERLAPS +} range_error_t; + +typedef struct range_t range_t; +struct range_t { + GElf_Off start; + GElf_Off length; + void *user; + void (*err_fn)(range_error_t, range_t *, range_t *); + void (*user_dtor)(void *); +}; + +typedef struct range_list_t range_list_t; + +range_list_t* init_range_list(); +void destroy_range_list(range_list_t *); + +/* Just adds a range to the list. We won't detect whether the range overlaps + other ranges or contains them, or is contained by them, till we call + sort_ranges(). 
*/ +void add_unique_range_nosort(range_list_t *ranges, + GElf_Off start, GElf_Off length, + void *user, + void (*err_fn)(range_error_t, range_t *, range_t *), + void (*user_dtor)(void * )); + +/* Sorts the ranges. If there are overlapping ranges or ranges that contain + other ranges, it will cause the program to exit with a FAIL. */ +range_list_t* sort_ranges(range_list_t *ranges); +/* Find which range value falls in. Return that range or NULL if value does + not fall within any range. */ +range_t *find_range(range_list_t *ranges, GElf_Off value); +int get_num_ranges(const range_list_t *ranges); +range_t *get_sorted_ranges(const range_list_t *ranges, int *num_ranges); +GElf_Off get_last_address(const range_list_t *ranges); + +/* This returns a range_list_t handle that contains ranges composed of the + adjacent ranges of the input range list. The user data of each range in + the range list is a structure of the type contiguous_range_info_t. + This structure contains an array of pointers to copies of the original + range_t structures comprising each new contiguous range, as well as the + length of that array. + + NOTE: The input range must be sorted! + + NOTE: destroy_range_list() will take care of releasing the data that it + allocates as a result of calling get_contiguous_ranges(). Do not free that + data yourself. + + NOTE: the user data of the original range_t structures is simply copied, so + be careful handling it. You can destroy the range_list_t with + destroy_range_list() as usual. On error, the function does not return--the + program terminates. + + NOTE: The returned range is not sorted. You must call sort_ranges() if you + need to. +*/ + +typedef struct { + int num_ranges; + range_t *ranges; +} contiguous_range_info_t; + +range_list_t* get_contiguous_ranges(const range_list_t *); + +/* The function below takes in two range lists: r and s, and subtracts the + ranges in s from those in r. 
For example, if r and s are as follows: + + r = { [0, 10) } + s = { [3, 5), [7, 9) } + + Then r - s is { [0, 3), [5, 7), [9, 10) } + + NOTE: Both range lists must be sorted on input. This is guarded by an + assertion. + + NOTE: Range s must contain ranges, which are fully contained by the span of + range r (the span being the interval between the start of the lowest + range in r, inclusive, and the end of the highest range in r, + exclusive). + + NOTE: In addition to the requirement above, range s must contain ranges, + each of which is a subrange of one of the ranges of r. + + NOTE: There is no user info associated with the resulting range. + + NOTE: The resulting range is not sorted. + + Ther returned list must be destroyed with destroy_range_list(). +*/ + +range_list_t* subtract_ranges(const range_list_t *r, const range_list_t *s); + +#endif/*RANGESORT_H*/ diff --git a/tools/apriori/source.c b/tools/apriori/source.c new file mode 100644 index 0000000..69c57c7 --- /dev/null +++ b/tools/apriori/source.c @@ -0,0 +1,18 @@ +#include <source.h> + +void find_section(source_t *source, Elf64_Addr address, + Elf_Scn **scn, + GElf_Shdr *shdr, + Elf_Data **data) +{ + range_t *range = find_range(source->sorted_sections, address); + FAILIF(NULL == range, + "Cannot match address %lld to any range in [%s]!\n", + address, + source->name); + *scn = (Elf_Scn *)range->user; + ASSERT(*scn); + FAILIF_LIBELF(NULL == gelf_getshdr(*scn, shdr), gelf_getshdr); + *data = elf_getdata(*scn, NULL); + FAILIF_LIBELF(NULL == *data, elf_getdata); +} diff --git a/tools/apriori/source.h b/tools/apriori/source.h new file mode 100644 index 0000000..a5d96bd --- /dev/null +++ b/tools/apriori/source.h @@ -0,0 +1,121 @@ +#ifndef SOURCE_H +#define SOURCE_H + +#include <sys/types.h> +#include <sys/stat.h> +#include <libelf.h> +#include <libebl.h> +#ifdef ARM_SPECIFIC_HACKS + #include <libebl_arm.h> +#endif/*ARM_SPECIFIC_HACKS*/ +#include <elf.h> +#include <gelf.h> +#include <rangesort.h> +#include 
<elfcopy.h> + +typedef struct source_t source_t; + +typedef struct { + Elf_Scn *scn; + GElf_Shdr shdr; + Elf_Data *data; + shdr_info_t *info; +} section_info_t; + +typedef struct { + GElf_Rel *rels; + int num_rels; /* number of relocations that were not finished */ + int rels_size; /* this is the size of rels[], NOT the number of rels! */ +} unfinished_relocation_t; + +typedef struct { + int processed; + size_t idx; /* index of DT entry in the .dynamic section, if entry has a ptr value */ + Elf64_Addr addr; /* if DT entry's value is an address, we save it here */ + size_t sz_idx; /* index of DT entry in the .dynamic section, if entry has a size value */ + Elf64_Xword size; /* if DT entry's value is a size, we save it here */ + + range_list_t *sections; /* list of sections corresponding to this entry */ + int num_unfinished_relocs; /* this variables is populated by adjust_dynamic_segment_for() + during the second pass of the prelinker */ +} dt_rel_info_t; + +struct source_t { + source_t *next; + + char *name; /* full path name of this executable file */ + char *output; /* name of the output file or directory */ + int output_is_dir; /* nonzero if output is a directory, 0 if output is a file */ + /* ELF-related information: */ + Elf *oldelf; + Elf *elf; + /* info[] is an array of structures describing the sections of the new ELF + file. We populate the info[] array in clone_elf(), and use it to + adjust the size of the ELF file when we modify the relocation-entry + section. 
+ */ + shdr_info_t *shdr_info; + GElf_Ehdr old_ehdr_mem; /* store ELF header of original library */ + GElf_Ehdr ehdr_mem; /* store ELF header of new library */ + GElf_Phdr *phdr_info; + Ebl *ebl; + Elf_Data *shstrtab_data; + int elf_fd; + int newelf_fd; /* fd of output file, -1 if output == NULL */ + struct stat elf_file_info; + GElf_Ehdr elf_hdr, oldelf_hdr; + size_t shstrndx; + int shnum; /* number of sections */ + int dry_run; /* 0 if we do not update the files, 1 (default) otherwise */ + + section_info_t symtab; + section_info_t strtab; + section_info_t dynamic; + section_info_t hash; + section_info_t bss; + + range_list_t *sorted_sections; + + section_info_t *relocation_sections; /* relocation sections in file */ + int num_relocation_sections; /* number of relocation sections (<= relocation_sections_size) */ + int relocation_sections_size; /* sice of array -- NOT number of relocs! */ + + /* relocation sections that contain relocations that could not be handled. + This array is parallel to relocation_sections, and for each entry + in that array, it contains a list of relocations that could not be + handled. + */ + unfinished_relocation_t *unfinished; + + /* The sections field of these two structuer contains a list of elements + of the member variable relocations. */ + dt_rel_info_t rel; + dt_rel_info_t jmprel; + + int num_syms; /* number of symbols in symbol table. This is the length of + both exports[] and satisfied[] arrays. */ + + /* This is an array that contains one element for each library dependency + listed in the executable or shared library. */ + source_t **lib_deps; /* list of library dependencies */ + int num_lib_deps; /* actual number of library dependencies */ + int lib_deps_size; /* size of lib_deps array--NOT actual number of deps! */ + + /* This is zero for executables. For shared libraries, it is the address + at which the library was prelinked. 
*/ + unsigned base; +#ifdef SUPPORT_ANDROID_PRELINK_TAGS + /* When we read in a file, if it has the prelinked tag, we set prelinked + to 1 and the prelink address in the tag to prelink_base. This address + must match the value of base that we choose. */ + int prelinked; + long prelink_base; /* valid if prelinked != 0 */ +#endif/*SUPPORT_ANDROID_PRELINK_TAGS*/ +}; + +extern void find_section(source_t *source, Elf64_Addr address, + Elf_Scn **scn, + GElf_Shdr *shdr, + Elf_Data **data); + +#endif/*SOURCE_H*/ diff --git a/tools/apriori/tweak.h b/tools/apriori/tweak.h new file mode 100755 index 0000000..3afedee --- /dev/null +++ b/tools/apriori/tweak.h @@ -0,0 +1,15 @@ +#ifndef TWEAK_H
+#define TWEAK_H
+
+#include <source.h>
+
+/* This function will break up the .bss section into multiple subsegments,
+ depending on whether the .bss segment contains copy-relocated symbols. This
+ will produce a nonstandard ELF file (with multiple .bss sections) that the
+ linker will need to know how to handle.
+
+ source: the source_t (declared in source.h) describing the ELF file to
+ examine -- presumably the one whose .bss is tracked in source->bss;
+ confirm against the implementation.
+
+ The return value is the number of segments that the .bss segment was broken
+ into (zero if the .bss segment was not modified). */
+
+int tweak_bss_if_necessary(source_t *source);
+
+#endif/*TWEAK_H*/
|