#include #include #include #include #include #include #ifdef ARM_SPECIFIC_HACKS #include #endif/*ARM_SPECIFIC_HACKS*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef ADJUST_ELF #error "ADJUST_ELF must be defined!" #endif /* When this macro is defined, apriori sets to ZERO those relocation values for which it canot find the appropriate referent. */ #define PERMISSIVE #define COPY_SECTION_DATA_BUFFER (0) /* When this macro is set to a nonzero value, we replace calls to elf_strptr() on the target ELF handle with code that extracts the strings directly from the data buffers of that ELF handle. In this case, elf_strptr() does not work as expected, as it tries to read the data buffer of the associated string section directly from the file, and that buffer does not exist yet in the file, since we haven't committed our changes yet. */ #define ELF_STRPTR_IS_BROKEN (1) /* When the macro below is defined, apriori does not mark for removal those relocation sections that it fully handles. Instead, apriori just sets their sizes to zero. This is more for debugging than of any actual use. This macro is meaningful only when ADJUST_ELF!=0 */ #define REMOVE_HANDLED_SECTIONS extern int verbose_flag; static source_t *sources = NULL; /* Retouch data is a very concise representation of the resolved relocations. This data is used to randomize the location of prelinked libraries at update time, on the device. */ // We will store retouch entries into this buffer, then dump them at the // end of the .so file before setup_prelink_info(). #define RETOUCH_MAX_SIZE 600000 static char *retouch_buf; static unsigned int retouch_byte_cnt; // Compression state. static int32_t offs_prev; static uint32_t cont_prev; #define false 0 #define true 1 void retouch_init(void) { offs_prev = 0; cont_prev = 0; retouch_byte_cnt = 0; retouch_buf = malloc(RETOUCH_MAX_SIZE+12); FAILIF(retouch_buf == NULL, "Could not allocate %d bytes.\n", RETOUCH_MAX_SIZE+12); } // // We use three encoding schemes; this takes care of much of the redundancy // inherent in lists of relocations: // // * two bytes, leading 1, 2b for d_offset ("s"), 13b for d_contents ("c") // // 76543210 76543210 // 1ssccccc cccccccc // // * three bytes, leading 01, 2b for delta offset, 20b for delta contents // // 76543210 76543210 76543210 // 01sscccc cccccccc cccccccc // // * eigth bytes, leading 00, 30b for offset, 32b for contents // // 76543210 76543210 76543210 76543210 // 00ssssss ssssssss ssssssss ssssssss + 4 bytes contents // // NOTE 1: All deltas are w.r.t. the previous line in the list. // NOTE 2: Two-bit ("ss") offsets mean: "00"=4, "01"=8, "10"=12, and "11"=16. // NOTE 3: Delta contents are signed. To map back to a int32 we refill with 1s. // NOTE 4: Special encoding for -1 offset. Extended back to 32b when decoded. // void retouch_encode(int32_t offset, uint32_t contents) { int64_t d_offs = offset-offs_prev; int64_t d_cont = (int64_t)contents-(int64_t)cont_prev; uint8_t output[8]; uint32_t output_size; if ((d_offs > 3) && (d_offs % 4) == 0 && (d_offs / 4) < 5 && (d_cont < 4000) && (d_cont > -4000)) { // we can fit in 2 bytes output[0] = 0x80 | (((d_offs/4)-1) << 5) | (((uint64_t)d_cont & 0x1f00) >> 8); output[1] = ((uint64_t)d_cont & 0xff); output_size = 2; } else if ((d_offs > 3) && (d_offs % 4) == 0 && (d_offs / 4) < 5 && (d_cont < 510000) && (d_cont > -510000)) { // fit in 3 bytes output[0] = 0x40 | (((d_offs/4)-1) << 4) | (((uint64_t)d_cont & 0xf0000) >> 16); output[1] = ((uint64_t)d_cont & 0xff00) >> 8; output[2] = ((uint64_t)d_cont & 0xff); output_size = 3; } else { // fit in 8 bytes; we can't support files bigger than (1GB-1) // with this encoding: no library is that big anyway.. FAILIF(offset < -1 || offset > 0x3ffffffe, "Offset out of range.\n"); output[0] = (offset & 0x3f000000) >> 24; output[1] = (offset & 0xff0000) >> 16; output[2] = (offset & 0xff00) >> 8; output[3] = (offset & 0xff); output[4] = (contents & 0xff000000) >> 24; output[5] = (contents & 0xff0000) >> 16; output[6] = (contents & 0xff00) >> 8; output[7] = (contents & 0xff); output_size = 8; } // If this happens, the retouch buffer size can be bumped up. // Currently, the largest case is libwebcore, at about 250K. FAILIF((retouch_byte_cnt+output_size) > RETOUCH_MAX_SIZE, "About to overflow retouch buffer.\n"); memcpy(retouch_buf+retouch_byte_cnt, output, output_size); retouch_byte_cnt += output_size; offs_prev = offset; cont_prev = contents; } void retouch_dump(const char *fname, int elf_little, unsigned int retouch_byte_cnt, char *retouch_buf) { int fd = open(fname, O_WRONLY); FAILIF(fd < 0, "open(%s, O_WRONLY): %s (%d)\n" , fname, strerror(errno), errno); off_t sz = lseek(fd, 0, SEEK_END); FAILIF(sz == (off_t)-1, "lseek(%d, 0, SEEK_END): %s (%d)!\n", fd, strerror(errno), errno); // The retouch blob ends with "RETOUCH XXXX", where XXXX is the 4-byte // size of the retouch blob, in target endianness. strncpy(retouch_buf+retouch_byte_cnt, "RETOUCH ", 8); if (elf_little ^ is_host_little()) { *(unsigned int *)(retouch_buf+retouch_byte_cnt+8) = switch_endianness(retouch_byte_cnt); } else { *(unsigned int *)(retouch_buf+retouch_byte_cnt+8) = retouch_byte_cnt; } int num_written = write(fd, retouch_buf, retouch_byte_cnt+12); FAILIF(num_written < 0, "write(%d, &info, sizeof(info)): %s (%d)\n", fd, strerror(errno), errno); FAILIF((retouch_byte_cnt+12) != num_written, "Could not write %d bytes as expected (wrote %d bytes instead)!\n", retouch_byte_cnt, num_written); FAILIF(close(fd) < 0, "close(%d): %s (%d)!\n", fd, strerror(errno), errno); } /* End of retouch code. */ #if defined(DEBUG) && 0 static void print_shdr(source_t *source, Elf_Scn *scn) { GElf_Shdr shdr_mem, *shdr; shdr = gelf_getshdr(scn, &shdr_mem); Elf_Data *data = elf_getdata(scn, NULL); INFO("\t%02d: data = %p, hdr = { offset = %8lld, size = %lld }, " "data->d_buf = %p data->d_off = %lld, data->d_size = %d\n", elf_ndxscn(scn), data, shdr->sh_offset, shdr->sh_size, data->d_buf, data->d_off, data->d_size); } static void print_shdr_idx(source_t *source, Elf *elf, int idx) { print_shdr(source, elf_getscn(elf, idx)); } static void print_shdrs(source_t *source) { Elf_Scn *scn = NULL; INFO("section offset dump for new ELF\n"); while ((scn = elf_nextscn (source->elf, scn)) != NULL) print_shdr(source, scn); INFO("\nsection offset dump for original ELF\n"); while ((scn = elf_nextscn (source->oldelf, scn)) != NULL) print_shdr(source, scn); #if 0 { INFO("section offset dump for new ELF\n"); int i = 0; for (i = 0; i < source->shnum; i++) { scn = elf_getscn(source->elf, i); print_shdr(source, scn); } } #endif } #endif /* DEBUG */ static char * find_file(const char *libname, char **lib_lookup_dirs, int num_lib_lookup_dirs); static inline source_t* find_source(const char *name, char **lib_lookup_dirs, int num_lib_lookup_dirs) { char *full = find_file(name, lib_lookup_dirs, num_lib_lookup_dirs); if (full) { source_t *trav = sources; while (trav) { if (!strcmp(trav->name, full)) break; trav = trav->next; } free(full); return trav; } return NULL; } static inline void add_to_sources(source_t *src) { src->next = sources; sources = src; } static void handle_range_error(range_error_t err, range_t *left, range_t *right) { switch (err) { case ERROR_CONTAINS: ERROR("ERROR: section (%lld, %lld bytes) contains " "section (%lld, %lld bytes)\n", left->start, left->length, right->start, right->length); break; case ERROR_OVERLAPS: ERROR("ERROR: Section (%lld, %lld bytes) intersects " "section (%lld, %lld bytes)\n", left->start, left->length, right->start, right->length); break; default: ASSERT(!"Unknown range error code!"); } FAILIF(1, "Range error.\n"); } static void create_elf_sections(source_t *source, Elf *elf) { INFO("Creating new ELF sections.\n"); ASSERT(elf == NULL || source->elf == NULL || source->elf == elf); if (elf == NULL) { ASSERT(source->elf != NULL); elf = source->elf; } int cnt = 1; Elf_Scn *oldscn = NULL, *scn; while ((oldscn = elf_nextscn (source->oldelf, oldscn)) != NULL) { GElf_Shdr *oldshdr, oldshdr_mem; scn = elf_newscn(elf); FAILIF_LIBELF(NULL == scn, elf_newscn); oldshdr = gelf_getshdr(oldscn, &oldshdr_mem); FAILIF_LIBELF(NULL == oldshdr, gelf_getshdr); /* Set the section header of the new section to be the same as the headset of the old section by default. */ gelf_update_shdr(scn, oldshdr); /* Copy the section data */ Elf_Data *olddata = elf_getdata(oldscn, NULL); FAILIF_LIBELF(NULL == olddata, elf_getdata); Elf_Data *data = elf_newdata(scn); FAILIF_LIBELF(NULL == data, elf_newdata); *data = *olddata; #if COPY_SECTION_DATA_BUFFER if (olddata->d_buf != NULL) { data->d_buf = MALLOC(data->d_size); memcpy(data->d_buf, olddata->d_buf, olddata->d_size); } #endif INFO("\tsection %02d: [%-30s] created\n", cnt, elf_strptr(source->oldelf, source->shstrndx, oldshdr->sh_name)); if (ADJUST_ELF) { ASSERT(source->shdr_info != NULL); /* Create a new section. */ source->shdr_info[cnt].idx = cnt; source->shdr_info[cnt].newscn = scn; source->shdr_info[cnt].data = data; source->shdr_info[cnt]. use_old_shdr_for_relocation_calculations = 1; INFO("\tsection [%s] (old offset %lld, old size %lld) " "will have index %d (was %d).\n", source->shdr_info[cnt].name, source->shdr_info[cnt].old_shdr.sh_offset, source->shdr_info[cnt].old_shdr.sh_size, source->shdr_info[cnt].idx, elf_ndxscn(source->shdr_info[cnt].scn)); /* Same as the next assert */ ASSERT(elf_ndxscn (source->shdr_info[cnt].newscn) == source->shdr_info[cnt].idx); } ASSERT(elf_ndxscn(scn) == (size_t)cnt); cnt++; } } /* This function sets up the shdr_info[] array of a source_t. We call it only when ADJUST_ELF is non-zero (i.e., support for adjusting an ELF file for changes in sizes and numbers of relocation sections is compiled in. Note that setup_shdr_info() depends only on the information in source->oldelf, not on source->elf. */ static void setup_shdr_info(source_t *source) { if (ADJUST_ELF) { /* Allocate the section-header-info buffer. */ INFO("Allocating section-header info structure (%d) bytes...\n", source->shnum * sizeof (shdr_info_t)); source->shdr_info = (shdr_info_t *)CALLOC(source->shnum, sizeof (shdr_info_t)); /* Mark the SHT_NULL section as handled. */ source->shdr_info[0].idx = 2; int cnt = 1; Elf_Scn *oldscn = NULL; while ((oldscn = elf_nextscn (source->oldelf, oldscn)) != NULL) { /* Copy the section header */ ASSERT(elf_ndxscn(oldscn) == (size_t)cnt); /* Initialized the corresponding shdr_info entry */ { /* Mark the section with a non-zero index. Later, when we decide to drop a section, we will set its idx to zero, and assign section numbers to the remaining sections. */ source->shdr_info[cnt].idx = 1; source->shdr_info[cnt].scn = oldscn; /* NOTE: Here we pupulate the section-headset struct with the same values as the original section's. After the first run of prelink(), we will update the sh_size fields of those sections that need resizing. */ FAILIF_LIBELF(NULL == gelf_getshdr(oldscn, &source->shdr_info[cnt].shdr), gelf_getshdr); /* Get the name of the section. */ source->shdr_info[cnt].name = elf_strptr (source->oldelf, source->shstrndx, source->shdr_info[cnt].shdr.sh_name); INFO("\tname: %s\n", source->shdr_info[cnt].name); FAILIF(source->shdr_info[cnt].name == NULL, "Malformed file: section %d name is null\n", cnt); /* Remember the shdr.sh_link value. We need to remember this value for those sections that refer to other sections. For example, we need to remember it for relocation-entry sections, because if we modify the symbol table that a relocation-entry section is relative to, then we need to patch the relocation section. By the time we get to deciding whether we need to patch the relocation section, we will have overwritten its header's sh_link field with a new value. */ source->shdr_info[cnt].old_shdr = source->shdr_info[cnt].shdr; INFO("\t\toriginal sh_link: %08d\n", source->shdr_info[cnt].old_shdr.sh_link); INFO("\t\toriginal sh_addr: %lld\n", source->shdr_info[cnt].old_shdr.sh_addr); INFO("\t\toriginal sh_offset: %lld\n", source->shdr_info[cnt].old_shdr.sh_offset); INFO("\t\toriginal sh_size: %lld\n", source->shdr_info[cnt].old_shdr.sh_size); FAILIF(source->shdr_info[cnt].shdr.sh_type == SHT_SYMTAB_SHNDX, "Cannot handle sh_type SHT_SYMTAB_SHNDX!\n"); FAILIF(source->shdr_info[cnt].shdr.sh_type == SHT_GROUP, "Cannot handle sh_type SHT_GROUP!\n"); FAILIF(source->shdr_info[cnt].shdr.sh_type == SHT_GNU_versym, "Cannot handle sh_type SHT_GNU_versym!\n"); } cnt++; } /* for each section */ } /* if (ADJUST_ELF) */ } static Elf * init_elf(source_t *source, bool create_new_sections) { Elf *elf; if (source->output != NULL) { if (source->output_is_dir) { source->output_is_dir++; char *dir = source->output; int dirlen = strlen(dir); /* The main() function maintains a pointer to source->output; it frees the buffer after apriori() returns. */ source->output = MALLOC(dirlen + 1 + /* slash */ strlen(source->name) + 1); /* null terminator */ strcpy(source->output, dir); source->output[dirlen] = '/'; strcpy(source->output + dirlen + 1, basename(source->name)); } /* Save some of the info; needed for retouching (ASLR). */ retouch_init(); source->newelf_fd = open(source->output, O_RDWR | O_CREAT, 0666); FAILIF(source->newelf_fd < 0, "open(%s): %s (%d)\n", source->output, strerror(errno), errno); elf = elf_begin(source->newelf_fd, ELF_C_WRITE, NULL); FAILIF_LIBELF(elf == NULL, elf_begin); } else { elf = elf_clone(source->oldelf, ELF_C_EMPTY); FAILIF_LIBELF(elf == NULL, elf_clone); } GElf_Ehdr *oldehdr = gelf_getehdr(source->oldelf, &source->old_ehdr_mem); FAILIF_LIBELF(NULL == oldehdr, gelf_getehdr); /* Create new ELF and program headers for the elf file */ INFO("Creating empty ELF and program headers...\n"); FAILIF_LIBELF(gelf_newehdr (elf, gelf_getclass (source->oldelf)) == 0, gelf_newehdr); FAILIF_LIBELF(oldehdr->e_type != ET_REL && gelf_newphdr (elf, oldehdr->e_phnum) == 0, gelf_newphdr); /* Copy the elf header */ INFO("Copying ELF header...\n"); GElf_Ehdr *ehdr = gelf_getehdr(elf, &source->ehdr_mem); FAILIF_LIBELF(NULL == ehdr, gelf_getehdr); memcpy(ehdr, oldehdr, sizeof(GElf_Ehdr)); FAILIF_LIBELF(!gelf_update_ehdr(elf, ehdr), gelf_update_ehdr); /* Copy out the old program header: notice that if the ELF file does not have a program header, this loop won't execute. */ INFO("Copying ELF program header...\n"); { int cnt; source->phdr_info = (GElf_Phdr *)CALLOC(ehdr->e_phnum, sizeof(GElf_Phdr)); for (cnt = 0; cnt < ehdr->e_phnum; ++cnt) { INFO("\tRetrieving entry %d\n", cnt); FAILIF_LIBELF(NULL == gelf_getphdr(source->oldelf, cnt, source->phdr_info + cnt), gelf_getphdr); FAILIF_LIBELF(gelf_update_phdr (elf, cnt, source->phdr_info + cnt) == 0, gelf_update_phdr); } } /* Copy the sections and the section headers. */ if (create_new_sections) { create_elf_sections(source, elf); } /* The ELF library better follows our layout when this is not a relocatable object file. */ elf_flagelf (elf, ELF_C_SET, (ehdr->e_type != ET_REL ? ELF_F_LAYOUT : 0)); return elf; } static shdr_info_t *lookup_shdr_info_by_new_section( source_t *source, const char *sname, Elf_Scn *newscn) { if (source->shdr_info == NULL) return NULL; int cnt; for (cnt = 0; cnt < source->shnum; cnt++) { if (source->shdr_info[cnt].newscn == newscn) { INFO("\t\tnew section at %p matches shdr_info[%d], " "section [%s]!\n", newscn, cnt, source->shdr_info[cnt].name); FAILIF(strcmp(sname, source->shdr_info[cnt].name), "Matched section's name [%s] does not match " "looked-up section's name [%s]!\n", source->shdr_info[cnt].name, sname); return source->shdr_info + cnt; } } return NULL; } static bool do_init_source(source_t *source, unsigned base) { /* Find various sections. */ size_t scnidx; Elf_Scn *scn; GElf_Shdr *shdr, shdr_mem; source->sorted_sections = init_range_list(); INFO("Processing [%s]'s sections...\n", source->name); for (scnidx = 1; scnidx < (size_t)source->shnum; scnidx++) { INFO("\tGetting section index %d...\n", scnidx); scn = elf_getscn(source->elf, scnidx); if (NULL == scn) { /* If we get an error from elf_getscn(), it means that a section at the requested index does not exist. This may happen when we remove sections. Since we do not update source->shnum (we can't, since we need to know the original number of sections to know source->shdr_info[]'s length), we will attempt to retrieve a section for an index that no longer exists in the new ELF file. */ INFO("\tThere is no section at index %d anymore, continuing.\n", scnidx); continue; } shdr = gelf_getshdr(scn, &shdr_mem); FAILIF_LIBELF(NULL == shdr, gelf_getshdr); /* We haven't modified the shstrtab section, and so shdr->sh_name has the same value as before. Thus we look up the name based on the old ELF handle. We cannot use shstrndx on the new ELF handle because the index of the shstrtab section may have changed (and calling elf_getshstrndx() returns the same section index, so libelf can't handle thise ither). */ const char *sname = elf_strptr(source->oldelf, source->shstrndx, shdr->sh_name); ASSERT(sname); INFO("\tAdding [%s] (%lld, %lld)...\n", sname, shdr->sh_addr, shdr->sh_addr + shdr->sh_size); if ((shdr->sh_flags & SHF_ALLOC) == SHF_ALLOC) { add_unique_range_nosort(source->sorted_sections, shdr->sh_addr, shdr->sh_size, scn, handle_range_error, NULL); /* no user-data destructor */ } if (shdr->sh_type == SHT_DYNSYM) { source->symtab.scn = scn; source->symtab.data = elf_getdata(scn, NULL); FAILIF_LIBELF(NULL == source->symtab.data, elf_getdata); memcpy(&source->symtab.shdr, shdr, sizeof(GElf_Shdr)); source->symtab.info = lookup_shdr_info_by_new_section( source, sname, scn); ASSERT(source->shdr_info == NULL || source->symtab.info != NULL); /* The sh_link field of the section header of the symbol table contains the index of the associated strings table. */ source->strtab.scn = elf_getscn(source->elf, source->symtab.shdr.sh_link); FAILIF_LIBELF(NULL == source->strtab.scn, elf_getscn); FAILIF_LIBELF(NULL == gelf_getshdr(source->strtab.scn, &source->strtab.shdr), gelf_getshdr); source->strtab.data = elf_getdata(source->strtab.scn, NULL); FAILIF_LIBELF(NULL == source->strtab.data, elf_getdata); source->strtab.info = lookup_shdr_info_by_new_section( source, elf_strptr(source->oldelf, source->shstrndx, source->strtab.shdr.sh_name), source->strtab.scn); ASSERT(source->shdr_info == NULL || source->strtab.info != NULL); } else if (shdr->sh_type == SHT_DYNAMIC) { source->dynamic.scn = scn; source->dynamic.data = elf_getdata(scn, NULL); FAILIF_LIBELF(NULL == source->dynamic.data, elf_getdata); memcpy(&source->dynamic.shdr, shdr, sizeof(GElf_Shdr)); source->dynamic.info = lookup_shdr_info_by_new_section( source, sname, scn); ASSERT(source->shdr_info == NULL || source->dynamic.info != NULL); } else if (shdr->sh_type == SHT_HASH) { source->hash.scn = scn; source->hash.data = elf_getdata(scn, NULL); FAILIF_LIBELF(NULL == source->hash.data, elf_getdata); memcpy(&source->hash.shdr, shdr, sizeof(GElf_Shdr)); source->hash.info = lookup_shdr_info_by_new_section( source, sname, scn); ASSERT(source->shdr_info == NULL || source->hash.info != NULL); } else if (shdr->sh_type == SHT_REL || shdr->sh_type == SHT_RELA) { if (source->num_relocation_sections == source->relocation_sections_size) { source->relocation_sections_size += 5; source->relocation_sections = (section_info_t *)REALLOC(source->relocation_sections, source->relocation_sections_size * sizeof(section_info_t)); } section_info_t *reloc = source->relocation_sections + source->num_relocation_sections; reloc->scn = scn; reloc->info = lookup_shdr_info_by_new_section(source, sname, scn); ASSERT(source->shdr_info == NULL || reloc->info != NULL); reloc->data = elf_getdata(scn, NULL); FAILIF_LIBELF(NULL == reloc->data, elf_getdata); memcpy(&reloc->shdr, shdr, sizeof(GElf_Shdr)); source->num_relocation_sections++; } else if (!strcmp(sname, ".bss")) { source->bss.scn = scn; source->bss.data = elf_getdata(scn, NULL); source->bss.info = lookup_shdr_info_by_new_section( source, sname, scn); ASSERT(source->shdr_info == NULL || source->bss.info != NULL); /* The BSS section occupies no space in the ELF file. */ FAILIF_LIBELF(NULL == source->bss.data, elf_getdata) FAILIF(NULL != source->bss.data->d_buf, "Enexpected: section [%s] has data!", sname); memcpy(&source->bss.shdr, shdr, sizeof(GElf_Shdr)); } } sort_ranges(source->sorted_sections); source->unfinished = (unfinished_relocation_t *)CALLOC(source->num_relocation_sections, sizeof(unfinished_relocation_t)); if (source->dynamic.scn == NULL) { INFO("File [%s] does not have a dynamic section!\n", source->name); /* If this is a static executable, we won't update anything. */ source->dry_run = 1; return false; } FAILIF(source->symtab.scn == NULL, "File [%s] does not have a dynamic symbol table!\n", source->name); FAILIF(source->hash.scn == NULL, "File [%s] does not have a hash table!\n", source->name); FAILIF(source->hash.shdr.sh_link != elf_ndxscn(source->symtab.scn), "Hash points to section %d, not to %d as expected!\n", source->hash.shdr.sh_link, elf_ndxscn(source->symtab.scn)); /* Now, find out how many symbols we have and allocate the array of satisfied symbols. NOTE: We don't count the number of undefined symbols here; we will iterate over the symbol table later, and count them then, when it is more convenient. */ size_t symsize = gelf_fsize (source->elf, ELF_T_SYM, 1, source->elf_hdr.e_version); ASSERT(symsize); source->num_syms = source->symtab.data->d_size / symsize; source->base = (source->oldelf_hdr.e_type == ET_DYN) ? base : 0; INFO("Relink base for [%s]: 0x%lx\n", source->name, source->base); FAILIF(source->base == -1, "Can't prelink [%s]: it's a shared library and you did not " "provide a prelink address!\n", source->name); #ifdef SUPPORT_ANDROID_PRELINK_TAGS FAILIF(source->prelinked && source->base != source->prelink_base, "ERROR: file [%s] has already been prelinked for 0x%08lx. " "Cannot change to 0x%08lx!\n", source->name, source->prelink_base, source->base); #endif/*SUPPORT_ANDROID_PRELINK_TAGS*/ return true; } static source_t* init_source(const char *full_path, const char *output, int is_file, int base, int dry_run) { source_t *source = (source_t *)CALLOC(1, sizeof(source_t)); ASSERT(full_path); source->name = full_path; source->output = output; source->output_is_dir = !is_file; source->newelf_fd = -1; source->elf_fd = -1; INFO("Opening %s...\n", full_path); source->elf_fd = open(full_path, ((dry_run || output != NULL) ? O_RDONLY : O_RDWR)); FAILIF(source->elf_fd < 0, "open(%s): %s (%d)\n", full_path, strerror(errno), errno); FAILIF(fstat(source->elf_fd, &source->elf_file_info) < 0, "fstat(%s(fd %d)): %s (%d)\n", source->name, source->elf_fd, strerror(errno), errno); INFO("File [%s]'s size is %lld bytes!\n", source->name, source->elf_file_info.st_size); INFO("Calling elf_begin(%s)...\n", full_path); source->oldelf = elf_begin(source->elf_fd, (dry_run || output != NULL) ? ELF_C_READ : ELF_C_RDWR, NULL); FAILIF_LIBELF(source->oldelf == NULL, elf_begin); /* libelf can recognize COFF and A.OUT formats, but we handle only ELF. */ if(elf_kind(source->oldelf) != ELF_K_ELF) { ERROR("Input file %s is not in ELF format!\n", full_path); return NULL; } /* Make sure this is a shared library or an executable. */ { INFO("Making sure %s is a shared library or an executable...\n", full_path); FAILIF_LIBELF(0 == gelf_getehdr(source->oldelf, &source->oldelf_hdr), gelf_getehdr); FAILIF(source->oldelf_hdr.e_type != ET_DYN && source->oldelf_hdr.e_type != ET_EXEC, "%s must be a shared library (elf type is %d, expecting %d).\n", full_path, source->oldelf_hdr.e_type, ET_DYN); } #ifdef SUPPORT_ANDROID_PRELINK_TAGS /* First, check to see if the file has been prelinked. */ source->prelinked = check_prelinked(source->name, source->oldelf_hdr.e_ident[EI_DATA] == ELFDATA2LSB, &source->prelink_base); /* Note that in the INFO() below we need to use oldelf_hdr because we haven't cloned the ELF file yet, and source->elf_hdr is not defined. */ if (source->prelinked) { PRINT("%s [%s] is already prelinked at 0x%08lx!\n", (source->oldelf_hdr.e_type == ET_EXEC ? "Executable" : "Shared library"), source->name, source->prelink_base); /* Force a dry run when the file has already been prelinked */ source->dry_run = dry_run = 1; } else { INFO("%s [%s] is not prelinked!\n", (source->oldelf_hdr.e_type == ET_EXEC ? "Executable" : "Shared library"), source->name); source->dry_run = dry_run; } #endif/*SUPPORT_ANDROID_PRELINK_TAGS*/ /* Get the index of the section-header-strings-table section. */ FAILIF_LIBELF(elf_getshstrndx (source->oldelf, &source->shstrndx) < 0, elf_getshstrndx); FAILIF_LIBELF(elf_getshnum (source->oldelf, (size_t *)&source->shnum) < 0, elf_getshnum); /* When we have a dry run, or when ADJUST_ELF is enabled, we use source->oldelf for source->elf, because the former is mmapped privately, so changes to it have no effect. With ADJUST_ELF, the first run of prelink() is a dry run. We will reopen the elf file for write access after that dry run, before we call adjust_elf. */ source->elf = (ADJUST_ELF || source->dry_run) ? source->oldelf : init_elf(source, ADJUST_ELF == 0); FAILIF_LIBELF(0 == gelf_getehdr(source->elf, &source->elf_hdr), gelf_getehdr); #ifdef DEBUG ASSERT(!memcmp(&source->oldelf_hdr, &source->elf_hdr, sizeof(source->elf_hdr))); #endif /* Get the EBL handling. The -g option is currently the only reason we need EBL so dont open the backend unless necessary. */ source->ebl = ebl_openbackend (source->elf); FAILIF_LIBELF(NULL == source->ebl, ebl_openbackend); #ifdef ARM_SPECIFIC_HACKS FAILIF_LIBELF(0 != arm_init(source->elf, source->elf_hdr.e_machine, source->ebl, sizeof(Ebl)), arm_init); #endif/*ARM_SPECIFIC_HACKS*/ add_to_sources(source); if (do_init_source(source, base) == false) return NULL; return source; } /* complements do_init_source() */ static void do_destroy_source(source_t *source) { int cnt; destroy_range_list(source->sorted_sections); source->sorted_sections = NULL; for (cnt = 0; cnt < source->num_relocation_sections; cnt++) { FREEIF(source->unfinished[cnt].rels); source->unfinished[cnt].rels = NULL; source->unfinished[cnt].num_rels = 0; source->unfinished[cnt].rels_size = 0; } if (source->jmprel.sections != NULL) { destroy_range_list(source->jmprel.sections); source->jmprel.sections = NULL; } if (source->rel.sections != NULL) { destroy_range_list(source->rel.sections); source->rel.sections = NULL; } FREE(source->unfinished); /* do_init_source() */ source->unfinished = NULL; FREE(source->relocation_sections); /* do_init_source() */ source->relocation_sections = NULL; source->num_relocation_sections = source->relocation_sections_size = 0; } static void destroy_source(source_t *source) { /* Is this a little-endian ELF file? */ if (source->oldelf != source->elf) { /* If it's a dynamic executable, this must not be a dry run. */ if (!source->dry_run && source->dynamic.scn != NULL) { FAILIF_LIBELF(elf_update(source->elf, ELF_C_WRITE) == -1, elf_update); } FAILIF_LIBELF(elf_end(source->oldelf), elf_end); } ebl_closebackend(source->ebl); FAILIF_LIBELF(elf_end(source->elf), elf_end); FAILIF(close(source->elf_fd) < 0, "Could not close file %s: %s (%d)!\n", source->name, strerror(errno), errno); FAILIF((source->newelf_fd >= 0) && (close(source->newelf_fd) < 0), "Could not close output file: %s (%d)!\n", strerror(errno), errno); #ifdef SUPPORT_ANDROID_PRELINK_TAGS if (!source->dry_run) { if (source->dynamic.scn != NULL && source->elf_hdr.e_type != ET_EXEC) { /* For some reason, trying to write directly to source->elf_fd causes a "bad file descriptor" error because of something libelf does. We just close the file descriptor and open a new one in function setup_prelink_info() below. */ INFO("%s: setting up prelink tag at end of file.\n", source->output ? source->output : source->name); retouch_encode(-1, source->base); retouch_dump(source->output ? source->output : source->name, source->elf_hdr.e_ident[EI_DATA] == ELFDATA2LSB, retouch_byte_cnt, retouch_buf); setup_prelink_info(source->output ? source->output : source->name, source->elf_hdr.e_ident[EI_DATA] == ELFDATA2LSB, source->base); } else INFO("%s: executable, NOT setting up prelink tag.\n", source->name); } #endif/*SUPPORT_ANDROID_PRELINK_TAGS*/ do_destroy_source(source); if (retouch_buf != NULL) { free(retouch_buf); retouch_buf = NULL; } if (source->shstrtab_data != NULL) FREEIF(source->shstrtab_data->d_buf); /* adjust_elf */ FREE(source->lib_deps); /* list of library dependencies (process_file()) */ FREEIF(source->shdr_info); /* setup_shdr_info() */ FREEIF(source->phdr_info); /* init_elf() */ FREE(source->name); /* assigned to by init_source() */ /* If the output is a directory, in init_elf() we allocate a buffer where we copy the directory, a slash, and the file name. Here we free that buffer. */ if (source->output_is_dir > 1) { FREE(source->output); } FREE(source); /* init_source() */ } static void reinit_source(source_t *source) { do_destroy_source(source); do_init_source(source, source->base); { /* We've gathered all the DT_DYNAMIC entries; now we need to figure out which relocation sections fit in which range as described by the entries. Before we do so, however, we will populate the jmprel and rel members of source, as well as their sizes. */ size_t dynidx, numdyn; GElf_Dyn *dyn, dyn_mem; numdyn = source->dynamic.shdr.sh_size / source->dynamic.shdr.sh_entsize; source->rel.idx = source->rel.sz_idx = -1; source->jmprel.idx = source->jmprel.sz_idx = -1; for (dynidx = 0; dynidx < numdyn; dynidx++) { dyn = gelf_getdyn (source->dynamic.data, dynidx, &dyn_mem); FAILIF_LIBELF(NULL == dyn, gelf_getdyn); switch (dyn->d_tag) { case DT_NEEDED: break; case DT_JMPREL: INFO("reinit_source: DT_JMPREL is at index %d, 0x%08llx.\n", dynidx, dyn->d_un.d_ptr); source->jmprel.idx = dynidx; source->jmprel.addr = dyn->d_un.d_ptr; break; case DT_PLTRELSZ: INFO("reinit_source: DT_PLTRELSZ is at index %d, 0x%08llx.\n", dynidx, dyn->d_un.d_val); source->jmprel.sz_idx = dynidx; source->jmprel.size = dyn->d_un.d_val; break; case DT_REL: INFO("reinit_source: DT_REL is at index %d, 0x%08llx.\n", dynidx, dyn->d_un.d_ptr); source->rel.idx = dynidx; source->rel.addr = dyn->d_un.d_ptr; break; case DT_RELSZ: INFO("reinit_source: DT_RELSZ is at index %d, 0x%08llx.\n", dynidx, dyn->d_un.d_val); source->rel.sz_idx = dynidx; source->rel.size = dyn->d_un.d_val; break; case DT_RELA: case DT_RELASZ: FAILIF(1, "Can't handle DT_RELA and DT_RELASZ entries!\n"); break; } /* switch */ } /* for each dynamic entry... */ } } static GElf_Sym *hash_lookup_global_or_weak_symbol(source_t *lib, const char *symname, GElf_Sym *lib_sym_mem) { int lib_symidx = hash_lookup(lib->elf, lib->hash.data, lib->symtab.data, lib->strtab.data, symname); GElf_Sym sym_mem; if (SHN_UNDEF != lib_symidx) { /* We found the symbol--now check to see if it is global or weak. If this is the case, then the symbol satisfies the dependency. */ GElf_Sym *lib_sym = gelf_getsymshndx(lib->symtab.data, NULL, lib_symidx, &sym_mem, NULL); FAILIF_LIBELF(NULL == lib_sym, gelf_getsymshndx); #if ELF_STRPTR_IS_BROKEN ASSERT(!strcmp( symname, ((char *)elf_getdata(elf_getscn(lib->elf, lib->symtab.shdr.sh_link), NULL)->d_buf) + lib_sym->st_name)); #else ASSERT(!strcmp( symname, elf_strptr(lib->elf, lib->symtab.shdr.sh_link, lib_sym->st_name))); #endif if (lib_sym->st_shndx != SHN_UNDEF && (GELF_ST_BIND(lib_sym->st_info) == STB_GLOBAL || GELF_ST_BIND(lib_sym->st_info) == STB_WEAK)) { memcpy(lib_sym_mem, &sym_mem, sizeof(GElf_Sym)); return lib_sym; } } return NULL; } static source_t *lookup_symbol_in_dependencies(source_t *source, const char *symname, GElf_Sym *found_sym) { source_t *sym_source = NULL; /* return value */ /* This is an undefined symbol. Go over the list of libraries and look it up. */ size_t libidx; int found = 0; source_t *last_found = NULL; for (libidx = 0; libidx < (size_t)source->num_lib_deps; libidx++) { source_t *lib = source->lib_deps[libidx]; if (hash_lookup_global_or_weak_symbol(lib, symname, found_sym) != NULL) { sym_source = lib; if (found) { if (found == 1) { found++; ERROR("ERROR: multiple definitions found for [%s:%s]!\n", source->name, symname); ERROR("\tthis definition [%s]\n", lib->name); } ERROR("\tprevious definition [%s]\n", last_found->name); } last_found = lib; if (!found) found = 1; } } #if ELF_STRPTR_IS_BROKEN ASSERT(!sym_source || !strcmp(symname, (char *)(elf_getdata(elf_getscn( sym_source->elf, sym_source->symtab.shdr.sh_link), NULL)->d_buf) + found_sym->st_name)); #else ASSERT(!sym_source || !strcmp(symname, elf_strptr(sym_source->elf, sym_source->symtab.shdr.sh_link, found_sym->st_name))); #endif return sym_source; } static int do_prelink(source_t *source, Elf_Data *reloc_scn_data, int reloc_scn_entry_size, unfinished_relocation_t *unfinished, int locals_only, bool dry_run, char **lib_lookup_dirs, int num_lib_lookup_dirs, char **default_libs, int num_default_libs, int *num_unfinished_relocs) { int num_relocations = 0; size_t num_rels; num_rels = reloc_scn_data->d_size / reloc_scn_entry_size; INFO("\tThere are %d relocations.\n", num_rels); int rel_idx; for (rel_idx = 0; rel_idx < (size_t)num_rels; rel_idx++) { GElf_Rel *rel, rel_mem; //INFO("\tHandling relocation %d/%d\n", rel_idx, num_rels); rel = gelf_getrel(reloc_scn_data, rel_idx, &rel_mem); FAILIF_LIBELF(rel == NULL, gelf_getrel); GElf_Sym *sym = NULL, sym_mem; unsigned sym_idx = GELF_R_SYM(rel->r_info); source_t *sym_source = NULL; /* found_sym points to found_sym_mem, when sym_source != NULL, and to sym, when the sybmol is locally defined. If the symbol is not locally defined and sym_source == NULL, then sym is not defined either. */ GElf_Sym *found_sym = NULL, found_sym_mem; const char *symname = NULL; int sym_is_local = 1; if (sym_idx) { sym = gelf_getsymshndx(source->symtab.data, NULL, sym_idx, &sym_mem, NULL); FAILIF_LIBELF(NULL == sym, gelf_getsymshndx); #if ELF_STRPTR_IS_BROKEN symname = ((char *)source->strtab.data->d_buf) + sym->st_name; #else symname = elf_strptr(source->elf, elf_ndxscn(source->strtab.scn), sym->st_name); #endif /* If the symbol is defined and is either not in the BSS section, or if it is in the BSS then the relocation is not a copy relocation, then the symbol's source is this library (i.e., it is locally-defined). Otherwise, the symbol is imported. */ sym_is_local = 0; if (sym->st_shndx != SHN_UNDEF && (source->bss.scn == NULL || sym->st_shndx != elf_ndxscn(source->bss.scn) || #ifdef ARM_SPECIFIC_HACKS GELF_R_TYPE(rel->r_info) != R_ARM_COPY #else 1 #endif )) { sym_is_local = 1; } if (sym_is_local) { INFO("\t\tSymbol [%s:%s] is defined locally.\n", source->name, symname); sym_source = source; found_sym = sym; } else if (!locals_only) { sym_source = lookup_symbol_in_dependencies(source, symname, &found_sym_mem); /* The symbol was not in the list of dependencies, which by itself is an error: it means either that the symbol does not exist anywhere, or that the library which has the symbol has not been listed as a dependency in this library or executable. It could also mean (for a library) that the symbol is defined in the executable that links agsinst it, which is obviously not a good thing. These are bad things, but they do happen, which is why we have the ability to provide a list of default dependencies, including executables. Here we check to see if the symbol has been defined in any of them. */ if (NULL == sym_source) { INFO("\t\tChecking default dependencies...\n"); int i; source_t *lib, *old_sym_source = NULL; int printed_initial_error = 0; for (i = 0; i < num_default_libs; i++) { INFO("\tChecking in [%s].\n", default_libs[i]); lib = find_source(default_libs[i], lib_lookup_dirs, num_lib_lookup_dirs); FAILIF(NULL == lib, "Can't find default library [%s]!\n", default_libs[i]); if (hash_lookup_global_or_weak_symbol(lib, symname, &found_sym_mem)) { found_sym = &found_sym_mem; sym_source = lib; #if ELF_STRPTR_IS_BROKEN ASSERT(!strcmp(symname, (char *)(elf_getdata( elf_getscn( sym_source->elf, sym_source->symtab. shdr.sh_link), NULL)->d_buf) + found_sym->st_name)); #else ASSERT(!strcmp(symname, elf_strptr(sym_source->elf, sym_source->symtab.shdr.sh_link, found_sym->st_name))); #endif INFO("\tFound symbol [%s] in [%s]!\n", symname, lib->name); if (old_sym_source) { if (printed_initial_error == 0) { printed_initial_error = 1; ERROR("Multiple definition of [%s]:\n" "\t[%s]\n", symname, old_sym_source->name); } ERROR("\t[%s]\n", sym_source->name); } old_sym_source = sym_source; } else { INFO("\tCould not find symbol [%s] in default " "lib [%s]!\n", symname, lib->name); } } if (sym_source) { ERROR("ERROR: Could not find [%s:%s] in dependent " "libraries (but found in default [%s])!\n", source->name, symname, sym_source->name); } } else { found_sym = &found_sym_mem; /* We found the symbol in a dependency library. */ INFO("\t\tSymbol [%s:%s, value %lld] is imported from [%s]\n", source->name, symname, found_sym->st_value, sym_source->name); } } /* if symbol is defined in this library... */ if (!locals_only) { /* If a symbol is weak and we haven't found it, then report an error. We really need to find a way to set its value to zero. The problem is that it needs to refer to some section. */ FAILIF(NULL == sym_source && GELF_ST_BIND(sym->st_info) == STB_WEAK, "Cannot handle weak symbols yet (%s:%s <- %s).\n", source->name, symname, sym_source->name); #ifdef PERMISSIVE if (GELF_ST_BIND(sym->st_info) != STB_WEAK && NULL == sym_source) { ERROR("ERROR: Can't find symbol [%s:%s] in dependent or " "default libraries!\n", source->name, symname); } #else FAILIF(GELF_ST_BIND(sym->st_info) != STB_WEAK && NULL == sym_source, "Can't find symbol [%s:%s] in dependent or default " "libraries!\n", source->name, symname); #endif } /* if (!locals_only) */ } #if 0 // too chatty else INFO("\t\tno symbol is associated with this relocation\n"); #endif // We prelink only local symbols when locals_only == 1. bool can_relocate = true; if (!sym_is_local && (symname[0] == 'd' && symname[1] == 'l' && symname[2] != '\0' && (!strcmp(symname + 2, "open") || !strcmp(symname + 2, "close") || !strcmp(symname + 2, "sym") || !strcmp(symname + 2, "error")))) { INFO("********* NOT RELOCATING LIBDL SYMBOL [%s]\n", symname); can_relocate = false; } if (can_relocate && (sym_is_local || !locals_only)) { GElf_Shdr shdr_mem; Elf_Scn *scn; Elf_Data *data; find_section(source, rel->r_offset, &scn, &shdr_mem, &data); unsigned *dest = (unsigned*)(((char *)data->d_buf) + (rel->r_offset - shdr_mem.sh_addr)); unsigned rel_type = GELF_R_TYPE(rel->r_info); char buf[64]; INFO("\t\t%-15s ", ebl_reloc_type_name(source->ebl, GELF_R_TYPE(rel->r_info), buf, sizeof(buf))); /* Section-name offsets do not change, so we use oldelf to get the strings. This makes a difference in the second pass of the perlinker, after the call to adjust_elf, because source->shstrndx no longer contains the index of the section-header-strings table. */ const char *sname = elf_strptr( source->oldelf, source->shstrndx, shdr_mem.sh_name); switch (rel_type) { case R_ARM_JUMP_SLOT: case R_ARM_GLOB_DAT: case R_ARM_ABS32: ASSERT(data->d_buf != NULL); ASSERT(data->d_size >= rel->r_offset - shdr_mem.sh_addr); #ifdef PERMISSIVE if (sym_source == NULL) { ERROR("ERROR: Permissive relocation " "[%-15s] [%s:%s]: [0x%llx] = ZERO\n", ebl_reloc_type_name(source->ebl, GELF_R_TYPE(rel->r_info), buf, sizeof(buf)), sname, symname, rel->r_offset); if (!dry_run) *dest = 0; } else #endif { ASSERT(sym_source); INFO("[%s:%s]: [0x%llx] = 0x%llx + 0x%lx\n", sname, symname, rel->r_offset, found_sym->st_value, sym_source->base); if (!dry_run) { PRINT("WARNING: Relocation type not supported " "for retouching!"); *dest = found_sym->st_value + sym_source->base; } } num_relocations++; break; case R_ARM_RELATIVE: ASSERT(data->d_buf != NULL); ASSERT(data->d_size >= rel->r_offset - shdr_mem.sh_addr); FAILIF(sym != NULL, "Unsupported RELATIVE form (symbol != 0)...\n"); INFO("[%s:%s]: [0x%llx] = 0x%x + 0x%lx\n", sname, symname ?: "(symbol has no name)", rel->r_offset, *dest, source->base); if (!dry_run) { *dest += source->base; /* Output an entry for the ASLR touch-up process. */ retouch_encode(rel->r_offset -shdr_mem.sh_addr +shdr_mem.sh_offset, *dest); } num_relocations++; break; case R_ARM_COPY: #ifdef PERMISSIVE if (sym_source == NULL) { ERROR("ERROR: Permissive relocation " "[%-15s] [%s:%s]: NOT PERFORMING\n", ebl_reloc_type_name(source->ebl, GELF_R_TYPE(rel->r_info), buf, sizeof(buf)), sname, symname); } else #endif { ASSERT(sym); ASSERT(sym_source); GElf_Shdr src_shdr_mem; Elf_Scn *src_scn; Elf_Data *src_data; find_section(sym_source, found_sym->st_value, &src_scn, &src_shdr_mem, &src_data); INFO("Found [%s:%s (%lld)] in section [%s] .\n", sym_source->name, symname, found_sym->st_value, #if ELF_STRPTR_IS_BROKEN (((char *)elf_getdata( elf_getscn(sym_source->elf, sym_source->shstrndx), NULL)->d_buf) + src_shdr_mem.sh_name) #else elf_strptr(sym_source->elf, sym_source->shstrndx, src_shdr_mem.sh_name) #endif ); unsigned *src = NULL; if (src_data->d_buf == NULL) { #ifdef PERMISSIVE if (sym_source->bss.scn == NULL || elf_ndxscn(src_scn) != elf_ndxscn(sym_source->bss.scn)) { ERROR("ERROR: Permissive relocation (NULL source " "not from .bss) [%-15s] [%s:%s]: " "NOT PERFORMING\n", ebl_reloc_type_name(source->ebl, GELF_R_TYPE(rel->r_info), buf, sizeof(buf)), sname, symname); } #endif } else { ASSERT(src_data->d_size >= found_sym->st_value - src_shdr_mem.sh_addr); src = (unsigned*)(((char *)src_data->d_buf) + (found_sym->st_value - src_shdr_mem.sh_addr)); } ASSERT(symname); INFO("[%s:%s]: [0x%llx] <- [0x%llx] size %lld\n", sname, symname, rel->r_offset, found_sym->st_value, found_sym->st_size); #ifdef PERMISSIVE if (src_data->d_buf != NULL || (sym_source->bss.scn != NULL && elf_ndxscn(src_scn) == elf_ndxscn(sym_source->bss.scn))) #endif/*PERMISSIVE*/ { if (data->d_buf == NULL) { INFO("Incomplete relocation [%-15s] of [%s:%s].\n", ebl_reloc_type_name(source->ebl, GELF_R_TYPE(rel->r_info), buf, sizeof(buf)), sname, symname); FAILIF(unfinished == NULL, "You passed unfinished as NULL expecting " "to handle all relocations, " "but at least one cannot be handled!\n"); if (unfinished->num_rels == unfinished->rels_size) { unfinished->rels_size += 10; unfinished->rels = (GElf_Rel *)REALLOC( unfinished->rels, unfinished->rels_size * sizeof(GElf_Rel)); } unfinished->rels[unfinished->num_rels++] = *rel; num_relocations--; (*num_unfinished_relocs)++; } else { if (src_data->d_buf != NULL) { ASSERT(data->d_buf != NULL); ASSERT(data->d_size >= rel->r_offset - shdr_mem.sh_addr); if (!dry_run) { PRINT("WARNING: Relocation type not supported " "for retouching!"); memcpy(dest, src, found_sym->st_size); } } else { ASSERT(src == NULL); ASSERT(elf_ndxscn(src_scn) == elf_ndxscn(sym_source->bss.scn)); if (!dry_run) { PRINT("WARNING: Relocation type not supported " "for retouching!"); memset(dest, 0, found_sym->st_size); } } } } num_relocations++; } break; default: FAILIF(1, "Unknown relocation type %d!\n", rel_type); } // switch } // relocate else { INFO("\t\tNot relocating symbol [%s]%s\n", symname, (can_relocate ? ", relocating only locals" : ", which is a libdl symbol")); FAILIF(unfinished == NULL, "You passed unfinished as NULL expecting to handle all " "relocations, but at least one cannot be handled!\n"); if (unfinished->num_rels == unfinished->rels_size) { unfinished->rels_size += 10; unfinished->rels = (GElf_Rel *)REALLOC( unfinished->rels, unfinished->rels_size * sizeof(GElf_Rel)); } unfinished->rels[unfinished->num_rels++] = *rel; (*num_unfinished_relocs)++; } } // for each relocation entry return num_relocations; } static int prelink(source_t *source, int locals_only, bool dry_run, char **lib_lookup_dirs, int num_lib_lookup_dirs, char **default_libs, int num_default_libs, int *num_unfinished_relocs) { INFO("Prelinking [%s] (number of relocation sections: %d)%s...\n", source->name, source->num_relocation_sections, (dry_run ? " (dry run)" : "")); int num_relocations = 0; int rel_scn_idx; for (rel_scn_idx = 0; rel_scn_idx < source->num_relocation_sections; rel_scn_idx++) { section_info_t *reloc_scn = source->relocation_sections + rel_scn_idx; unfinished_relocation_t *unfinished = source->unfinished + rel_scn_idx; /* We haven't modified the shstrtab section, and so shdr->sh_name has the same value as before. Thus we look up the name based on the old ELF handle. We cannot use shstrndx on the new ELF handle because the index of the shstrtab section may have changed (and calling elf_getshstrndx() returns the same section index, so libelf can't handle thise ither). If reloc_scn->info is available, we can assert that the section-name has not changed. If this assertion fails, then we cannot use the elf_strptr() trick below to get the section name. One solution would be to save it in the section_info_t structure. */ ASSERT(reloc_scn->info == NULL || reloc_scn->shdr.sh_name == reloc_scn->info->old_shdr.sh_name); const char *sname = elf_strptr(source->oldelf, source->shstrndx, reloc_scn->shdr.sh_name); ASSERT(sname != NULL); INFO("\n\tIterating relocation section [%s]...\n", sname); /* In general, the new size of the section differs from the original size of the section, because we can handle some of the relocations. This was communicated to adjust_elf, which modified the ELF file according to the new section sizes. Now, when prelink() does the actual work of prelinking, it needs to know the original size of the relocation section so that it can see all of the original relocation entries! */ size_t d_size = reloc_scn->data->d_size; if (reloc_scn->info != NULL && reloc_scn->data->d_size != reloc_scn->info->old_shdr.sh_size) { INFO("Setting size of section [%s] to from new size %d to old " "size %lld temporarily (so prelinker can see all " "relocations).\n", reloc_scn->info->name, d_size, reloc_scn->info->old_shdr.sh_size); reloc_scn->data->d_size = reloc_scn->info->old_shdr.sh_size; } num_relocations += do_prelink(source, reloc_scn->data, reloc_scn->shdr.sh_entsize, unfinished, locals_only, dry_run, lib_lookup_dirs, num_lib_lookup_dirs, default_libs, num_default_libs, num_unfinished_relocs); if (reloc_scn->data->d_size != d_size) { ASSERT(reloc_scn->info != NULL); INFO("Resetting size of section [%s] to %d\n", reloc_scn->info->name, d_size); reloc_scn->data->d_size = d_size; } } /* Now prelink those relocation sections which were fully handled, and therefore removed. They are not a part of the source->relocation_sections[] array anymore, but we can find them by scanning source->shdr_info[] and looking for sections with idx == 0. */ if (ADJUST_ELF && source->shdr_info != NULL) { /* Walk over the shdr_info[] array to see if we've removed any relocation sections. prelink() those sections as well. */ int i; for (i = 0; i < source->shnum; i++) { shdr_info_t *info = source->shdr_info + i; if (info->idx == 0 && (info->shdr.sh_type == SHT_REL || info->shdr.sh_type == SHT_RELA)) { Elf_Data *data = elf_getdata(info->scn, NULL); ASSERT(data->d_size == 0); data->d_size = info->old_shdr.sh_size; INFO("\n\tIterating relocation section [%s], which was " "discarded (size %d, entry size %lld).\n", info->name, data->d_size, info->old_shdr.sh_entsize); num_relocations += do_prelink(source, data, info->old_shdr.sh_entsize, NULL, /* the section was fully handled */ locals_only, dry_run, lib_lookup_dirs, num_lib_lookup_dirs, default_libs, num_default_libs, num_unfinished_relocs); data->d_size = 0; } } } return num_relocations; } static char * find_file(const char *libname, char **lib_lookup_dirs, int num_lib_lookup_dirs) { if (libname[0] == '/') { /* This is an absolute path name--just return it. */ /* INFO("ABSOLUTE PATH: [%s].\n", libname); */ return strdup(libname); } else { /* First try the working directory. */ int fd; if ((fd = open(libname, O_RDONLY)) > 0) { close(fd); /* INFO("FOUND IN CURRENT DIR: [%s].\n", libname); */ return strdup(libname); } else { /* Iterate over all library paths. For each path, append the file name and see if there is a file at that place. If that fails, bail out. */ char *name; while (num_lib_lookup_dirs--) { size_t lib_len = strlen(*lib_lookup_dirs); /* one extra character for the slash, and another for the terminating NULL. */ name = (char *)MALLOC(lib_len + strlen(libname) + 2); strcpy(name, *lib_lookup_dirs); name[lib_len] = '/'; strcpy(name + lib_len + 1, libname); if ((fd = open(name, O_RDONLY)) > 0) { close(fd); /* INFO("FOUND: [%s] in [%s].\n", libname, name); */ return name; } INFO("NOT FOUND: [%s] in [%s].\n", libname, name); free(name); } } } return NULL; } static void adjust_dynamic_segment_entry_size(source_t *source, dt_rel_info_t *dyn) { /* Update the size entry in the DT_DYNAMIC segment. */ GElf_Dyn *dyn_entry, dyn_entry_mem; dyn_entry = gelf_getdyn(source->dynamic.data, dyn->sz_idx, &dyn_entry_mem); FAILIF_LIBELF(NULL == dyn_entry, gelf_getdyn); /* If we are calling this function to adjust the size of the dynamic entry, then there should be some unfinished relocations remaining. If there are none, then we should remove the entry from the dynamic section altogether. */ ASSERT(dyn->num_unfinished_relocs); size_t relsize = gelf_fsize(source->elf, ELF_T_REL, 1, source->elf_hdr.e_version); if (unlikely(verbose_flag)) { char buf[64]; INFO("Updating entry %d: [%-10s], %08llx --> %08x\n", dyn->sz_idx, ebl_dynamic_tag_name (source->ebl, dyn_entry->d_tag, buf, sizeof (buf)), dyn_entry->d_un.d_val, dyn->num_unfinished_relocs * relsize); } dyn_entry->d_un.d_val = dyn->num_unfinished_relocs * relsize; FAILIF_LIBELF(!gelf_update_dyn(source->dynamic.data, dyn->sz_idx, dyn_entry), gelf_update_dyn); } static void adjust_dynamic_segment_entries(source_t *source) { /* This function many remove entries from the dynamic segment, but it won't resize the relevant section. It'll just fill the remainted with empty DT entries. FIXME: This is not guaranteed right now. If a dynamic segment does not end with null DT entries, I think this will break. */ FAILIF(source->rel.processed, "More than one section matches DT_REL entry in dynamic segment!\n"); FAILIF(source->jmprel.processed, "More than one section matches DT_JMPREL entry in " "dynamic segment!\n"); source->rel.processed = source->jmprel.processed = 1; if (source->rel.num_unfinished_relocs > 0) adjust_dynamic_segment_entry_size(source, &source->rel); if (source->jmprel.num_unfinished_relocs > 0) adjust_dynamic_segment_entry_size(source, &source->jmprel); /* If at least one of the entries is empty, then we need to remove it. We have already adjusted the size of the other. */ if (source->rel.num_unfinished_relocs == 0 || source->jmprel.num_unfinished_relocs == 0) { /* We need to delete the DT_REL/DT_RELSZ and DT_PLTREL/DT_PLTRELSZ entries from the dynamic segment. */ GElf_Dyn *dyn_entry, dyn_entry_mem; size_t dynidx, updateidx; size_t numdyn = source->dynamic.shdr.sh_size / source->dynamic.shdr.sh_entsize; for (updateidx = dynidx = 0; dynidx < numdyn; dynidx++) { dyn_entry = gelf_getdyn(source->dynamic.data, dynidx, &dyn_entry_mem); FAILIF_LIBELF(NULL == dyn_entry, gelf_getdyn); if ((source->rel.num_unfinished_relocs == 0 && (dynidx == source->rel.idx || dynidx == source->rel.sz_idx)) || (source->jmprel.num_unfinished_relocs == 0 && (dynidx == source->jmprel.idx || dynidx == source->jmprel.sz_idx))) { if (unlikely(verbose_flag)) { char buf[64]; INFO("\t(!)\tRemoving entry %02d: [%-10s], %08llx\n", dynidx, ebl_dynamic_tag_name (source->ebl, dyn_entry->d_tag, buf, sizeof (buf)), dyn_entry->d_un.d_val); } continue; } if (unlikely(verbose_flag)) { char buf[64]; INFO("\t\tKeeping entry %02d: [%-10s], %08llx\n", dynidx, ebl_dynamic_tag_name (source->ebl, dyn_entry->d_tag, buf, sizeof (buf)), dyn_entry->d_un.d_val); } gelf_update_dyn(source->dynamic.data, updateidx, &dyn_entry_mem); updateidx++; } } } /* adjust_dynamic_segment_entries */ static bool adjust_dynamic_segment_for(source_t *source, dt_rel_info_t *dyn, bool adjust_section_size_only) { bool dropped_sections = false; /* Go over the sections that belong to this dynamic range. */ dyn->num_unfinished_relocs = 0; if (dyn->sections) { int num_scns, idx; range_t *scns = get_sorted_ranges(dyn->sections, &num_scns); INFO("\tdynamic range %s:[%lld, %lld) contains %d sections.\n", source->name, dyn->addr, dyn->addr + dyn->size, num_scns); ASSERT(scns); int next_idx = 0, next_rel_off = 0; /* The total number of unfinished relocations for this dynamic * entry. */ section_info_t *next = (section_info_t *)scns[next_idx].user; section_info_t *first = next; ASSERT(first); for (idx = 0; idx < num_scns; idx++) { section_info_t *reloc_scn = (section_info_t *)scns[idx].user; size_t rel_scn_idx = reloc_scn - source->relocation_sections; ASSERT(rel_scn_idx < (size_t)source->num_relocation_sections); unfinished_relocation_t *unfinished = &source->unfinished[rel_scn_idx]; int unf_idx; ASSERT(reloc_scn->info == NULL || reloc_scn->shdr.sh_name == reloc_scn->info->old_shdr.sh_name); const char *sname = elf_strptr(source->oldelf, source->shstrndx, reloc_scn->shdr.sh_name); INFO("\tsection [%s] contains %d unfinished relocs.\n", sname, unfinished->num_rels); for (unf_idx = 0; unf_idx < unfinished->num_rels; unf_idx++) { /* There are unfinished relocations. Copy them forward to the lowest section we can. */ while (next_rel_off == (int)(next->shdr.sh_size/next->shdr.sh_entsize)) { INFO("\tsection [%s] has filled up with %d unfinished " "relocs.\n", sname, next_rel_off); next_idx++; ASSERT(next_idx <= idx); next = (section_info_t *)scns[next_idx].user; next_rel_off = 0; } if (!adjust_section_size_only) { INFO("\t\tmoving unfinished relocation %2d to [%s:%d]\n", unf_idx, sname, next_rel_off); FAILIF_LIBELF(0 == gelf_update_rel(next->data, next_rel_off, &unfinished->rels[unf_idx]), gelf_update_rel); } next_rel_off++; dyn->num_unfinished_relocs++; } } /* for */ /* Set the size of the last section, and mark all subsequent sections for removal. At this point, next is the section to which we last wrote data, next_rel_off is the offset before which we wrote the last relocation, and so next_rel_off * relsize is the new size of the section. */ bool adjust_file = ADJUST_ELF && source->elf_hdr.e_type != ET_EXEC; if (adjust_file && !source->dry_run) { size_t relsize = gelf_fsize(source->elf, ELF_T_REL, 1, source->elf_hdr.e_version); ASSERT(next->info == NULL || next->shdr.sh_name == next->info->old_shdr.sh_name); const char *sname = elf_strptr(source->oldelf, source->shstrndx, next->shdr.sh_name); INFO("\tsection [%s] (index %d) has %d unfinished relocs, " "changing its size to %ld bytes (from %ld bytes).\n", sname, elf_ndxscn(next->scn), next_rel_off, (long)(next_rel_off * relsize), (long)(next->shdr.sh_size)); /* source->shdr_info[] must be allocated prior to calling this function. This is in fact done in process_file(), by calling setup_shdr_info() just before we call adjust_dynamic_segment(). */ ASSERT(source->shdr_info != NULL); /* We do not update the data field of shdr_info[], because it does not exist yet (with ADJUST_ELF != 0). We create the new section and section data after the first call to prelink(). For now, we save the results of our analysis by modifying the sh_size field of the section header. When we create the new sections' data, we set the size of the data from the sh_size fields of the section headers. NOTE: The assertion applies only to the first call of adjust_dynamic_segment (which calls this function). By the second call, we've already created the data for the new sections. The only sections for which we haven't created data are the relocation sections we are removing. */ #ifdef DEBUG ASSERT((!adjust_section_size_only && (source->shdr_info[elf_ndxscn(next->scn)].idx > 0)) || source->shdr_info[elf_ndxscn(next->scn)].data == NULL); #endif //FIXME: what else do we need to do here? Do we need to update // another copy of the shdr so that it's picked up when we // commit the file? next->shdr.sh_size = next_rel_off * relsize; source->shdr_info[elf_ndxscn(next->scn)].shdr.sh_size = next->shdr.sh_size; if (next_rel_off * relsize == 0) { #ifdef REMOVE_HANDLED_SECTIONS INFO("\tsection [%s] (index %d) is now empty, marking for " "removal.\n", sname, elf_ndxscn(next->scn)); source->shdr_info[elf_ndxscn(next->scn)].idx = 0; dropped_sections = true; #endif } while (++next_idx < num_scns) { next = (section_info_t *)scns[next_idx].user; #ifdef REMOVE_HANDLED_SECTIONS ASSERT(next->info == NULL || next->shdr.sh_name == next->info->old_shdr.sh_name); const char *sname = elf_strptr(source->oldelf, source->shstrndx, next->shdr.sh_name); INFO("\tsection [%s] (index %d) is now empty, marking for " "removal.\n", sname, elf_ndxscn(next->scn)); /* mark for removal */ source->shdr_info[elf_ndxscn(next->scn)].idx = 0; dropped_sections = true; #endif } } } /* if (dyn->sections) */ else { /* The dynamic entry won't have any sections when it itself doesn't exist. This could happen when we remove all relocation sections from a dynamic entry because we have managed to handle all relocations in them. */ INFO("\tNo section for dynamic entry!\n"); } return dropped_sections; } static bool adjust_dynamic_segment(source_t *source, bool adjust_section_size_only) { bool dropped_section; INFO("Adjusting dynamic segment%s.\n", (adjust_section_size_only ? " (section sizes only)" : "")); INFO("\tadjusting dynamic segment REL.\n"); dropped_section = adjust_dynamic_segment_for(source, &source->rel, adjust_section_size_only); INFO("\tadjusting dynamic segment JMPREL.\n"); dropped_section = adjust_dynamic_segment_for(source, &source->jmprel, adjust_section_size_only) || dropped_section; if (!adjust_section_size_only) adjust_dynamic_segment_entries(source); return dropped_section; } static void match_relocation_sections_to_dynamic_ranges(source_t *source) { /* We've gathered all the DT_DYNAMIC entries; now we need to figure out which relocation sections fit in which range as described by the entries. */ int relidx; for (relidx = 0; relidx < source->num_relocation_sections; relidx++) { section_info_t *reloc_scn = &source->relocation_sections[relidx]; int index = elf_ndxscn(reloc_scn->scn); ASSERT(reloc_scn->info == NULL || reloc_scn->shdr.sh_name == reloc_scn->info->old_shdr.sh_name); const char *sname = elf_strptr(source->oldelf, source->shstrndx, reloc_scn->shdr.sh_name); INFO("Checking section [%s], index %d, for match to dynamic ranges\n", sname, index); if (source->shdr_info == NULL || reloc_scn->info->idx > 0) { if (source->rel.addr && source->rel.addr <= reloc_scn->shdr.sh_addr && reloc_scn->shdr.sh_addr < source->rel.addr + source->rel.size) { /* The entire section must fit in the dynamic range. */ if((reloc_scn->shdr.sh_addr + reloc_scn->shdr.sh_size) > (source->rel.addr + source->rel.size)) { PRINT("WARNING: In [%s], section %s:[%lld,%lld) " "is not fully contained in dynamic range " "[%lld,%lld)!\n", source->name, sname, reloc_scn->shdr.sh_addr, reloc_scn->shdr.sh_addr + reloc_scn->shdr.sh_size, source->rel.addr, source->rel.addr + source->rel.size); } if (NULL == source->rel.sections) { source->rel.sections = init_range_list(); ASSERT(source->rel.sections); } add_unique_range_nosort(source->rel.sections, reloc_scn->shdr.sh_addr, reloc_scn->shdr.sh_size, reloc_scn, NULL, NULL); INFO("\tSection [%s] matches dynamic range REL.\n", sname); } else if (source->jmprel.addr && source->jmprel.addr <= reloc_scn->shdr.sh_addr && reloc_scn->shdr.sh_addr <= source->jmprel.addr + source->jmprel.size) { if((reloc_scn->shdr.sh_addr + reloc_scn->shdr.sh_size) > (source->jmprel.addr + source->jmprel.size)) { PRINT("WARNING: In [%s], section %s:[%lld,%lld) " "is not fully " "contained in dynamic range [%lld,%lld)!\n", source->name, sname, reloc_scn->shdr.sh_addr, reloc_scn->shdr.sh_addr + reloc_scn->shdr.sh_size, source->jmprel.addr, source->jmprel.addr + source->jmprel.size); } if (NULL == source->jmprel.sections) { source->jmprel.sections = init_range_list(); ASSERT(source->jmprel.sections); } add_unique_range_nosort(source->jmprel.sections, reloc_scn->shdr.sh_addr, reloc_scn->shdr.sh_size, reloc_scn, NULL, NULL); INFO("\tSection [%s] matches dynamic range JMPREL.\n", sname); } else PRINT("WARNING: Relocation section [%s:%s] does not match " "any DT_ entry.\n", source->name, sname); } else { INFO("Section [%s] was removed, not matching it to dynamic " "ranges.\n", sname); } } /* for ... */ if (source->rel.sections) sort_ranges(source->rel.sections); if (source->jmprel.sections) sort_ranges(source->jmprel.sections); } static void drop_sections(source_t *source) { INFO("We are dropping some sections from [%s]--creating section entries " "only for remaining sections.\n", source->name); /* Renumber the sections. The numbers for the sections after those we are dropping will be shifted back by the number of dropped sections. */ int cnt, idx; for (cnt = idx = 1; cnt < source->shnum; ++cnt) { if (source->shdr_info[cnt].idx > 0) { source->shdr_info[cnt].idx = idx++; /* Create a new section. */ FAILIF_LIBELF((source->shdr_info[cnt].newscn = elf_newscn(source->elf)) == NULL, elf_newscn); ASSERT(elf_ndxscn (source->shdr_info[cnt].newscn) == source->shdr_info[cnt].idx); /* Copy the section data */ Elf_Data *olddata = elf_getdata(source->shdr_info[cnt].scn, // old section NULL); FAILIF_LIBELF(NULL == olddata, elf_getdata); Elf_Data *data = elf_newdata(source->shdr_info[cnt].newscn); FAILIF_LIBELF(NULL == data, elf_newdata); *data = *olddata; #if COPY_SECTION_DATA_BUFFER if (olddata->d_buf != NULL) { data->d_buf = MALLOC(data->d_size); memcpy(data->d_buf, olddata->d_buf, olddata->d_size); } #endif source->shdr_info[cnt].data = data; if (data->d_size != source->shdr_info[cnt].shdr.sh_size) { INFO("Trimming new-section data from %d to %lld bytes " "(as calculated by adjust_dynamic_segment()).\n", data->d_size, source->shdr_info[cnt].shdr.sh_size); data->d_size = source->shdr_info[cnt].shdr.sh_size; } INFO("\tsection [%s] (old offset %lld, old size %lld) " "will have index %d (was %d), new size %d\n", source->shdr_info[cnt].name, source->shdr_info[cnt].old_shdr.sh_offset, source->shdr_info[cnt].old_shdr.sh_size, source->shdr_info[cnt].idx, elf_ndxscn(source->shdr_info[cnt].scn), data->d_size); } else { INFO("\tIgnoring section [%s] (offset %lld, size %lld, index %d), " "it will be discarded.\n", source->shdr_info[cnt].name, source->shdr_info[cnt].shdr.sh_offset, source->shdr_info[cnt].shdr.sh_size, elf_ndxscn(source->shdr_info[cnt].scn)); } /* NOTE: We mark use_old_shdr_for_relocation_calculations even for the sections we are removing. adjust_elf has an assertion that makes sure that if the values for the size of a section according to its header and its data structure differ, then we are using explicitly the old section header for calculations, and that the section in question is a relocation section. */ source->shdr_info[cnt].use_old_shdr_for_relocation_calculations = true; } /* for */ } static source_t* process_file(const char *filename, const char *output, int is_file, void (*report_library_size_in_memory)( const char *name, off_t fsize), unsigned (*get_next_link_address)( const char *name), int locals_only, char **lib_lookup_dirs, int num_lib_lookup_dirs, char **default_libs, int num_default_libs, int dry_run, int *total_num_handled_relocs, int *total_num_unhandled_relocs) { /* Look up the file in the list of already-handles files, which are represented by source_t structs. If we do not find the file, then we haven't prelinked it yet. If we find it, then we have, so we do nothing. Keep in mind that apriori operates on an entire collection of files, and if application A used library L, and so does application B, if we process A first, then by the time we get to B we will have prelinked L already; that's why we check first to see if a library has been prelinked. */ source_t *source = find_source(filename, lib_lookup_dirs, num_lib_lookup_dirs); if (NULL == source) { /* If we could not find the source, then it hasn't been processed yet, so we go ahead and process it! */ INFO("Processing [%s].\n", filename); char *full = find_file(filename, lib_lookup_dirs, num_lib_lookup_dirs); FAILIF(NULL == full, "Could not find [%s] in the current directory or in any of " "the search paths!\n", filename); unsigned base = get_next_link_address(full); source = init_source(full, output, is_file, base, dry_run); if (source == NULL) { INFO("File [%s] is a static executable.\n", filename); return NULL; } ASSERT(source->dynamic.scn != NULL); /* We need to increment the next prelink address only when the file we are currently handing is a shared library. Executables do not need to be prelinked at a different address, they are always at address zero. Also, if we are prelinking locals only, then we are handling a single file per invokation of apriori, so there is no need to increment the prelink address unless there is a global prelink map, in which case we do need to check to see if the library isn't running into its neighbouts in the prelink map. */ if (source->oldelf_hdr.e_type != ET_EXEC && (!locals_only || report_library_size_in_memory == pm_report_library_size_in_memory)) { /* This sets the next link address only if an increment was not specified by the user. If an address increment was specified, then we just check to make sure that the file size is less than the increment. NOTE: The file size is the absolute highest number of bytes that the file may occupy in memory, if the entire file is loaded, but this is almost next the case. A file will often have sections which are not loaded, which could add a lot of size. That's why we start off with the file size and then subtract the size of the biggest sections that will not get loaded, which are the varios DWARF sections, all of which of which are named starting with ".debug_". We could do better than this (by caculating exactly how many bytes from that file will be loaded), but that's an overkill. Unless the prelink-address increment becomes too small, the file size after subtracting the sizes of the DWARF section will be a good-enough upper bound. */ unsigned long fsize = source->elf_file_info.st_size; INFO("Calculating loadable file size for next link address. " "Starting with %ld.\n", fsize); if (true) { Elf_Scn *scn = NULL; GElf_Shdr shdr_mem, *shdr; const char *scn_name; while ((scn = elf_nextscn (source->oldelf, scn)) != NULL) { shdr = gelf_getshdr(scn, &shdr_mem); FAILIF_LIBELF(NULL == shdr, gelf_getshdr); scn_name = elf_strptr (source->oldelf, source->shstrndx, shdr->sh_name); ASSERT(scn_name != NULL); if (!(shdr->sh_flags & SHF_ALLOC)) { INFO("\tDecrementing by %lld on account of section " "[%s].\n", shdr->sh_size, scn_name); fsize -= shdr->sh_size; } } } INFO("Done calculating loadable file size for next link address: " "Final value is %ld.\n", fsize); report_library_size_in_memory(source->name, fsize); } /* Identify the dynamic segment and process it. Specifically, we find out what dependencies, if any, this file has. Whenever we encounter such a dependency, we process it recursively; we find out where the various relocation information sections are stored. */ size_t dynidx; GElf_Dyn *dyn, dyn_mem; size_t numdyn = source->dynamic.shdr.sh_size / source->dynamic.shdr.sh_entsize; ASSERT(source->dynamic.shdr.sh_size == source->dynamic.data->d_size); source->rel.idx = source->rel.sz_idx = -1; source->jmprel.idx = source->jmprel.sz_idx = -1; for (dynidx = 0; dynidx < numdyn; dynidx++) { dyn = gelf_getdyn (source->dynamic.data, dynidx, &dyn_mem); FAILIF_LIBELF(NULL == dyn, gelf_getdyn); /* When we are processing only the local relocations in a file, we don't need to handle any of the dependencies. It won't hurt if we do, but we will be doing unnecessary work. */ switch (dyn->d_tag) { case DT_NEEDED: if (!locals_only) { /* Process the needed library recursively. */ const char *dep_lib = #if ELF_STRPTR_IS_BROKEN (((char *)elf_getdata( elf_getscn(source->elf, source->dynamic.shdr.sh_link), NULL)->d_buf) + dyn->d_un.d_val); #else elf_strptr (source->elf, source->dynamic.shdr.sh_link, dyn->d_un.d_val); #endif ASSERT(dep_lib != NULL); INFO("[%s] depends on [%s].\n", filename, dep_lib); ASSERT(output == NULL || is_file == 0); source_t *dep = process_file(dep_lib, output, is_file, report_library_size_in_memory, get_next_link_address, locals_only, lib_lookup_dirs, num_lib_lookup_dirs, default_libs, num_default_libs, dry_run, total_num_handled_relocs, total_num_unhandled_relocs); /* Add the library to the dependency list. */ if (source->num_lib_deps == source->lib_deps_size) { source->lib_deps_size += 10; source->lib_deps = REALLOC(source->lib_deps, source->lib_deps_size * sizeof(source_t *)); } source->lib_deps[source->num_lib_deps++] = dep; } break; case DT_JMPREL: source->jmprel.idx = dynidx; source->jmprel.addr = dyn->d_un.d_ptr; break; case DT_PLTRELSZ: source->jmprel.sz_idx = dynidx; source->jmprel.size = dyn->d_un.d_val; break; case DT_REL: source->rel.idx = dynidx; source->rel.addr = dyn->d_un.d_ptr; break; case DT_RELSZ: source->rel.sz_idx = dynidx; source->rel.size = dyn->d_un.d_val; break; case DT_RELA: case DT_RELASZ: FAILIF(1, "Can't handle DT_RELA and DT_RELASZ entries!\n"); break; } /* switch */ } /* for each dynamic entry... */ INFO("Handling [%s].\n", filename); #ifdef SUPPORT_ANDROID_PRELINK_TAGS if (!source->prelinked) #endif { /* When ADJUST_ELF is defined, this call to prelink is a dry run intended to calculate the number of relocations that could not be handled. This, in turn, allows us to calculate the amount by which we can shrink the various relocation sections before we call adjust_elf. After we've adjusted the sections, we will call prelink() one more time to do the actual work. NOTE: Even when ADJUST_ELF != 0, we cannot adjust an ELF file that is an executabe, because an executable is not PIC. */ int num_unfinished_relocs = 0; bool adjust_file = ADJUST_ELF && source->elf_hdr.e_type != ET_EXEC; INFO("\n\n\tPRELINKING %s\n\n", adjust_file ? "(CALCULATE NUMBER OF HANDLED RELOCATIONS)" : "(ACTUAL)"); int num_relocs = prelink(source, locals_only, adjust_file || dry_run, lib_lookup_dirs, num_lib_lookup_dirs, default_libs, num_default_libs, &num_unfinished_relocs); INFO("[%s]: (calculate changes) handled %d, could not handle %d " "relocations.\n", source->name, num_relocs, num_unfinished_relocs); if (adjust_file && !dry_run) { /* Find out the new section sizes of the relocation sections, but do not move any relocations around, because adjust_elf needs to know about all relocations in order to adjust the file correctly. */ match_relocation_sections_to_dynamic_ranges(source); /* We haven't set up source->shdr_info[] yet, so we do it now. NOTE: setup_shdr_info() depends only on source->oldelf, not on source->elf! source->elf is not even defined yet. We initialize source->shdr_info[] based on the section information of the unmodified ELF file, and then make our modifications in the call to adjust_dynamic_segment() based on this information. adjust_dynamic_segment() will rearrange the unhandled relocations in the beginning of their relocation sections, and adjust the size of those relocation sections. In the case when a relocation section is completely handled, adjust_dynamic_segment() will mark it for removal by function adjust_elf. */ ASSERT(source->elf == source->oldelf); ASSERT(source->shdr_info == NULL); setup_shdr_info(source); ASSERT(source->shdr_info != NULL); INFO("\n\n\tADJUSTING DYNAMIC SEGMENT " "(CALCULATE CHANGES)\n\n"); bool drop_some_sections = adjust_dynamic_segment(source, true); /* Reopen the elf file! Note that we are not doing a dry run (the if statement above makes sure of that.) NOTE: We call init_elf() after we called adjust_dynamic_segment() in order to have adjust_dynamic_segment() refer to source->oldelf when it refers to source->elf. Since adjust_dynamic_segment doesn't actually write to the ELF file, this is OK. adjust_dynamic_segment() updates the sh_size fields of saved section headers and optionally marks sections for removal. Having adjust_dynamic_segment() refer to source->oldelf means that we'll have access to section-name strings so we can print them out in our logging and debug output. */ source->elf = init_elf(source, false); /* This is the same code as in init_source() after the call to * init_elf(). */ ASSERT(source->elf != source->oldelf); ebl_closebackend(source->ebl); source->ebl = ebl_openbackend (source->elf); FAILIF_LIBELF(NULL == source->ebl, ebl_openbackend); #ifdef ARM_SPECIFIC_HACKS FAILIF_LIBELF(0 != arm_init(source->elf, source->elf_hdr.e_machine, source->ebl, sizeof(Ebl)), arm_init); #endif/*ARM_SPECIFIC_HACKS*/ if (drop_some_sections) drop_sections(source); else { INFO("All sections remain in [%s]--we are changing at " "most section sizes.\n", source->name); create_elf_sections(source, NULL); int cnt, idx; for (cnt = idx = 1; cnt < source->shnum; ++cnt) { Elf_Data *data = elf_getdata( source->shdr_info[cnt].newscn, // new section NULL); if (data->d_size != source->shdr_info[cnt].shdr.sh_size) { INFO("Trimming new-section data from %d to %lld " "bytes (as calculated by " "adjust_dynamic_segment()).\n", data->d_size, source->shdr_info[cnt].shdr.sh_size); data->d_size = source->shdr_info[cnt].shdr.sh_size; } } } /* Shrink it! */ INFO("\n\n\tADJUSTING ELF\n\n"); adjust_elf( source->oldelf, source->name, source->elf, source->name, source->ebl, &source->old_ehdr_mem, NULL, 0, // no symbol filter source->shdr_info, // information on how to adjust the ELF source->shnum, // length of source->shdr_info[] source->phdr_info, // program-header info source->shnum, // irrelevant--we're not rebuilding shstrtab source->shnum, // number of sections in file source->shstrndx, // index of shstrtab (both in // shdr_info[] and as a section index) NULL, // irrelevant, since we are not rebuilding shstrtab drop_some_sections, // some sections are being dropped elf_ndxscn(source->dynamic.scn), // index of .dynamic elf_ndxscn(source->symtab.scn), // index of .dynsym 1, // allow shady business &source->shstrtab_data, true, false); // do not rebuild shstrtab INFO("\n\n\tREINITIALIZING STRUCTURES " "(TO CONTAIN ADJUSTMENTS)\n\n"); reinit_source(source); INFO("\n\n\tPRELINKING (ACTUAL)\n\n"); #ifdef DEBUG int old_num_unfinished_relocs = num_unfinished_relocs; #endif num_unfinished_relocs = 0; #ifdef DEBUG int num_relocs_take_two = #endif prelink(source, locals_only, false, /* not a dry run */ lib_lookup_dirs, num_lib_lookup_dirs, default_libs, num_default_libs, &num_unfinished_relocs); /* The numbers for the total number of relocations and the number of unhandled relocations between the first and second invokationof prelink() must be the same! The first time we ran prelink() just to calculate the numbers so that we could calculate the adjustments to pass to adjust_elf, and the second time we actually carry out the prelinking; the numbers must stay the same! */ ASSERT(num_relocs == num_relocs_take_two); ASSERT(old_num_unfinished_relocs == num_unfinished_relocs); INFO("[%s]: (actual prelink) handled %d, could not " "handle %d relocations.\n", source->name, num_relocs, num_unfinished_relocs); } /* if (adjust_elf && !dry_run) */ *total_num_handled_relocs += num_relocs; *total_num_unhandled_relocs += num_unfinished_relocs; if(num_unfinished_relocs != 0 && source->elf_hdr.e_type != ET_EXEC && !locals_only) { /* One reason you could have unfinished relocations in an executable file is if this file used dlopen() and friends. We do not adjust relocation entries to those symbols, because libdl is a dummy only--the real functions are provided for by the dynamic linker itsef. NOTE FIXME HACK: This is specific to the Android dynamic linker, and may not be true in other cases. */ PRINT("WARNING: Expecting to have unhandled relocations only " "for executables (%s is not an executable)!\n", source->name); } match_relocation_sections_to_dynamic_ranges(source); /* Now, for each relocation section, check to see if its address matches one of the DT_DYNAMIC relocation pointers. If so, then if the section has no unhandled relocations, simply set the associated DT_DYNAMIC entry's size to zero. If the section does have unhandled entries, then lump them all together at the front of the respective section and update the size of the respective DT_DYNAMIC entry to the new size of the section. A better approach would be do delete a relocation section if it has been fully relocated and to remove its entry from the DT_DYNAMIC array, and for relocation entries that still have some relocations in them, we should shrink the section if that won't violate relative offsets. This is more work, however, and for the speed improvement we expect from a prelinker, just patching up DT_DYNAMIC will suffice. Note: adjust_dynamic_segment() will modify source->shdr_info[] to denote any change in a relocation section's size. This will be picked up by adjust_elf, which will rearrange the file to eliminate the gap created by the decrease in size of the relocation section. We do not need to do this, but the relocation section could be large, and reduced drastically by the prelinking process, so it pays to adjust the file. */ INFO("\n\n\tADJUSTING DYNAMIC SEGMENT (ACTUAL)\n\n"); adjust_dynamic_segment(source, false); } #ifdef SUPPORT_ANDROID_PRELINK_TAGS else INFO("[%s] is already prelinked at 0x%08lx.\n", filename, source->prelink_base); #endif } else INFO("[%s] has been processed already.\n", filename); return source; } void apriori(char **execs, int num_execs, char *output, void (*report_library_size_in_memory)( const char *name, off_t fsize), int (*get_next_link_address)(const char *name), int locals_only, int dry_run, char **lib_lookup_dirs, int num_lib_lookup_dirs, char **default_libs, int num_default_libs, char *mapfile) { source_t *source; /* for general usage */ int input_idx; ASSERT(report_library_size_in_memory != NULL); ASSERT(get_next_link_address != NULL); /* Process and prelink each executable and object file. Function process_file() is called for each executable in the loop below. It calls itself recursively for each library. We prelink each library after prelinking its dependencies. */ int total_num_handled_relocs = 0, total_num_unhandled_relocs = 0; for (input_idx = 0; input_idx < num_execs; input_idx++) { INFO("executable: [%s]\n", execs[input_idx]); /* Here process_file() is actually processing the top-level executable files. */ process_file(execs[input_idx], output, num_execs == 1, report_library_size_in_memory, get_next_link_address, /* executables get a link address of zero, regardless of this value */ locals_only, lib_lookup_dirs, num_lib_lookup_dirs, default_libs, num_default_libs, dry_run, &total_num_handled_relocs, &total_num_unhandled_relocs); /* if source is NULL, then the respective executable is static */ /* Mark the source as an executable */ } /* for each input executable... */ PRINT("Handled %d relocations.\n", total_num_handled_relocs); PRINT("Could not handle %d relocations.\n", total_num_unhandled_relocs); /* We are done! Since the end result of our calculations is a set of symbols for each library that other libraries or executables link against, we iterate over the set of libraries one last time, and for each symbol that is marked as satisfying some dependence, we emit a line with the symbol's name to a text file derived from the library's name by appending the suffix .syms to it. */ if (mapfile != NULL) { const char *mapfile_name = mapfile; FILE *fp; if (*mapfile == '+') { mapfile_name = mapfile + 1; INFO("Opening map file %s for append/write.\n", mapfile_name); fp = fopen(mapfile_name, "a"); } else fp = fopen(mapfile_name, "w"); FAILIF(fp == NULL, "Cannot open file [%s]: %s (%d)!\n", mapfile_name, strerror(errno), errno); source = sources; while (source) { /* If it's a library, print the results. */ if (source->elf_hdr.e_type == ET_DYN) { /* Add to the memory map file. */ fprintf(fp, "%s 0x%08lx %lld\n", basename(source->name), source->base, source->elf_file_info.st_size); } source = source->next; } fclose(fp); } /* Free the resources--you can't do it in the loop above because function print_symbol_references() accesses nodes other than the one being iterated over. */ source = sources; while (source) { source_t *old = source; source = source->next; /* Destroy the evidence. */ destroy_source(old); } }