30 Embedding Files in Executable Sections
Locating external files on startup, such as the boot files needed for Racket CS, can be troublesome. An alternative to having separate files is to embed the files in an ELF or Mach-O executable as data segments or in a Windows executable as a resource. Embedding files in that way requires using OS-specific linking steps and runtime libraries.
30.1 Accessing ELF Sections on Linux
On Linux and other ELF-based systems, you can add sections to an executable using objcopy. For example, the following command copies "pre_run" to run while adding boot files as sections:
objcopy --add-section .csboot1=petite.boot \
  --set-section-flags .csboot1=noload,readonly \
  --add-section .csboot2=scheme.boot \
  --set-section-flags .csboot2=noload,readonly \
  --add-section .csboot3=racket.boot \
  --set-section-flags .csboot3=noload,readonly \
  ./pre_run ./run
Here’s an implementation for "pre_run" like the one in Embedding into a Program (CS), but where boot files are loaded from sections:
"main.c"
#include <string.h>
#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>
#include <stdio.h>
#include <errno.h>
#include <elf.h>
#include <fcntl.h>
#include "chezscheme.h"
#include "racketcs.h"
#include "run.c"
/* Returns the path of the running executable, obtained by resolving the
   "/proc/self/exe" symbolic link. The result is a NUL-terminated string in
   a buffer allocated with malloc(); the caller owns it. On failure, prints
   a message to stderr and exits the process with status 1. */
static char *get_self_path(void)
{
  ssize_t len, blen = 256;
  char *s = malloc(blen);

  while (1) {
    if (!s) {
      fprintf(stderr, "failed to get self (%d)\n", errno);
      exit(1);
    }

    /* Leave one byte free for the terminator added below. */
    len = readlink("/proc/self/exe", s, blen-1);
    if (len == (blen-1)) {
      /* The buffer was filled completely, so the path may have been
         truncated; retry with a buffer twice as large. */
      free(s);
      blen *= 2;
      s = malloc(blen);
    } else if (len < 0) {
      fprintf(stderr, "failed to get self (%d)\n", errno);
      exit(1);
    } else {
      /* readlink() does not append a terminator, so add it here. */
      s[len] = '\0';
      return s;
    }
  }
}
/* Looks up the section named `sectname` in the 64-bit ELF file at path
   `exe` and returns that section's file offset (its `sh_offset` field).

   Returns -1, after printing a message to stderr, if the file cannot be
   opened or read, is not a 64-bit ELF file, or has no section with the
   requested name. */
static long find_section(const char *exe, const char *sectname)
{
  int fd, i;
  Elf64_Ehdr e;
  Elf64_Shdr s;
  char *strs = NULL;
  size_t strs_size;
  long result = -1;

  fd = open(exe, O_RDONLY, 0);
  if (fd == -1)
    goto done;

  if (read(fd, &e, sizeof(e)) != (ssize_t)sizeof(e))
    goto done;

  /* The structures read below are the 64-bit variants, so refuse anything
     that is not a 64-bit ELF file instead of interpreting garbage. */
  if (memcmp(e.e_ident, ELFMAG, SELFMAG) != 0
      || e.e_ident[EI_CLASS] != ELFCLASS64)
    goto done;

  /* Read the header of the section that holds the section-name strings. */
  if (lseek(fd, e.e_shoff + ((off_t)e.e_shstrndx * e.e_shentsize), SEEK_SET)
      == (off_t)-1)
    goto done;
  if (read(fd, &s, sizeof(s)) != (ssize_t)sizeof(s))
    goto done;

  strs_size = s.sh_size;
  if (strs_size == 0)
    goto done;
  strs = malloc(strs_size);
  if (!strs)
    goto done;

  if (lseek(fd, s.sh_offset, SEEK_SET) == (off_t)-1)
    goto done;
  if (read(fd, strs, strs_size) != (ssize_t)strs_size)
    goto done;
  /* Guarantee termination so strcmp() cannot run past the table. */
  strs[strs_size - 1] = '\0';

  for (i = 0; i < e.e_shnum; i++) {
    if (lseek(fd, e.e_shoff + ((off_t)i * e.e_shentsize), SEEK_SET)
        == (off_t)-1)
      break;
    if (read(fd, &s, sizeof(s)) != (ssize_t)sizeof(s))
      break;
    /* Skip entries whose name index points outside the string table. */
    if (s.sh_name < strs_size && !strcmp(strs + s.sh_name, sectname)) {
      result = (long)s.sh_offset;
      break;
    }
  }

done:
  free(strs);
  if (fd != -1)
    close(fd);
  if (result < 0)
    fprintf(stderr, "could not find section %s\n", sectname);
  return result;
}
/* Entry point: boots Racket CS using boot files stored as sections of this
   executable, then requires the `run` module. Returns 1 without booting if
   any of the three boot sections cannot be located. */
int main(int argc, char *argv[])
{
  racket_boot_arguments_t ba;
  long off1, off2, off3;

  memset(&ba, 0, sizeof(ba));

  /* All three boot files are read from this executable itself, so they
     share one path and differ only in offset. */
  ba.boot1_path = get_self_path();
  ba.boot2_path = ba.boot1_path;
  ba.boot3_path = ba.boot1_path;

  off1 = find_section(ba.boot1_path, ".csboot1");
  off2 = find_section(ba.boot2_path, ".csboot2");
  off3 = find_section(ba.boot3_path, ".csboot3");

  /* find_section() has already reported the problem on stderr; do not hand
     a -1 offset to racket_boot(). */
  if (off1 < 0 || off2 < 0 || off3 < 0)
    return 1;

  ba.boot1_offset = off1;
  ba.boot2_offset = off2;
  ba.boot3_offset = off3;

  ba.exec_file = argv[0];

  racket_boot(&ba);

  declare_modules();

  /* Build the list (quote run) and require it. */
  ptr mod = Scons(Sstring_to_symbol("quote"),
                  Scons(Sstring_to_symbol("run"),
                        Snil));

  racket_dynamic_require(mod, Sfalse);

  return 0;
}
30.2 Accessing Mac OS Sections
On Mac OS, sections can be added to a Mach-O executable using the -sectcreate compiler flag. If "main.c" is compiled and linked with
gcc main.c libracketcs.a -Ipath/to/racket/include \
  -liconv -lncurses -framework CoreFoundation \
  -sectcreate __DATA __rktboot1 petite.boot \
  -sectcreate __DATA __rktboot2 scheme.boot \
  -sectcreate __DATA __rktboot3 racket.boot
then the executable can access its own path using _NSGetExecutablePath, and it can locate sections using getsectbyname. Here’s an example like the one in Embedding into a Program (CS):
"main.c"
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "chezscheme.h"
#include "racketcs.h"
#include "run.c"
#include <mach-o/dyld.h>
#include <mach-o/getsect.h>
/* Returns the path of the running executable in a buffer allocated with
   malloc(), as reported by _NSGetExecutablePath(). Prints a message to
   stderr and exits with status 1 if the path cannot be obtained. */
static char *get_self_path()
{
  uint32_t needed = 0;
  char *buf;

  /* Probe call with no buffer: its result is ignored; the code relies on
     it storing the required buffer size in `needed`. */
  (void)_NSGetExecutablePath(NULL, &needed);

  buf = malloc(needed + 1);
  if (_NSGetExecutablePath(buf, &needed) != 0) {
    fprintf(stderr, "could not get executable path\n");
    exit(1);
  }

  return buf;
}
/* Looks up section `sectname` of segment `segname` with getsectbyname()
   and returns the `offset` field of the section record it yields.
   If the section is not found, prints a message to stderr and exits the
   process with status 1, so this function returns only on success. */
static long find_section(const char *segname, const char *sectname)
{
  const struct section_64 *s = getsectbyname(segname, sectname);

  if (!s) {
    fprintf(stderr, "could not find segment %s section %s\n",
            segname, sectname);
    exit(1);
  }

  return s->offset;
}
/* Entry point: boots Racket CS from boot files stored in the "__DATA"
   segment of this executable, then requires the `run` module. */
int main(int argc, char **argv)
{
  racket_boot_arguments_t boot_args;
  char *self_path;
  ptr require_spec;

  memset(&boot_args, 0, sizeof(boot_args));

  /* Every boot file is read from this executable, so one path serves all
     three; only the offsets differ. */
  self_path = get_self_path();
  boot_args.boot1_path = self_path;
  boot_args.boot2_path = self_path;
  boot_args.boot3_path = self_path;

  /* find_section() exits the process itself when a section is missing. */
  boot_args.boot1_offset = find_section("__DATA", "__rktboot1");
  boot_args.boot2_offset = find_section("__DATA", "__rktboot2");
  boot_args.boot3_offset = find_section("__DATA", "__rktboot3");

  boot_args.exec_file = argv[0];
  boot_args.run_file = argv[0];

  racket_boot(&boot_args);

  declare_modules(); /* defined by "run.c" */

  /* Build the list (quote run) from the tail forward. */
  require_spec = Scons(Sstring_to_symbol("run"), Snil);
  require_spec = Scons(Sstring_to_symbol("quote"), require_spec);

  racket_dynamic_require(require_spec, Sfalse);

  return 0;
}
30.3 Accessing Windows Resources
On Windows, data is most readily added to an executable as a resource. The following code demonstrates how to find the path to the current executable and how to find a resource in the executable by identifying number, type (usually 1), and encoding (usually 1033):
"main.c"
/* Forward declaration of an internal helper: find_by_id() is defined after
   find_resource_offset(), which calls it. It searches the table at file
   position `pos` for an entry whose ID equals `id` and returns a file
   position derived from that entry and `rsrcs`, or 0 if no entry matches. */
static DWORD find_by_id(HANDLE fd, DWORD rsrcs, DWORD pos, int id);
/* Returns the path of the current process's executable as a wide string in
   a buffer allocated with malloc(); the caller owns the buffer. Returns
   NULL if memory cannot be allocated or GetModuleFileNameW() reports
   failure. */
static wchar_t *get_self_executable_path()
{
  wchar_t *path;
  DWORD r, sz = 1024;

  while (1) {
    path = (wchar_t *)malloc(sz * sizeof(wchar_t));
    if (!path)
      return NULL;

    r = GetModuleFileNameW(NULL, path, sz);
    if (r == 0) {
      /* The call failed outright; the buffer contents are not usable. */
      free(path);
      return NULL;
    }

    if ((r == sz)
        && (GetLastError() == ERROR_INSUFFICIENT_BUFFER)) {
      /* The path did not fit; retry with a buffer twice as large. */
      free(path);
      sz = 2 * sz;
    } else
      break;
  }

  return path;
}
/* Opens the executable file at `path` and scans it for a resource selected
   by three successive table lookups: first `id`, then `type`, then
   `encoding`. On success, returns a file position computed from the
   resource's data entry.

   Failure results: returns 0 if the file cannot be opened, and -1 if the
   ".rsrc" section is not found or any of the three lookups fails.
   NOTE(review): the two failure values differ (0 vs. -1); callers must
   treat both as failure -- confirm against callers before unifying.
   NOTE(review): the results of ReadFile()/SetFilePointer() are not checked,
   so a short or malformed file is not detected here. */
static long find_resource_offset(wchar_t *path, int id, int type, int encoding)
{
/* Find the resource of type `id` */
HANDLE fd;
fd = CreateFileW(path, GENERIC_READ,
FILE_SHARE_READ | FILE_SHARE_WRITE,
NULL,
OPEN_EXISTING,
0,
NULL);
if (fd == INVALID_HANDLE_VALUE)
return 0;
else {
DWORD val, got, sec_pos, virtual_addr, rsrcs, pos;
WORD num_sections, head_size;
char name[8];
/* The 4-byte value at file offset 60 is used as the position of the
   "PE\0\0" tag. */
SetFilePointer(fd, 60, 0, FILE_BEGIN);
ReadFile(fd, &val, 4, &got, NULL);
SetFilePointer(fd, val+4+2, 0, FILE_BEGIN); /* Skip "PE\0\0" tag and machine */
ReadFile(fd, &num_sections, 2, &got, NULL);
SetFilePointer(fd, 12, 0, FILE_CURRENT); /* time stamp + symbol table */
ReadFile(fd, &head_size, 2, &got, NULL);
/* Section entries start after the 4-byte tag, 20 more header bytes, and
   `head_size` additional bytes. */
sec_pos = val+4+20+head_size;
/* Walk the section entries (40 bytes apart) looking for one whose name
   is ".rsrc". */
while (num_sections--) {
SetFilePointer(fd, sec_pos, 0, FILE_BEGIN);
ReadFile(fd, &name, 8, &got, NULL);
if ((name[0] == '.')
&& (name[1] == 'r')
&& (name[2] == 's')
&& (name[3] == 'r')
&& (name[4] == 'c')
&& (name[5] == 0)) {
SetFilePointer(fd, 4, 0, FILE_CURRENT); /* skip virtual size */
ReadFile(fd, &virtual_addr, 4, &got, NULL);
SetFilePointer(fd, 4, 0, FILE_CURRENT); /* skip file size */
/* `rsrcs` is the file position of the section's data. */
ReadFile(fd, &rsrcs, 4, &got, NULL);
SetFilePointer(fd, rsrcs, 0, FILE_BEGIN);
/* We're at the resource table; step through 3 layers */
pos = find_by_id(fd, rsrcs, rsrcs, id);
if (pos) {
pos = find_by_id(fd, rsrcs, pos, type);
if (pos) {
pos = find_by_id(fd, rsrcs, pos, encoding);
if (pos) {
/* pos is the resource data entry */
SetFilePointer(fd, pos, 0, FILE_BEGIN);
ReadFile(fd, &val, 4, &got, NULL);
/* Rebase the entry's first field from the section's virtual address
   to the section's position in the file. */
pos = val - virtual_addr + rsrcs;
CloseHandle(fd);
return pos;
}
}
}
/* ".rsrc" was found but a lookup failed; stop scanning sections. */
break;
}
sec_pos += 40;
}
/* something went wrong */
CloseHandle(fd);
return -1;
}
}
/* internal helper function */
/* Internal helper: searches the resource directory table located at file
   position `pos` (in the file open as `fd`) for an ID entry equal to `id`.

   `rsrcs` is the file position of the start of the resource section;
   offsets stored in directory entries are relative to it.

   Returns the file position that the matching entry refers to, or 0 if no
   ID entry matches or the table cannot be read. */
static DWORD find_by_id(HANDLE fd, DWORD rsrcs, DWORD pos, int id)
{
  DWORD got;
  WORD name_count, id_count;

  /* The entry counts sit 12 bytes into the table header: first the number
     of named entries, then the number of ID entries. */
  SetFilePointer(fd, pos + 12, 0, FILE_BEGIN);
  if (!ReadFile(fd, &name_count, 2, &got, NULL) || (got != 2))
    return 0;
  if (!ReadFile(fd, &id_count, 2, &got, NULL) || (got != 2))
    return 0;

  /* Named entries (8 bytes each) come before the ID entries. Move the file
     pointer past them so the loop below reads only ID entries. */
  pos += 16 + (name_count * 8);
  SetFilePointer(fd, pos, 0, FILE_BEGIN);

  while (id_count--) {
    DWORD entry[2]; /* entry[0] = ID, entry[1] = offset field */

    if (!ReadFile(fd, entry, 8, &got, NULL) || (got != 8))
      return 0;

    if (entry[0] == (DWORD)id) {
      /* The high bit of the offset field only flags a subdirectory; the
         low 31 bits are the offset from the start of the section. */
      return rsrcs + (entry[1] & 0x7FFFFFFF);
    }
  }

  return 0;
}