20160403 - Compiling on Linux Without Libc


For reference, just reposting some of the inline asm bits from one of my engines to jump start compiling without libc...

Shell script to compile forces C (-x c) since I often use the cpp extension which defaults to C++, and forces no libraries except libdl (-nostdlib -ldl).

gcc -x c e.cpp -o e.bin -std=gnu99 -nostdlib -ldl ...

Note that the output from "ldd" will show libc even with -nostdlib, because libdl depends on libc, even when the binary only ever uses, say, 2 external symbols from libdl {dlopen() and dlsym()}. The linux-vdso is mapped by the kernel as a fast path that lets some syscalls bypass the normal kernel entry. Some "ldd" output,

linux-vdso.so.1 (0x00007fff763f9000)
libdl.so.2 => /usr/lib/libdl.so.2 (0x00007f3cb75ac000)
libc.so.6 => /usr/lib/libc.so.6 (0x00007f3cb7209000)
/lib64/ld-linux-x86-64.so.2 (0x00007f3cb77b0000)

Rolling Your Own Main
Running without libc means jumping in from _start instead, and then doing a little assembly to set up the correct environment (note the manual stack alignment).

// Shorthand shared with the rest of the engine.

// ES_: storage-class shorthand for file-local definitions.
#define ES_ static
// ER_: pointer qualifier shorthand (compiler-specific restrict spelling).
#define ER_ __restrict

// ES4: plain signed int.
typedef int ES4;
// EU1: unsigned byte.
typedef unsigned char EU1;
// EU1R: restrict-qualified pointer to unsigned bytes.
typedef EU1 *ER_ EU1R;

// Enter without libc.
//
// main() forwards argc/argv to ERomMain() and then calls EDie(); both are
// defined elsewhere in the engine (EDie() is presumably a no-return exit --
// confirm against its definition).
//
// main() is static and is only referenced by name from the assembly stub
// below, which the compiler cannot see into. It is therefore marked "used" so
// that it is always emitted and the "call main" can be resolved.
__attribute__((used))
ES_ void main(ES4 argc, EU1R *ER_ argv) { ERomMain(argc, argv); EDie(); }

// _start: the entry symbol when linking with -nostdlib. Sets up the two
// arguments for main(), aligns the stack by hand, and calls it.
__asm__(
  ".text\n"
  ".global _start\n"
  "_start:\n"
  "xor %rbp,%rbp\n"      // zero the frame pointer
  "pop %rdi\n"           // top of stack -> rdi (argc, first argument)
  "mov %rsp,%rsi\n"      // stack pointer after the pop -> rsi (argv, second argument)
  "andq $-16,%rsp\n"     // manual 16-byte stack alignment before the call
  "call main\n"
  "hlt\n");              // trap instead of running off the stub if main() ever returns

Syscalls
Sorry in advance, this may wrap. Showing only the 64-bit x86-64 interface below. Syscalls have 0 to 6 arguments, so you need just 7 inline asm functions to access any syscall. The return is often technically signed (a negative value means error), but I use unsigned everywhere out of habit, with a typecast when I need the signed result. I grab syscall numbers from the Linux source, and make my own headers for what I need (which is not much).

// Shorthand shared with the rest of the engine.

// EU8: unsigned long, used for every syscall argument and return value.
typedef unsigned long int EU8;

// EI_: prefix for file-local helpers that the compiler is told to always inline.
#define EI_ static inline __attribute__((__always_inline__))

// Linux syscall access.
// Linux x86-64 system call taking no arguments.
//   num - system call number, loaded into rax.
// Returns the value left in rax, as unsigned; cast to a signed type at the
// call site when the sign of the result matters.
EI_ EU8 ELnx0(EU8 num) {
  EU8 ret;
  // __asm__/__volatile__ rather than plain asm, so the wrapper also builds
  // under strict -std=c99/-std=c11 and matches the __asm__ used for _start.
  // The clobber list names rcx, r11 and every argument register that this
  // variant does not pass as an input.
  __asm__ __volatile__("syscall"
    : "=a"(ret)
    : "a"(num)
    : "cc", "memory", "%rcx", "%rdx", "%rdi", "%rsi", "%r8", "%r9", "%r10", "%r11");
  return ret;
}
// Linux x86-64 system call taking one argument.
//   num - system call number, loaded into rax.
//   ar1 - first argument, loaded into rdi.
// Returns the value left in rax, as unsigned; cast to a signed type at the
// call site when the sign of the result matters.
EI_ EU8 ELnx1(EU8 num, EU8 ar1) {
  EU8 ret;
  // __asm__/__volatile__ rather than plain asm, so the wrapper also builds
  // under strict -std=c99/-std=c11 and matches the __asm__ used for _start.
  // The clobber list names rcx, r11 and every argument register that this
  // variant does not pass as an input.
  __asm__ __volatile__("syscall"
    : "=a"(ret)
    : "a"(num), "D"(ar1)
    : "cc", "memory", "%rcx", "%rdx", "%rsi", "%r8", "%r9", "%r10", "%r11");
  return ret;
}
// Linux x86-64 system call taking two arguments.
//   num - system call number, loaded into rax.
//   ar1 - first argument, loaded into rdi.
//   ar2 - second argument, loaded into rsi.
// Returns the value left in rax, as unsigned; cast to a signed type at the
// call site when the sign of the result matters.
EI_ EU8 ELnx2(EU8 num, EU8 ar1, EU8 ar2) {
  EU8 ret;
  // __asm__/__volatile__ rather than plain asm, so the wrapper also builds
  // under strict -std=c99/-std=c11 and matches the __asm__ used for _start.
  // The clobber list names rcx, r11 and every argument register that this
  // variant does not pass as an input.
  __asm__ __volatile__("syscall"
    : "=a"(ret)
    : "a"(num), "D"(ar1), "S"(ar2)
    : "cc", "memory", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11");
  return ret;
}
// Linux x86-64 system call taking three arguments.
//   num - system call number, loaded into rax.
//   ar1 - first argument, loaded into rdi.
//   ar2 - second argument, loaded into rsi.
//   ar3 - third argument, loaded into rdx.
// Returns the value left in rax, as unsigned; cast to a signed type at the
// call site when the sign of the result matters.
EI_ EU8 ELnx3(EU8 num, EU8 ar1, EU8 ar2, EU8 ar3) {
  EU8 ret;
  // __asm__/__volatile__ rather than plain asm, so the wrapper also builds
  // under strict -std=c99/-std=c11 and matches the __asm__ used for _start.
  // The clobber list names rcx, r11 and every argument register that this
  // variant does not pass as an input.
  __asm__ __volatile__("syscall"
    : "=a"(ret)
    : "a"(num), "D"(ar1), "S"(ar2), "d"(ar3)
    : "cc", "memory", "%rcx", "%r8", "%r9", "%r10", "%r11");
  return ret;
}
// Linux x86-64 system call taking four arguments.
//   num - system call number, loaded into rax.
//   ar1 - first argument, loaded into rdi.
//   ar2 - second argument, loaded into rsi.
//   ar3 - third argument, loaded into rdx.
//   ar4 - fourth argument, loaded into r10.
// Returns the value left in rax, as unsigned; cast to a signed type at the
// call site when the sign of the result matters.
EI_ EU8 ELnx4(EU8 num, EU8 ar1, EU8 ar2, EU8 ar3, EU8 ar4) {
  EU8 ret;
  // The fourth argument is pinned to r10 through a local register variable
  // and handed to the asm with a generic "r" constraint.
  register EU8 lar4 __asm__("r10") = ar4;
  // __asm__/__volatile__ rather than plain asm, so the wrapper also builds
  // under strict -std=c99/-std=c11 and matches the __asm__ used for _start.
  // The clobber list names rcx, r11 and every argument register that this
  // variant does not pass as an input.
  __asm__ __volatile__("syscall"
    : "=a"(ret)
    : "a"(num), "D"(ar1), "S"(ar2), "d"(ar3), "r"(lar4)
    : "cc", "memory", "%rcx", "%r8", "%r9", "%r11");
  return ret;
}
// Linux x86-64 system call taking five arguments.
//   num - system call number, loaded into rax.
//   ar1 - first argument, loaded into rdi.
//   ar2 - second argument, loaded into rsi.
//   ar3 - third argument, loaded into rdx.
//   ar4 - fourth argument, loaded into r10.
//   ar5 - fifth argument, loaded into r8.
// Returns the value left in rax, as unsigned; cast to a signed type at the
// call site when the sign of the result matters.
EI_ EU8 ELnx5(EU8 num, EU8 ar1, EU8 ar2, EU8 ar3, EU8 ar4, EU8 ar5) {
  EU8 ret;
  // The fourth and fifth arguments are pinned to r10 and r8 through local
  // register variables and handed to the asm with generic "r" constraints.
  register EU8 lar4 __asm__("r10") = ar4;
  register EU8 lar5 __asm__("r8") = ar5;
  // __asm__/__volatile__ rather than plain asm, so the wrapper also builds
  // under strict -std=c99/-std=c11 and matches the __asm__ used for _start.
  // The clobber list names rcx, r11 and the one argument register (r9) that
  // this variant does not pass as an input.
  __asm__ __volatile__("syscall"
    : "=a"(ret)
    : "a"(num), "D"(ar1), "S"(ar2), "d"(ar3), "r"(lar4), "r"(lar5)
    : "cc", "memory", "%rcx", "%r9", "%r11");
  return ret;
}
// Linux x86-64 system call taking six arguments.
//   num - system call number, loaded into rax.
//   ar1 - first argument, loaded into rdi.
//   ar2 - second argument, loaded into rsi.
//   ar3 - third argument, loaded into rdx.
//   ar4 - fourth argument, loaded into r10.
//   ar5 - fifth argument, loaded into r8.
//   ar6 - sixth argument, loaded into r9.
// Returns the value left in rax, as unsigned; cast to a signed type at the
// call site when the sign of the result matters.
EI_ EU8 ELnx6(EU8 num, EU8 ar1, EU8 ar2, EU8 ar3, EU8 ar4, EU8 ar5, EU8 ar6) {
  EU8 ret;
  // The fourth to sixth arguments are pinned to r10, r8 and r9 through local
  // register variables and handed to the asm with generic "r" constraints.
  register EU8 lar4 __asm__("r10") = ar4;
  register EU8 lar5 __asm__("r8") = ar5;
  register EU8 lar6 __asm__("r9") = ar6;
  // __asm__/__volatile__ rather than plain asm, so the wrapper also builds
  // under strict -std=c99/-std=c11 and matches the __asm__ used for _start.
  // Every argument register is an input here, so only rcx and r11 are
  // listed as clobbered registers.
  __asm__ __volatile__("syscall"
    : "=a"(ret)
    : "a"(num), "D"(ar1), "S"(ar2), "d"(ar3), "r"(lar4), "r"(lar5), "r"(lar6)
    : "cc", "memory", "%rcx", "%r11");
  return ret;
}