20130109 - Run-time Binary Reload Detailed

I've adopted a fast iterative way to develop using hot swap of the game's compiled code at run-time. Some have asked for details, so here is a lot more than you want to know...

EDIT: Martin Scheffler added a project based on this to GitHub.

Automatic Recompile
In order to get automatic recompile whenever my source changes, I just open an extra terminal window and run this shell script, which automatically recompiles the entire game as a shared object whenever the source file changes. I keep this window alongside my editor window, so I can easily save and check for errors as I'm coding. And yes, my entire game is in one source file.
# Rebuild atom.c as a shared object every time its modification time changes.
# baseMTime holds the timestamp of the last build; 0 forces an initial build.
baseMTime=0
while :; do
  nowMTime=`stat -c %Y atom.c`
  if ((baseMTime != nowMTime)); then
    echo "----"
    gcc atom.c -o atom.linux-x86-64.so -std=gnu99 -O3 -fomit-frame-pointer -msse -msse2 -msse3 -march=nocona -ffast-math -mno-ieee-fp -mfpmath=sse -D___BUILD_LINUX_GNU_X86_64___ -fno-exceptions -fno-asynchronous-unwind-tables -fno-zero-initialized-in-bss -fpic -shared
    # Record the timestamp sampled before the compile, so a save that lands
    # during the compile still triggers another rebuild on the next poll.
    baseMTime=$nowMTime
  fi
  sleep 0.1
done

Run-time Hot Loader
Warning: I can only remember compiling and using the Linux code; assume the Windows code is broken...
Here is the loader I use to load and then continuously reload the compiled game source without ever leaving the game. I'll cover how this works in a bit. The important thing to note is that this makes a copy of the shared library and loads that copy, so the game code can get recompiled again.

                        ATOM HOT LOADER

// Build configuration for the Linux/GNU x86-64 target (selected with
// -D___BUILD_LINUX_GNU_X86_64___ on the compiler command line).
#ifdef ___BUILD_LINUX_GNU_X86_64___
  #define ___OS_UNIX___ 1
  #define ___OS_WINDOWS___ 0
  // File name of the compiled game shared object that the loader reloads.
  #define ___HOT___ "atom.linux-x86-64.so"
#endif
#if ___OS_UNIX___
  #include <dlfcn.h>
  #include <errno.h>
  #include <fcntl.h>
  #include <sys/types.h>
  #include <sys/stat.h>
  #include <time.h>
  #include <unistd.h>

  // Remove a file. The result of unlink() is deliberately ignored.
  static void FileUnlink(const char* path) { unlink(path); }

  // 4 KiB scratch buffer, 4096-byte aligned, used by FileCopy().
  static char __attribute__((aligned(4096))) fileBuf[4096];

  // Copy the file `src` to `dst` (created or truncated, owner rwx).
  // On any failure (either open fails, or a read/write error other than
  // EINTR occurs) `dst` is removed so a partial copy is never left behind.
  static void FileCopy(const char* dst, const char* src) {
    int ok=0;
    const int srcF=open(src,O_RDONLY);
    const int dstF=open(dst,O_WRONLY|O_CREAT|O_TRUNC,S_IRWXU);
    if(srcF>=0 && dstF>=0) {
      while(1) { ssize_t bytes=read(srcF,fileBuf,sizeof(fileBuf));
        if(bytes==0) { ok=1; break; }                        // end of file: done
        if(bytes<0) { if(errno==EINTR) continue; break; }    // retry interrupted reads
        { const char* buf=fileBuf;
          // write() may be partial or interrupted; loop until the chunk is out.
          while(bytes>0) { const ssize_t wrote=write(dstF,buf,(size_t)bytes);
            if(wrote<0) { if(errno==EINTR) continue; break; }
            bytes-=wrote; buf+=wrote; }
          if(bytes>0) break; } } }                           // hard write error: give up
    if(srcF>=0) close(srcF);
    if(dstF>=0) close(dstF);
    if(!ok) unlink(dst); }

  // Name of the platform's symbol-lookup function, resolved via LibSym().
  #define LIB_SYM_STRING "dlsym"
  // Thin wrappers over the dynamic loader: dlsym / dlopen(RTLD_LAZY) / dlclose.
  static void* LibSym(void* mod, const char* str) { return dlsym(mod,str); }
  static void* LibOpen(const char* str) { return dlopen(str, RTLD_LAZY); }
  static void LibClose(void* mod) { dlclose(mod); }
  // Sleep for 1/60 of a second.
  static void Sleeper(void) { struct timespec t; t.tv_sec=0; t.tv_nsec=1000000000/60; nanosleep(&t,0); }
#endif
#if ___OS_WINDOWS___
  #include <Windows.h>
  #pragma comment(lib, "Kernel32")
  // The paths are narrow `const char*` strings, so the ANSI ("A") entry points
  // are called explicitly; the TCHAR macros would select the wide versions
  // when UNICODE is defined.
  // Copy `src` over `dst` (last argument FALSE = overwrite an existing file).
  static void FileCopy(const char* dst, const char* src) { CopyFileA(src, dst, FALSE); }
  // Remove a file. The result is deliberately ignored.
  static void FileUnlink(const char* path) { DeleteFileA(path); }
  // Name of the platform's symbol-lookup function, resolved via LibSym().
  #define LIB_SYM_STRING "GetProcAddress"
  // NOTE(review): the loader calls LibSym(0, LIB_SYM_STRING); confirm that
  // GetProcAddress() accepts a NULL module handle for this lookup.
  static void* LibSym(void* mod, const char* str) { return (void*)GetProcAddress((HMODULE)mod,str); }
  static void* LibOpen(const char* str) { return (void*) LoadLibraryA(str); }
  static void LibClose(void* mod) { FreeLibrary((HMODULE)mod); }
  // Sleep for 1000/60 milliseconds (integer division).
  static void Sleeper(void) { Sleep(1000/60); }
#endif
// Signature of the entry point the loader looks up under the name "Hot";
// it receives the hot[] slot array below.
typedef void (*HotF)(void**);

// Slot indices into hot[], the argument block shared between the loader and
// the loaded library.
enum {
  HOT_LIB_SYM, // address to dlsym() or GetProcAddress()
  HOT_DATA,    // address to previous data, or 0 on first open
  HOT_BYTES,   // bytes of previous data, set by Hot()
  HOT_CLEAN,   // address to HotClean(), call after data copy
  HOT_LIB,     // handle to loaded lib
  HOT_LIB_OLD, // handle to old loaded lib
  HOT_VER,     // temp file version
  HOT_TOTAL    // number of slots; must stay last
};

// Zero-initialized (static storage), so HOT_DATA reads 0 on the first open.
static void* hot[HOT_TOTAL];
// Retire the previously loaded library, then advance the loader's bookkeeping.
// If an old library handle is recorded, it is closed and its temp file
// ("<hex digit>.t", digit = (version-1) & 0xf) is removed. In all cases the
// current library becomes the "old" one and the version counter is bumped.
static void HotClean(void** h) {
  void* const prev=h[HOT_LIB_OLD];
  if(prev) {
    char path[4]={'0','.','t',0};
    LibClose(prev);
    // The version counter is stored directly in the pointer-sized slot.
    const unsigned int slot=(((unsigned int)(h[HOT_VER]))-1)&0xf;
    if(slot<0xa) path[0]=(char)(slot+'0');
    else         path[0]=(char)(slot+('A'-0xA));
    FileUnlink(path);
  }
  h[HOT_LIB_OLD]=h[HOT_LIB];
  h[HOT_VER]=h[HOT_VER]+1;
}
#if ___OS_UNIX___
  int main(void)
#if ___OS_WINDOWS___
  #ifdef ___DEBUG___
    int main(void)
    int CALLBACK WinMain(HINSTANCE i, HINSTANCE pi, LPSTR cmd, int show)
#if ___OS_UNIX___
  char name[8]={'.','/','0','.','t',0,0,0};
  char name[8]={'.','\\','0','.','t',0,0,0};
  hot[HOT_LIB_SYM]=LibSym(0,LIB_SYM_STRING); hot[HOT_CLEAN]=HotClean;
  while(1) { const unsigned int ver=((unsigned int)(hot[HOT_VER]))&0xf;
    { void* const lib=hot[HOT_LIB]=LibOpen(name);
      if(lib) { HotF Hot=LibSym(lib,"Hot"); if(Hot) { Hot(hot); continue; } } }
    Sleeper(); } return 0; }

Multi-Pass C Source Compile
In order to get to how things work, I'm going to need to first describe some very non-standard conventions I use in my game source. First the game source includes itself three times. This enables me to "use" things in the C source "before" they exist by just defining what pass the code should compile in.
// Multi-pass driver. On the outermost compile ___PASS___ is not yet defined,
// so this block includes the file into itself with ___PASS___ set to 1 and
// then 2; during those nested inclusions ___PASS___ is defined and this block
// is skipped. ___PASS___ is left at 3 for whatever follows this block.
#ifndef ___PASS___
  #define ___PASS___ 1
  #include "atom.c"
  #undef ___PASS___
  #define ___PASS___ 2
  #include "atom.c"
  #undef ___PASS___
  #define ___PASS___ 3
#endif

I set up a lot of defines to keep the code short; use this as a reference later.
  // Short type names: letter = kind (F float, U unsigned, S signed),
  // digit = size in bytes; UP/SP are the pointer-sized integers.
  typedef double F8; typedef float F4;
  typedef unsigned int U4; typedef signed int S4;
  typedef unsigned short U2; typedef signed short S2;
  typedef unsigned char U1; typedef signed char S1;
  #if (___CC_GNU___)
    // NOTE(review): `long` is 4 bytes on 32-bit GNU targets, so U8/S8 would
    // not be 8 bytes when ___CPU_BITS___==32 — confirm before building 32-bit.
    typedef unsigned long U8; typedef signed long S8;
    #if (___CPU_BITS___==32)
      typedef unsigned int UP; typedef signed int SP;
    #else
      typedef unsigned long UP; typedef signed long SP;
    #endif
    #define __A__(b) __attribute__((aligned(b)))           // align to b bytes
    #define __E__(e,v) __builtin_expect((e), (v))          // expect e to equal v
    #define __I__ static __attribute__((always_inline))    // file-local, always inlined
    #define __N__ static __attribute__((noinline))         // file-local, never inlined
    #define __S__ static                                   // file-local
    #define NA restrict                                    // "no alias" pointer qualifier
  #endif
And some more for typecasting,
  // Cast helpers, three per base type T:
  //   T_(a)    cast to T
  //   T_P_(a)  cast to T*
  //   T_NA_(a) cast to T* NA
  // 1-byte integers.
  #define S1_(a)    ((S1)(a))
  #define S1_P_(a)  ((S1*)(a))
  #define S1_NA_(a) ((S1* NA)(a))
  #define U1_(a)    ((U1)(a))
  #define U1_P_(a)  ((U1*)(a))
  #define U1_NA_(a) ((U1* NA)(a))
  // 2-byte integers.
  #define S2_(a)    ((S2)(a))
  #define S2_P_(a)  ((S2*)(a))
  #define S2_NA_(a) ((S2* NA)(a))
  #define U2_(a)    ((U2)(a))
  #define U2_P_(a)  ((U2*)(a))
  #define U2_NA_(a) ((U2* NA)(a))
  // 4-byte integers.
  #define S4_(a)    ((S4)(a))
  #define S4_P_(a)  ((S4*)(a))
  #define S4_NA_(a) ((S4* NA)(a))
  #define U4_(a)    ((U4)(a))
  #define U4_P_(a)  ((U4*)(a))
  #define U4_NA_(a) ((U4* NA)(a))
  // 8-byte integers.
  #define S8_(a)    ((S8)(a))
  #define S8_P_(a)  ((S8*)(a))
  #define S8_NA_(a) ((S8* NA)(a))
  #define U8_(a)    ((U8)(a))
  #define U8_P_(a)  ((U8*)(a))
  #define U8_NA_(a) ((U8* NA)(a))
  // Pointer-sized integers.
  #define SP_(a)    ((SP)(a))
  #define SP_P_(a)  ((SP*)(a))
  #define SP_NA_(a) ((SP* NA)(a))
  #define UP_(a)    ((UP)(a))
  #define UP_P_(a)  ((UP*)(a))
  #define UP_NA_(a) ((UP* NA)(a))
  // Floating point.
  #define F4_(a)    ((F4)(a))
  #define F4_P_(a)  ((F4*)(a))
  #define F4_NA_(a) ((F4* NA)(a))
  #define F8_(a)    ((F8)(a))
  #define F8_P_(a)  ((F8*)(a))
  #define F8_NA_(a) ((F8* NA)(a))

All Global Data in One Structure
In order to re-attach code but keep static globals in the shared object, I need to copy the globals on re-attach. So all my globals end up in one structure. Note below that this static "all" structure is defined with page alignment and has forced non-zero data to ensure it does not get placed into the "zero" segment. This code is always at the end of my file. Part of the point of the "___PASS___==1" pass is to typedef all the structures which end up in this "all" structure. The multi-pass setup enables those typedefs to live in the source next to the code they are associated with. That code is in "___PASS___==2".

                       [ALL] EVERYTHING

#if (___PASS___==1)
  // Single container for every global of the game, so a hot reload can carry
  // state across libraries by copying this one object.
  typedef struct {
    HotT hot;
    // ... all game global data is in here ...
    // NOTE(review): presumably slack so the size divides into the
    // 8-UP units that HotOpen() copies — confirm.
    UP pad[8]; } All;
  // 4096-byte aligned. The explicit non-zero initializer for the first member
  // keeps the object out of the zero-initialized segment.
  __S__ All __A__(4096) all={{(LibSymbolF)(~0)}};
  // Restrict-qualified views of `all`: mutable and read-only.
  __S__ All* NA const allNA=(All* NA)(&all);
  __S__ const All* NA const allCNA=(const All* NA)(&all);
#endif

Hot Swap Entry
The entry point in the game calls HotOpen() passing in the hot loader arguments.
#if (___PASS___==2)
  // Entry point of the game library; receives the loader's argument block.
  // NOTE(review): this listing is an excerpt — it stops right after the
  // first-entry branch opens; the rest of the body, the closing braces and
  // the matching #endif are not shown here.
  void Hot(HotArgT* NA arg) {
    // HotOpen() returns the previous data address, which is 0 on the first open.
    if(HotOpen(arg)==0) {
      // First entry.
On every load after the first, HotOpen() copies over the global data from the prior shared library, then calls Clean() to have the loader unload the prior library. The function then sets up a pointer to the new global data and its size for the next hot reload.
#if (___PASS___==1)
  // Function-pointer types for the dynamic-library calls, expressed with the
  // pointer-sized integer UP for handles and results.
  typedef U4 (*LibCloseF)(UP);
  typedef UP (*LibOpenF)(const U1*, U4);
  typedef UP (*LibSymbolF)(UP, const U1*);
#endif
#if (___PASS___==1)
  // Callback supplied in the argument block; HotOpen() invokes it with the
  // address of that block once the old data has been copied.
  typedef void (*HotClean)(UP);
  // Argument block passed to Hot(): seven pointer-sized fields.
  typedef struct { LibSymbolF Symbol; void* data; UP bytes; HotClean Clean; UP lib; UP oldLib; UP ver; } HotArgT;
  // Hot-reload state kept inside the game's global data.
  typedef struct { LibSymbolF Symbol; UP time; } HotT;
  // Path of the compiled shared object whose file time HotReload() checks.
  __S__ const char* hotPath=___HOT___;
#endif
#if (___PASS___==2)
  // Attach this freshly loaded library to the loader's argument block.
  // If a previous library left data behind (arg->data != 0) it is copied into
  // `all`; then arg->Clean() is invoked and the block is updated to describe
  // this library's `all` (address and size) for the next reload.
  // Returns the previous data address as an integer, i.e. 0 on the first open.
  __I__ UP HotOpen(HotArgT* NA arg) {
    void* oldData=arg->data;
    // Copy before Clean(): Clean() is what lets the loader unload the prior
    // library that oldData belongs to.
    // NOTE(review): the count is sizeof(all) in units of 8 UPs — presumably
    // the unit Memcpy8P() works in; confirm against its definition.
    if(oldData) Memcpy8P(UP_NA_(&all), oldData, sizeof(all)/(sizeof(UP)*8));
    arg->Clean(UP_(arg));
    // Publish both the address and the size of the live data.
    arg->data=&all; arg->bytes=sizeof(all);
    return UP_(oldData); }
  // Returns 1 when FileTime() of the shared object at hotPath differs from
  // the value stored in all.hot.time, otherwise 0.
  __I__ UP HotReload(void) {
    if(FileTime(UP_(hotPath))!=all.hot.time) return 1;
    return 0; }