ON THIS PAGE
    ARCHIVES
    CATEGORIES
    BLOGROLL
    LINKS
    SEARCH
    MY BOOKS
    DISCLAIMER
 
 Sunday, February 24, 2008
Some interesting tidbits about LLVM

LLVM definitely does some interesting things as part of its toolchain.

Consider the humble HelloWorld:

   1: #include <stdio.h>
   2:  
   3: int main() {
   4:   printf("hello world\n");
   5:   return 0;
   6: }

Assuming you have a functioning llvm and llvm-gcc working on your system, you can compile it into LLVM bitcode. This bitcode is directly executable using the lli.exe from llvm:

$ lli < hello.bc
hello world

Meh. Not so interesting. Let's look at the LLVM bitcode for the code, though--that's interesting as a first peek at what LLVM bitcode might look like:

   1: ; ModuleID = '<stdin>'
   2: target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
   3: target triple = "mingw32"
   4: @.str = internal constant [12 x i8] c"hello world\00"        ; <[12 x i8]*> [#uses=1] 
   5:  
   6: define i32 @main() {
   7: entry:
   8:     %tmp2 = tail call i32 @puts( i8* getelementptr ([12 x i8]* @.str, i32 0, i32 0) )        ; <i32> [#uses=0]
   9:     ret i32 0
  10: } 
  11:  
  12: declare i32 @puts(i8*)

Hmm. Now of course, LLVM also has to be able to get down to actual machine instructions, and in point of fact there is a tool in the LLVM toolchain, called llc, that can do this transformation ahead-of-time, like so:

$ llc hello.bc -o hello.bc.s -march x86

And, looking at the results, we see...

   1: .text
   2: .align    16
   3: .globl    _main
   4: .def     _main;    .scl    2;    .type    32;    .endef
   5: n:
   6: pushl    %ebp
   7: movl    %esp, %ebp
   8: subl    $8, %esp
   9: andl    $4294967280, %esp
  10: movl    $16, %eax
  11: call    __alloca
  12: call    ___main
  13: movl    $_.str, (%esp)
  14: call    _puts
  15: xorl    %eax, %eax
  16: movl    %ebp, %esp
  17: popl    %ebp
  18: ret
  19: .data
  20: r:                # .str
  21: .asciz    "hello world"
  22: .def     _puts;    .scl    2;    .type    32;    .endef

Bleah. Assembly language, and in NASM format, to boot. (What did you expect, anyway?)

Of course, assembly language and C were always considered fairly close together in terms of their abstraction layer (C was designed as a replacement for assembly language when porting Unix, remember), so it might not be too hard to...

$ llc hello.bc -o hello.bc.c -march c

And get...

   1: /* Provide Declarations */
   2: #include <stdarg.h>
   3: #include <setjmp.h>
   4: /* get a declaration for alloca */
   5: #if defined(__CYGWIN__) || defined(__MINGW32__)
   6: #define  alloca(x) __builtin_alloca((x))
   7: #define _alloca(x) __builtin_alloca((x))
   8: #elif defined(__APPLE__)
   9: extern void *__builtin_alloca(unsigned long);
  10: #define alloca(x) __builtin_alloca(x)
  11: #define longjmp _longjmp
  12: #define setjmp _setjmp
  13: #elif defined(__sun__)
  14: #if defined(__sparcv9)
  15: extern void *__builtin_alloca(unsigned long);
  16: #else
  17: extern void *__builtin_alloca(unsigned int);
  18: #endif
  19: #define alloca(x) __builtin_alloca(x)
  20: #elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)
  21: #define alloca(x) __builtin_alloca(x)
  22: #elif defined(_MSC_VER)
  23: #define inline _inline
  24: #define alloca(x) _alloca(x)
  25: #else
  26: #include <alloca.h>
  27: #endif
  28:  
  29: #ifndef __GNUC__  /* Can only support "linkonce" vars with GCC */
  30: #define __attribute__(X)
  31: #endif
  32:  
  33: #if defined(__GNUC__) && defined(__APPLE_CC__)
  34: #define __EXTERNAL_WEAK__ __attribute__((weak_import))
  35: #elif defined(__GNUC__)
  36: #define __EXTERNAL_WEAK__ __attribute__((weak))
  37: #else
  38: #define __EXTERNAL_WEAK__
  39: #endif
  40:  
  41: #if defined(__GNUC__) && defined(__APPLE_CC__)
  42: #define __ATTRIBUTE_WEAK__
  43: #elif defined(__GNUC__)
  44: #define __ATTRIBUTE_WEAK__ __attribute__((weak))
  45: #else
  46: #define __ATTRIBUTE_WEAK__
  47: #endif
  48:  
  49: #if defined(__GNUC__)
  50: #define __HIDDEN__ __attribute__((visibility("hidden")))
  51: #endif
  52:  
  53: #ifdef __GNUC__
  54: #define LLVM_NAN(NanStr)   __builtin_nan(NanStr)   /* Double */
  55: #define LLVM_NANF(NanStr)  __builtin_nanf(NanStr)  /* Float */
  56: #define LLVM_NANS(NanStr)  __builtin_nans(NanStr)  /* Double */
  57: #define LLVM_NANSF(NanStr) __builtin_nansf(NanStr) /* Float */
  58: #define LLVM_INF           __builtin_inf()         /* Double */
  59: #define LLVM_INFF          __builtin_inff()        /* Float */
  60: #define LLVM_PREFETCH(addr,rw,locality) __builtin_prefetch(addr,rw,locality)
  61: #define __ATTRIBUTE_CTOR__ __attribute__((constructor))
  62: #define __ATTRIBUTE_DTOR__ __attribute__((destructor))
  63: #define LLVM_ASM           __asm__
  64: #else
  65: #define LLVM_NAN(NanStr)   ((double)0.0)           /* Double */
  66: #define LLVM_NANF(NanStr)  0.0F                    /* Float */
  67: #define LLVM_NANS(NanStr)  ((double)0.0)           /* Double */
  68: #define LLVM_NANSF(NanStr) 0.0F                    /* Float */
  69: #define LLVM_INF           ((double)0.0)           /* Double */
  70: #define LLVM_INFF          0.0F                    /* Float */
  71: #define LLVM_PREFETCH(addr,rw,locality)            /* PREFETCH */
  72: #define __ATTRIBUTE_CTOR__
  73: #define __ATTRIBUTE_DTOR__
  74: #define LLVM_ASM(X)
  75: #endif
  76:  
  77: #if __GNUC__ < 4 /* Old GCC's, or compilers not GCC */ 
  78: #define __builtin_stack_save() 0   /* not implemented */
  79: #define __builtin_stack_restore(X) /* noop */
  80: #endif
  81:  
  82: #define CODE_FOR_MAIN() /* Any target-specific code for main()*/
  83:  
  84: #ifndef __cplusplus
  85: typedef unsigned char bool;
  86: #endif
  87:  
  88:  
  89: /* Support for floating point constants */
  90: typedef unsigned long long ConstantDoubleTy;
  91: typedef unsigned int        ConstantFloatTy;
  92: typedef struct { unsigned long long f1; unsigned short f2; unsigned short pad[3]; } ConstantFP80Ty;
  93: typedef struct { unsigned long long f1; unsigned long long f2; } ConstantFP128Ty;
  94:  
  95:  
  96: /* Global Declarations */
  97: /* Helper union for bitcasts */
  98: typedef union {
  99:   unsigned int Int32;
 100:   unsigned long long Int64;
 101:   float Float;
 102:   double Double;
 103: } llvmBitCastUnion;
 104:  
 105: /* External Global Variable Declarations */
 106:  
 107: /* Function Declarations */
 108: double fmod(double, double);
 109: float fmodf(float, float);
 110: long double fmodl(long double, long double);
 111: unsigned int main(void);
 112: unsigned int puts(unsigned char *);
 113: unsigned char *malloc();
 114: void free(unsigned char *);
 115: void abort(void);
 116: