diff options
Diffstat (limited to 'examples')
43 files changed, 8209 insertions, 0 deletions
diff --git a/examples/BrainF/BrainF.cpp b/examples/BrainF/BrainF.cpp new file mode 100644 index 0000000..a443ad4 --- /dev/null +++ b/examples/BrainF/BrainF.cpp @@ -0,0 +1,467 @@ +//===-- BrainF.cpp - BrainF compiler example ----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===--------------------------------------------------------------------===// +// +// This class compiles the BrainF language into LLVM assembly. +// +// The BrainF language has 8 commands: +// Command Equivalent C Action +// ------- ------------ ------ +// , *h=getchar(); Read a character from stdin, 255 on EOF +// . putchar(*h); Write a character to stdout +// - --*h; Decrement tape +// + ++*h; Increment tape +// < --h; Move head left +// > ++h; Move head right +// [ while(*h) { Start loop +// ] } End loop +// +//===--------------------------------------------------------------------===// + +#include "BrainF.h" +#include "llvm/Constants.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/ADT/STLExtras.h" +#include <iostream> +using namespace llvm; + +//Set the constants for naming +const char *BrainF::tapereg = "tape"; +const char *BrainF::headreg = "head"; +const char *BrainF::label = "brainf"; +const char *BrainF::testreg = "test"; + +Module *BrainF::parse(std::istream *in1, int mem, CompileFlags cf, + LLVMContext& Context) { + in = in1; + memtotal = mem; + comflag = cf; + + header(Context); + readloop(0, 0, 0, Context); + delete builder; + return module; +} + +void BrainF::header(LLVMContext& C) { + module = new Module("BrainF", C); + + //Function prototypes + + //declare void @llvm.memset.i32(i8 *, i8, i32, i32) + const Type *Tys[] = { Type::getInt32Ty(C) }; + Function *memset_func = Intrinsic::getDeclaration(module, Intrinsic::memset, + Tys, 1); + + //declare i32 @getchar() + getchar_func = 
cast<Function>(module-> + getOrInsertFunction("getchar", IntegerType::getInt32Ty(C), NULL)); + + //declare i32 @putchar(i32) + putchar_func = cast<Function>(module-> + getOrInsertFunction("putchar", IntegerType::getInt32Ty(C), + IntegerType::getInt32Ty(C), NULL)); + + + //Function header + + //define void @brainf() + brainf_func = cast<Function>(module-> + getOrInsertFunction("brainf", Type::getVoidTy(C), NULL)); + + builder = new IRBuilder<>(BasicBlock::Create(C, label, brainf_func)); + + //%arr = malloc i8, i32 %d + ConstantInt *val_mem = ConstantInt::get(C, APInt(32, memtotal)); + BasicBlock* BB = builder->GetInsertBlock(); + const Type* IntPtrTy = IntegerType::getInt32Ty(C); + const Type* Int8Ty = IntegerType::getInt8Ty(C); + Constant* allocsize = ConstantExpr::getSizeOf(Int8Ty); + allocsize = ConstantExpr::getTruncOrBitCast(allocsize, IntPtrTy); + ptr_arr = CallInst::CreateMalloc(BB, IntPtrTy, Int8Ty, allocsize, val_mem, + NULL, "arr"); + BB->getInstList().push_back(cast<Instruction>(ptr_arr)); + + //call void @llvm.memset.i32(i8 *%arr, i8 0, i32 %d, i32 1) + { + Value *memset_params[] = { + ptr_arr, + ConstantInt::get(C, APInt(8, 0)), + val_mem, + ConstantInt::get(C, APInt(32, 1)) + }; + + CallInst *memset_call = builder-> + CreateCall(memset_func, memset_params, array_endof(memset_params)); + memset_call->setTailCall(false); + } + + //%arrmax = getelementptr i8 *%arr, i32 %d + if (comflag & flag_arraybounds) { + ptr_arrmax = builder-> + CreateGEP(ptr_arr, ConstantInt::get(C, APInt(32, memtotal)), "arrmax"); + } + + //%head.%d = getelementptr i8 *%arr, i32 %d + curhead = builder->CreateGEP(ptr_arr, + ConstantInt::get(C, APInt(32, memtotal/2)), + headreg); + + + + //Function footer + + //brainf.end: + endbb = BasicBlock::Create(C, label, brainf_func); + + //call free(i8 *%arr) + endbb->getInstList().push_back(CallInst::CreateFree(ptr_arr, endbb)); + + //ret void + ReturnInst::Create(C, endbb); + + + + //Error block for array out of bounds + if (comflag & 
flag_arraybounds) + { + //@aberrormsg = internal constant [%d x i8] c"\00" + Constant *msg_0 = + ConstantArray::get(C, "Error: The head has left the tape.", true); + + GlobalVariable *aberrormsg = new GlobalVariable( + *module, + msg_0->getType(), + true, + GlobalValue::InternalLinkage, + msg_0, + "aberrormsg"); + + //declare i32 @puts(i8 *) + Function *puts_func = cast<Function>(module-> + getOrInsertFunction("puts", IntegerType::getInt32Ty(C), + PointerType::getUnqual(IntegerType::getInt8Ty(C)), NULL)); + + //brainf.aberror: + aberrorbb = BasicBlock::Create(C, label, brainf_func); + + //call i32 @puts(i8 *getelementptr([%d x i8] *@aberrormsg, i32 0, i32 0)) + { + Constant *zero_32 = Constant::getNullValue(IntegerType::getInt32Ty(C)); + + Constant *gep_params[] = { + zero_32, + zero_32 + }; + + Constant *msgptr = ConstantExpr:: + getGetElementPtr(aberrormsg, gep_params, + array_lengthof(gep_params)); + + Value *puts_params[] = { + msgptr + }; + + CallInst *puts_call = + CallInst::Create(puts_func, + puts_params, array_endof(puts_params), + "", aberrorbb); + puts_call->setTailCall(false); + } + + //br label %brainf.end + BranchInst::Create(endbb, aberrorbb); + } +} + +void BrainF::readloop(PHINode *phi, BasicBlock *oldbb, BasicBlock *testbb, + LLVMContext &C) { + Symbol cursym = SYM_NONE; + int curvalue = 0; + Symbol nextsym = SYM_NONE; + int nextvalue = 0; + char c; + int loop; + int direction; + + while(cursym != SYM_EOF && cursym != SYM_ENDLOOP) { + // Write out commands + switch(cursym) { + case SYM_NONE: + // Do nothing + break; + + case SYM_READ: + { + //%tape.%d = call i32 @getchar() + CallInst *getchar_call = builder->CreateCall(getchar_func, tapereg); + getchar_call->setTailCall(false); + Value *tape_0 = getchar_call; + + //%tape.%d = trunc i32 %tape.%d to i8 + Value *tape_1 = builder-> + CreateTrunc(tape_0, IntegerType::getInt8Ty(C), tapereg); + + //store i8 %tape.%d, i8 *%head.%d + builder->CreateStore(tape_1, curhead); + } + break; + + case SYM_WRITE: + 
{ + //%tape.%d = load i8 *%head.%d + LoadInst *tape_0 = builder->CreateLoad(curhead, tapereg); + + //%tape.%d = sext i8 %tape.%d to i32 + Value *tape_1 = builder-> + CreateSExt(tape_0, IntegerType::getInt32Ty(C), tapereg); + + //call i32 @putchar(i32 %tape.%d) + Value *putchar_params[] = { + tape_1 + }; + CallInst *putchar_call = builder-> + CreateCall(putchar_func, + putchar_params, array_endof(putchar_params)); + putchar_call->setTailCall(false); + } + break; + + case SYM_MOVE: + { + //%head.%d = getelementptr i8 *%head.%d, i32 %d + curhead = builder-> + CreateGEP(curhead, ConstantInt::get(C, APInt(32, curvalue)), + headreg); + + //Error block for array out of bounds + if (comflag & flag_arraybounds) + { + //%test.%d = icmp uge i8 *%head.%d, %arrmax + Value *test_0 = builder-> + CreateICmpUGE(curhead, ptr_arrmax, testreg); + + //%test.%d = icmp ult i8 *%head.%d, %arr + Value *test_1 = builder-> + CreateICmpULT(curhead, ptr_arr, testreg); + + //%test.%d = or i1 %test.%d, %test.%d + Value *test_2 = builder-> + CreateOr(test_0, test_1, testreg); + + //br i1 %test.%d, label %main.%d, label %main.%d + BasicBlock *nextbb = BasicBlock::Create(C, label, brainf_func); + builder->CreateCondBr(test_2, aberrorbb, nextbb); + + //main.%d: + builder->SetInsertPoint(nextbb); + } + } + break; + + case SYM_CHANGE: + { + //%tape.%d = load i8 *%head.%d + LoadInst *tape_0 = builder->CreateLoad(curhead, tapereg); + + //%tape.%d = add i8 %tape.%d, %d + Value *tape_1 = builder-> + CreateAdd(tape_0, ConstantInt::get(C, APInt(8, curvalue)), tapereg); + + //store i8 %tape.%d, i8 *%head.%d\n" + builder->CreateStore(tape_1, curhead); + } + break; + + case SYM_LOOP: + { + //br label %main.%d + BasicBlock *testbb = BasicBlock::Create(C, label, brainf_func); + builder->CreateBr(testbb); + + //main.%d: + BasicBlock *bb_0 = builder->GetInsertBlock(); + BasicBlock *bb_1 = BasicBlock::Create(C, label, brainf_func); + builder->SetInsertPoint(bb_1); + + // Make part of PHI instruction now, wait until 
end of loop to finish + PHINode *phi_0 = + PHINode::Create(PointerType::getUnqual(IntegerType::getInt8Ty(C)), + headreg, testbb); + phi_0->reserveOperandSpace(2); + phi_0->addIncoming(curhead, bb_0); + curhead = phi_0; + + readloop(phi_0, bb_1, testbb, C); + } + break; + + default: + std::cerr << "Error: Unknown symbol.\n"; + abort(); + break; + } + + cursym = nextsym; + curvalue = nextvalue; + nextsym = SYM_NONE; + + // Reading stdin loop + loop = (cursym == SYM_NONE) + || (cursym == SYM_MOVE) + || (cursym == SYM_CHANGE); + while(loop) { + *in>>c; + if (in->eof()) { + if (cursym == SYM_NONE) { + cursym = SYM_EOF; + } else { + nextsym = SYM_EOF; + } + loop = 0; + } else { + direction = 1; + switch(c) { + case '-': + direction = -1; + // Fall through + + case '+': + if (cursym == SYM_CHANGE) { + curvalue += direction; + // loop = 1 + } else { + if (cursym == SYM_NONE) { + cursym = SYM_CHANGE; + curvalue = direction; + // loop = 1 + } else { + nextsym = SYM_CHANGE; + nextvalue = direction; + loop = 0; + } + } + break; + + case '<': + direction = -1; + // Fall through + + case '>': + if (cursym == SYM_MOVE) { + curvalue += direction; + // loop = 1 + } else { + if (cursym == SYM_NONE) { + cursym = SYM_MOVE; + curvalue = direction; + // loop = 1 + } else { + nextsym = SYM_MOVE; + nextvalue = direction; + loop = 0; + } + } + break; + + case ',': + if (cursym == SYM_NONE) { + cursym = SYM_READ; + } else { + nextsym = SYM_READ; + } + loop = 0; + break; + + case '.': + if (cursym == SYM_NONE) { + cursym = SYM_WRITE; + } else { + nextsym = SYM_WRITE; + } + loop = 0; + break; + + case '[': + if (cursym == SYM_NONE) { + cursym = SYM_LOOP; + } else { + nextsym = SYM_LOOP; + } + loop = 0; + break; + + case ']': + if (cursym == SYM_NONE) { + cursym = SYM_ENDLOOP; + } else { + nextsym = SYM_ENDLOOP; + } + loop = 0; + break; + + // Ignore other characters + default: + break; + } + } + } + } + + if (cursym == SYM_ENDLOOP) { + if (!phi) { + std::cerr << "Error: Extra ']'\n"; + 
abort(); + } + + // Write loop test + { + //br label %main.%d + builder->CreateBr(testbb); + + //main.%d: + + //%head.%d = phi i8 *[%head.%d, %main.%d], [%head.%d, %main.%d] + //Finish phi made at beginning of loop + phi->addIncoming(curhead, builder->GetInsertBlock()); + Value *head_0 = phi; + + //%tape.%d = load i8 *%head.%d + LoadInst *tape_0 = new LoadInst(head_0, tapereg, testbb); + + //%test.%d = icmp eq i8 %tape.%d, 0 + ICmpInst *test_0 = new ICmpInst(*testbb, ICmpInst::ICMP_EQ, tape_0, + ConstantInt::get(C, APInt(8, 0)), testreg); + + //br i1 %test.%d, label %main.%d, label %main.%d + BasicBlock *bb_0 = BasicBlock::Create(C, label, brainf_func); + BranchInst::Create(bb_0, oldbb, test_0, testbb); + + //main.%d: + builder->SetInsertPoint(bb_0); + + //%head.%d = phi i8 *[%head.%d, %main.%d] + PHINode *phi_1 = builder-> + CreatePHI(PointerType::getUnqual(IntegerType::getInt8Ty(C)), headreg); + phi_1->reserveOperandSpace(1); + phi_1->addIncoming(head_0, testbb); + curhead = phi_1; + } + + return; + } + + //End of the program, so go to return block + builder->CreateBr(endbb); + + if (phi) { + std::cerr << "Error: Missing ']'\n"; + abort(); + } +} diff --git a/examples/BrainF/BrainF.h b/examples/BrainF/BrainF.h new file mode 100644 index 0000000..add0687 --- /dev/null +++ b/examples/BrainF/BrainF.h @@ -0,0 +1,94 @@ +//===-- BrainF.h - BrainF compiler class ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===--------------------------------------------------------------------===// +// +// This class stores the data for the BrainF compiler so it doesn't have +// to pass all of it around. The main method is parse. 
+// +//===--------------------------------------------------------------------===// + +#ifndef BRAINF_H +#define BRAINF_H + +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/Support/IRBuilder.h" + +using namespace llvm; + +/// This class provides a parser for the BrainF language. +/// The class itself is made to store values during +/// parsing so they don't have to be passed around +/// as much. +class BrainF { + public: + /// Options for how BrainF should compile + enum CompileFlags { + flag_off = 0, + flag_arraybounds = 1 + }; + + /// This is the main method. It parses BrainF from in1 + /// and returns the module with a function + /// void brainf() + /// containing the resulting code. + /// On error, it calls abort. + /// The caller must delete the returned module. + Module *parse(std::istream *in1, int mem, CompileFlags cf, + LLVMContext& C); + + protected: + /// The different symbols in the BrainF language + enum Symbol { + SYM_NONE, + SYM_READ, + SYM_WRITE, + SYM_MOVE, + SYM_CHANGE, + SYM_LOOP, + SYM_ENDLOOP, + SYM_EOF + }; + + /// Names of the different parts of the language. + /// Tape is used for reading and writing the tape. + /// headreg is used for the position of the head. + /// label is used for the labels for the BasicBlocks. + /// testreg is used for testing the loop exit condition. + static const char *tapereg; + static const char *headreg; + static const char *label; + static const char *testreg; + + /// Put the brainf function preamble and other fixed pieces of code + void header(LLVMContext& C); + + /// The main loop for parsing. It calls itself recursively + /// to handle the depth of nesting of "[]". 
+ void readloop(PHINode *phi, BasicBlock *oldbb, + BasicBlock *testbb, LLVMContext &Context); + + /// Constants during parsing + int memtotal; + CompileFlags comflag; + std::istream *in; + Module *module; + Function *brainf_func; + Function *getchar_func; + Function *putchar_func; + Value *ptr_arr; + Value *ptr_arrmax; + BasicBlock *endbb; + BasicBlock *aberrorbb; + + /// Variables + IRBuilder<> *builder; + Value *curhead; +}; + +#endif diff --git a/examples/BrainF/BrainFDriver.cpp b/examples/BrainF/BrainFDriver.cpp new file mode 100644 index 0000000..c11a580 --- /dev/null +++ b/examples/BrainF/BrainFDriver.cpp @@ -0,0 +1,160 @@ +//===-- BrainFDriver.cpp - BrainF compiler driver -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===--------------------------------------------------------------------===// +// +// This program converts the BrainF language into LLVM assembly, +// which it can then run using the JIT or output as BitCode. +// +// This implementation has a tape of 65536 bytes, +// with the head starting in the middle. +// Range checking is off by default, so be careful. +// It can be enabled with -abc. 
+// +// Use: +// ./BrainF -jit prog.bf #Run program now +// ./BrainF -jit -abc prog.bf #Run program now safely +// ./BrainF prog.bf #Write as BitCode +// +// lli prog.bf.bc #Run generated BitCode +// llvm-ld -native -o=prog prog.bf.bc #Compile BitCode into native executable +// +//===--------------------------------------------------------------------===// + +#include "BrainF.h" +#include "llvm/Constants.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/ExecutionEngine/GenericValue.h" +#include "llvm/ExecutionEngine/JIT.h" +#include "llvm/Target/TargetSelect.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/raw_ostream.h" +#include <iostream> +#include <fstream> +using namespace llvm; + +//Command line options + +static cl::opt<std::string> +InputFilename(cl::Positional, cl::desc("<input brainf>")); + +static cl::opt<std::string> +OutputFilename("o", cl::desc("Output filename"), cl::value_desc("filename")); + +static cl::opt<bool> +ArrayBoundsChecking("abc", cl::desc("Enable array bounds checking")); + +static cl::opt<bool> +JIT("jit", cl::desc("Run program Just-In-Time")); + + +//Add main function so can be fully compiled +void addMainFunction(Module *mod) { + //define i32 @main(i32 %argc, i8 **%argv) + Function *main_func = cast<Function>(mod-> + getOrInsertFunction("main", IntegerType::getInt32Ty(mod->getContext()), + IntegerType::getInt32Ty(mod->getContext()), + PointerType::getUnqual(PointerType::getUnqual( + IntegerType::getInt8Ty(mod->getContext()))), NULL)); + { + Function::arg_iterator args = main_func->arg_begin(); + Value *arg_0 = args++; + arg_0->setName("argc"); + Value *arg_1 = args++; + arg_1->setName("argv"); + } + + //main.0: + BasicBlock *bb = BasicBlock::Create(mod->getContext(), "main.0", main_func); + + //call void @brainf() + { + CallInst *brainf_call = CallInst::Create(mod->getFunction("brainf"), + "", bb); + brainf_call->setTailCall(false); 
+ } + + //ret i32 0 + ReturnInst::Create(mod->getContext(), + ConstantInt::get(mod->getContext(), APInt(32, 0)), bb); +} + +int main(int argc, char **argv) { + cl::ParseCommandLineOptions(argc, argv, " BrainF compiler\n"); + + LLVMContext &Context = getGlobalContext(); + + if (InputFilename == "") { + errs() << "Error: You must specify the filename of the program to " + "be compiled. Use --help to see the options.\n"; + abort(); + } + + //Get the output stream + raw_ostream *out = &outs(); + if (!JIT) { + if (OutputFilename == "") { + std::string base = InputFilename; + if (InputFilename == "-") { base = "a"; } + + // Use default filename. + OutputFilename = base+".bc"; + } + if (OutputFilename != "-") { + std::string ErrInfo; + out = new raw_fd_ostream(OutputFilename.c_str(), ErrInfo, + raw_fd_ostream::F_Binary); + } + } + + //Get the input stream + std::istream *in = &std::cin; + if (InputFilename != "-") + in = new std::ifstream(InputFilename.c_str()); + + //Gather the compile flags + BrainF::CompileFlags cf = BrainF::flag_off; + if (ArrayBoundsChecking) + cf = BrainF::CompileFlags(cf | BrainF::flag_arraybounds); + + //Read the BrainF program + BrainF bf; + Module *mod = bf.parse(in, 65536, cf, Context); //64 KiB + if (in != &std::cin) + delete in; + addMainFunction(mod); + + //Verify generated code + if (verifyModule(*mod)) { + errs() << "Error: module failed verification. 
This shouldn't happen.\n"; + abort(); + } + + //Write it out + if (JIT) { + InitializeNativeTarget(); + + outs() << "------- Running JIT -------\n"; + ExecutionEngine *ee = EngineBuilder(mod).create(); + std::vector<GenericValue> args; + Function *brainf_func = mod->getFunction("brainf"); + GenericValue gv = ee->runFunction(brainf_func, args); + } else { + WriteBitcodeToFile(mod, *out); + } + + //Clean up + if (out != &outs()) + delete out; + delete mod; + + llvm_shutdown(); + + return 0; +} diff --git a/examples/BrainF/CMakeLists.txt b/examples/BrainF/CMakeLists.txt new file mode 100644 index 0000000..7bec105 --- /dev/null +++ b/examples/BrainF/CMakeLists.txt @@ -0,0 +1,6 @@ +set(LLVM_LINK_COMPONENTS jit bitwriter nativecodegen interpreter) + +add_llvm_example(BrainF + BrainF.cpp + BrainFDriver.cpp + ) diff --git a/examples/BrainF/Makefile b/examples/BrainF/Makefile new file mode 100644 index 0000000..2c3e066 --- /dev/null +++ b/examples/BrainF/Makefile @@ -0,0 +1,15 @@ +##===- examples/BrainF/Makefile ----------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../.. 
+TOOLNAME = BrainF +EXAMPLE_TOOL = 1 + +LINK_COMPONENTS := jit bitwriter nativecodegen interpreter + +include $(LEVEL)/Makefile.common diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt new file mode 100644 index 0000000..f60c0ed --- /dev/null +++ b/examples/CMakeLists.txt @@ -0,0 +1,16 @@ +add_subdirectory(BrainF) +add_subdirectory(Fibonacci) +add_subdirectory(HowToUseJIT) +add_subdirectory(Kaleidoscope) +add_subdirectory(ModuleMaker) + +if( NOT WIN32 ) + add_subdirectory(ExceptionDemo) +endif() + +include(CheckIncludeFile) +check_include_file(pthread.h HAVE_PTHREAD_H) + +if( HAVE_PTHREAD_H ) + add_subdirectory(ParallelJIT) +endif( HAVE_PTHREAD_H ) diff --git a/examples/ExceptionDemo/CMakeLists.txt b/examples/ExceptionDemo/CMakeLists.txt new file mode 100644 index 0000000..d661915 --- /dev/null +++ b/examples/ExceptionDemo/CMakeLists.txt @@ -0,0 +1,5 @@ +set(LLVM_LINK_COMPONENTS jit nativecodegen) + +add_llvm_example(ExceptionDemo + ExceptionDemo.cpp + ) diff --git a/examples/ExceptionDemo/ExceptionDemo.cpp b/examples/ExceptionDemo/ExceptionDemo.cpp new file mode 100644 index 0000000..46dc481 --- /dev/null +++ b/examples/ExceptionDemo/ExceptionDemo.cpp @@ -0,0 +1,2028 @@ +//===-- examples/ExceptionDemo/ExceptionDemo.cpp - +// An example use of the llvm Exception mechanism --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===--------------------------------------------------------------------===// +// +// Demo program which implements an example LLVM exception implementation, and +// shows several test cases including the handling of foreign exceptions. +// It is run with type info types arguments to throw. A test will +// be run for each given type info type. 
Type info types with the value +// of -1 will trigger a foreign C++ exception to be thrown; type info types +// <= 6 and >= 1 will cause the associated generated exceptions to be thrown +// and caught by generated test functions; and type info types > 6 +// will result in exceptions which pass through to the test harness. All other +// type info types are not supported and could cause a crash. In all cases, +// the "finally" blocks of every generated test function will be executed +// regardless of whether or not that test function ignores or catches the +// thrown exception. +// +// examples: +// +// ExceptionDemo +// +// causes a usage to be printed to stderr +// +// ExceptionDemo 2 3 7 -1 +// +// results in the following cases: +// - Value 2 causes an exception with a type info type of 2 to be +// thrown and caught by an inner generated test function. +// - Value 3 causes an exception with a type info type of 3 to be +// thrown and caught by an outer generated test function. +// - Value 7 causes an exception with a type info type of 7 to be +// thrown and NOT be caught by any generated function. +// - Value -1 causes a foreign C++ exception to be thrown and not be +// caught by any generated function +// +// Cases -1 and 7 are caught by a C++ test harness where the validity +// of a C++ catch(...) clause catching a generated exception with a +// type info type of 7 is questionable. +// +// This code uses code from the llvm compiler-rt project and the llvm +// Kaleidoscope project. 
+// +//===--------------------------------------------------------------------===// + + +#include "llvm/LLVMContext.h" +#include "llvm/DerivedTypes.h" +#include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/JIT.h" +#include "llvm/Module.h" +#include "llvm/PassManager.h" +#include "llvm/Intrinsics.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetSelect.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Support/IRBuilder.h" +#include "llvm/Support/Dwarf.h" + +#include <cstdio> +#include <string> +#include <sstream> +#include <map> +#include <vector> +#include <stdexcept> + + +#ifndef USE_GLOBAL_STR_CONSTS +#define USE_GLOBAL_STR_CONSTS true +#endif + +// System C++ ABI unwind types from: +// http://refspecs.freestandards.org/abi-eh-1.21.html + +extern "C" { + +typedef enum { + _URC_NO_REASON = 0, + _URC_FOREIGN_EXCEPTION_CAUGHT = 1, + _URC_FATAL_PHASE2_ERROR = 2, + _URC_FATAL_PHASE1_ERROR = 3, + _URC_NORMAL_STOP = 4, + _URC_END_OF_STACK = 5, + _URC_HANDLER_FOUND = 6, + _URC_INSTALL_CONTEXT = 7, + _URC_CONTINUE_UNWIND = 8 +} _Unwind_Reason_Code; + +typedef enum { + _UA_SEARCH_PHASE = 1, + _UA_CLEANUP_PHASE = 2, + _UA_HANDLER_FRAME = 4, + _UA_FORCE_UNWIND = 8, + _UA_END_OF_STACK = 16 +} _Unwind_Action; + +struct _Unwind_Exception; + +typedef void (*_Unwind_Exception_Cleanup_Fn) (_Unwind_Reason_Code, + struct _Unwind_Exception *); + +struct _Unwind_Exception { + uint64_t exception_class; + _Unwind_Exception_Cleanup_Fn exception_cleanup; + + uintptr_t private_1; + uintptr_t private_2; + + // @@@ The IA-64 ABI says that this structure must be double-word aligned. + // Taking that literally does not make much sense generically. Instead + // we provide the maximum alignment required by any type for the machine. 
+} __attribute__((__aligned__)); + +struct _Unwind_Context; +typedef struct _Unwind_Context* _Unwind_Context_t; + +extern const uint8_t* _Unwind_GetLanguageSpecificData (_Unwind_Context_t c); +extern uintptr_t _Unwind_GetGR (_Unwind_Context_t c, int i); +extern void _Unwind_SetGR (_Unwind_Context_t c, int i, uintptr_t n); +extern void _Unwind_SetIP (_Unwind_Context_t, uintptr_t new_value); +extern uintptr_t _Unwind_GetIP (_Unwind_Context_t context); +extern uintptr_t _Unwind_GetRegionStart (_Unwind_Context_t context); + +} // extern "C" + +// +// Example types +// + +/// This is our simplistic type info +struct OurExceptionType_t { + /// type info type + int type; +}; + + +/// This is our Exception class which relies on a negative offset to calculate +/// pointers to its instances from pointers to its unwindException member. +/// +/// Note: The above unwind.h defines struct _Unwind_Exception to be aligned +/// on a double word boundary. This is necessary to match the standard: +/// http://refspecs.freestandards.org/abi-eh-1.21.html +struct OurBaseException_t { + struct OurExceptionType_t type; + + // Note: This is properly aligned in unwind.h + struct _Unwind_Exception unwindException; +}; + + +// Note: Not needed since we are C++ +typedef struct OurBaseException_t OurException; +typedef struct _Unwind_Exception OurUnwindException; + +// +// Various globals used to support typeinfo and generatted exceptions in +// general +// + +static std::map<std::string, llvm::Value*> namedValues; + +int64_t ourBaseFromUnwindOffset; + +const unsigned char ourBaseExcpClassChars[] = + {'o', 'b', 'j', '\0', 'b', 'a', 's', '\0'}; + + +static uint64_t ourBaseExceptionClass = 0; + +static std::vector<std::string> ourTypeInfoNames; +static std::map<int, std::string> ourTypeInfoNamesIndex; + +static llvm::StructType* ourTypeInfoType; +static llvm::StructType* ourExceptionType; +static llvm::StructType* ourUnwindExceptionType; + +static llvm::ConstantInt* ourExceptionNotThrownState; 
+static llvm::ConstantInt* ourExceptionThrownState; +static llvm::ConstantInt* ourExceptionCaughtState; + +typedef std::vector<std::string> ArgNames; +typedef std::vector<const llvm::Type*> ArgTypes; + +// +// Code Generation Utilities +// + +/// Utility used to create a function, both declarations and definitions +/// @param module for module instance +/// @param retType function return type +/// @param theArgTypes function's ordered argument types +/// @param theArgNames function's ordered arguments needed if use of this +/// function corresponds to a function definition. Use empty +/// aggregate for function declarations. +/// @param functName function name +/// @param linkage function linkage +/// @param declarationOnly for function declarations +/// @param isVarArg function uses vararg arguments +/// @returns function instance +llvm::Function *createFunction(llvm::Module& module, + const llvm::Type* retType, + const ArgTypes& theArgTypes, + const ArgNames& theArgNames, + const std::string& functName, + llvm::GlobalValue::LinkageTypes linkage, + bool declarationOnly, + bool isVarArg) { + llvm::FunctionType* functType = llvm::FunctionType::get(retType, + theArgTypes, + isVarArg); + llvm::Function* ret = llvm::Function::Create(functType, + linkage, + functName, + &module); + if (!ret || declarationOnly) + return(ret); + + namedValues.clear(); + unsigned i = 0; + for (llvm::Function::arg_iterator argIndex = ret->arg_begin(); + i != theArgNames.size(); + ++argIndex, ++i) { + + argIndex->setName(theArgNames[i]); + namedValues[theArgNames[i]] = argIndex; + } + + return(ret); +} + + +/// Create an alloca instruction in the entry block of +/// the parent function. This is used for mutable variables etc. 
+/// @param function parent instance +/// @param varName stack variable name +/// @param type stack variable type +/// @param initWith optional constant initialization value +/// @returns AllocaInst instance +static llvm::AllocaInst *createEntryBlockAlloca(llvm::Function& function, + const std::string &varName, + const llvm::Type* type, + llvm::Constant* initWith = NULL) { + llvm::BasicBlock& block = function.getEntryBlock(); + llvm::IRBuilder<> tmp(&block, block.begin()); + llvm::AllocaInst* ret = tmp.CreateAlloca(type, 0, varName.c_str()); + + if (initWith) + tmp.CreateStore(initWith, ret); + + return(ret); +} + + +// +// Code Generation Utilities End +// + +// +// Runtime C Library functions +// + +// Note: using an extern "C" block so that static functions can be used +extern "C" { + +// Note: Better ways to decide on bit width +// +/// Prints a 32 bit number, according to the format, to stderr. +/// @param intToPrint integer to print +/// @param format printf like format to use when printing +void print32Int(int intToPrint, const char* format) { + if (format) { + // Note: No NULL check + fprintf(stderr, format, intToPrint); + } + else { + // Note: No NULL check + fprintf(stderr, "::print32Int(...):NULL arg.\n"); + } +} + + +// Note: Better ways to decide on bit width +// +/// Prints a 64 bit number, according to the format, to stderr. 
+/// @param intToPrint integer to print +/// @param format printf like format to use when printing +void print64Int(long int intToPrint, const char* format) { + if (format) { + // Note: No NULL check + fprintf(stderr, format, intToPrint); + } + else { + // Note: No NULL check + fprintf(stderr, "::print64Int(...):NULL arg.\n"); + } +} + + +/// Prints a C string to stderr +/// @param toPrint string to print +void printStr(char* toPrint) { + if (toPrint) { + fprintf(stderr, "%s", toPrint); + } + else { + fprintf(stderr, "::printStr(...):NULL arg.\n"); + } +} + + +/// Deletes the true previosly allocated exception whose address +/// is calculated from the supplied OurBaseException_t::unwindException +/// member address. Handles (ignores), NULL pointers. +/// @param expToDelete exception to delete +void deleteOurException(OurUnwindException* expToDelete) { +#ifdef DEBUG + fprintf(stderr, + "deleteOurException(...).\n"); +#endif + + if (expToDelete && + (expToDelete->exception_class == ourBaseExceptionClass)) { + + free(((char*) expToDelete) + ourBaseFromUnwindOffset); + } +} + + +/// This function is the struct _Unwind_Exception API mandated delete function +/// used by foreign exception handlers when deleting our exception +/// (OurException), instances. +/// @param reason @link http://refspecs.freestandards.org/abi-eh-1.21.html +/// @unlink +/// @param expToDelete exception instance to delete +void deleteFromUnwindOurException(_Unwind_Reason_Code reason, + OurUnwindException* expToDelete) { +#ifdef DEBUG + fprintf(stderr, + "deleteFromUnwindOurException(...).\n"); +#endif + + deleteOurException(expToDelete); +} + + +/// Creates (allocates on the heap), an exception (OurException instance), +/// of the supplied type info type. 
+/// @param type type info type +OurUnwindException* createOurException(int type) { + size_t size = sizeof(OurException); + OurException* ret = (OurException*) memset(malloc(size), 0, size); + (ret->type).type = type; + (ret->unwindException).exception_class = ourBaseExceptionClass; + (ret->unwindException).exception_cleanup = deleteFromUnwindOurException; + + return(&(ret->unwindException)); +} + + +/// Read a uleb128 encoded value and advance pointer +/// See Variable Length Data in: +/// @link http://dwarfstd.org/Dwarf3.pdf @unlink +/// @param data reference variable holding memory pointer to decode from +/// @returns decoded value +static uintptr_t readULEB128(const uint8_t** data) { + uintptr_t result = 0; + uintptr_t shift = 0; + unsigned char byte; + const uint8_t* p = *data; + + do { + byte = *p++; + result |= (byte & 0x7f) << shift; + shift += 7; + } + while (byte & 0x80); + + *data = p; + + return result; +} + + +/// Read a sleb128 encoded value and advance pointer +/// See Variable Length Data in: +/// @link http://dwarfstd.org/Dwarf3.pdf @unlink +/// @param data reference variable holding memory pointer to decode from +/// @returns decoded value +static uintptr_t readSLEB128(const uint8_t** data) { + uintptr_t result = 0; + uintptr_t shift = 0; + unsigned char byte; + const uint8_t* p = *data; + + do { + byte = *p++; + result |= (byte & 0x7f) << shift; + shift += 7; + } + while (byte & 0x80); + + *data = p; + + if ((byte & 0x40) && (shift < (sizeof(result) << 3))) { + result |= (~0 << shift); + } + + return result; +} + + +/// Read a pointer encoded value and advance pointer +/// See Variable Length Data in: +/// @link http://dwarfstd.org/Dwarf3.pdf @unlink +/// @param data reference variable holding memory pointer to decode from +/// @param encoding dwarf encoding type +/// @returns decoded value +static uintptr_t readEncodedPointer(const uint8_t** data, uint8_t encoding) { + uintptr_t result = 0; + const uint8_t* p = *data; + + if (encoding == 
llvm::dwarf::DW_EH_PE_omit) + return(result); + + // first get value + switch (encoding & 0x0F) { + case llvm::dwarf::DW_EH_PE_absptr: + result = *((uintptr_t*)p); + p += sizeof(uintptr_t); + break; + case llvm::dwarf::DW_EH_PE_uleb128: + result = readULEB128(&p); + break; + // Note: This case has not been tested + case llvm::dwarf::DW_EH_PE_sleb128: + result = readSLEB128(&p); + break; + case llvm::dwarf::DW_EH_PE_udata2: + result = *((uint16_t*)p); + p += sizeof(uint16_t); + break; + case llvm::dwarf::DW_EH_PE_udata4: + result = *((uint32_t*)p); + p += sizeof(uint32_t); + break; + case llvm::dwarf::DW_EH_PE_udata8: + result = *((uint64_t*)p); + p += sizeof(uint64_t); + break; + case llvm::dwarf::DW_EH_PE_sdata2: + result = *((int16_t*)p); + p += sizeof(int16_t); + break; + case llvm::dwarf::DW_EH_PE_sdata4: + result = *((int32_t*)p); + p += sizeof(int32_t); + break; + case llvm::dwarf::DW_EH_PE_sdata8: + result = *((int64_t*)p); + p += sizeof(int64_t); + break; + default: + // not supported + abort(); + break; + } + + // then add relative offset + switch (encoding & 0x70) { + case llvm::dwarf::DW_EH_PE_absptr: + // do nothing + break; + case llvm::dwarf::DW_EH_PE_pcrel: + result += (uintptr_t)(*data); + break; + case llvm::dwarf::DW_EH_PE_textrel: + case llvm::dwarf::DW_EH_PE_datarel: + case llvm::dwarf::DW_EH_PE_funcrel: + case llvm::dwarf::DW_EH_PE_aligned: + default: + // not supported + abort(); + break; + } + + // then apply indirection + if (encoding & llvm::dwarf::DW_EH_PE_indirect) { + result = *((uintptr_t*)result); + } + + *data = p; + + return result; +} + + +/// Deals with Dwarf actions matching our type infos +/// (OurExceptionType_t instances). Returns whether or not a dwarf emitted +/// action matches the supplied exception type. If such a match succeeds, +/// the resultAction argument will be set with > 0 index value. Only +/// corresponding llvm.eh.selector type info arguments, cleanup arguments +/// are supported. Filters are not supported. 
+/// See Variable Length Data in: +/// @link http://dwarfstd.org/Dwarf3.pdf @unlink +/// Also see @link http://refspecs.freestandards.org/abi-eh-1.21.html @unlink +/// @param resultAction reference variable which will be set with result +/// @param classInfo our array of type info pointers (to globals) +/// @param actionEntry index into above type info array or 0 (clean up). +/// We do not support filters. +/// @param exceptionClass exception class (_Unwind_Exception::exception_class) +/// of thrown exception. +/// @param exceptionObject thrown _Unwind_Exception instance. +/// @returns whether or not a type info was found. False is returned if only +/// a cleanup was found +static bool handleActionValue(int64_t *resultAction, + struct OurExceptionType_t **classInfo, + uintptr_t actionEntry, + uint64_t exceptionClass, + struct _Unwind_Exception *exceptionObject) { + bool ret = false; + + if (!resultAction || + !exceptionObject || + (exceptionClass != ourBaseExceptionClass)) + return(ret); + + struct OurBaseException_t* excp = (struct OurBaseException_t*) + (((char*) exceptionObject) + ourBaseFromUnwindOffset); + struct OurExceptionType_t *excpType = &(excp->type); + int type = excpType->type; + +#ifdef DEBUG + fprintf(stderr, + "handleActionValue(...): exceptionObject = <%p>, " + "excp = <%p>.\n", + exceptionObject, + excp); +#endif + + const uint8_t *actionPos = (uint8_t*) actionEntry, + *tempActionPos; + int64_t typeOffset = 0, + actionOffset; + + for (int i = 0; true; ++i) { + // Each emitted dwarf action corresponds to a 2 tuple of + // type info address offset, and action offset to the next + // emitted action. 
+ typeOffset = readSLEB128(&actionPos); + tempActionPos = actionPos; + actionOffset = readSLEB128(&tempActionPos); + +#ifdef DEBUG + fprintf(stderr, + "handleActionValue(...):typeOffset: <%lld>, " + "actionOffset: <%lld>.\n", + typeOffset, + actionOffset); +#endif + assert((typeOffset >= 0) && + "handleActionValue(...):filters are not supported."); + + // Note: A typeOffset == 0 implies that a cleanup llvm.eh.selector + // argument has been matched. + if ((typeOffset > 0) && + (type == (classInfo[-typeOffset])->type)) { +#ifdef DEBUG + fprintf(stderr, + "handleActionValue(...):actionValue <%d> found.\n", + i); +#endif + *resultAction = i + 1; + ret = true; + break; + } + +#ifdef DEBUG + fprintf(stderr, + "handleActionValue(...):actionValue not found.\n"); +#endif + if (!actionOffset) + break; + + actionPos += actionOffset; + } + + return(ret); +} + + +/// Deals with the Language specific data portion of the emitted dwarf code. +/// See @link http://refspecs.freestandards.org/abi-eh-1.21.html @unlink +/// @param version unsupported (ignored), unwind version +/// @param lsda language specific data area +/// @param _Unwind_Action actions minimally supported unwind stage +/// (forced specifically not supported) +/// @param exceptionClass exception class (_Unwind_Exception::exception_class) +/// of thrown exception. +/// @param exceptionObject thrown _Unwind_Exception instance. 
+/// @param context unwind system context +/// @returns minimally supported unwinding control indicator +static _Unwind_Reason_Code handleLsda(int version, + const uint8_t* lsda, + _Unwind_Action actions, + uint64_t exceptionClass, + struct _Unwind_Exception* exceptionObject, + _Unwind_Context_t context) { + _Unwind_Reason_Code ret = _URC_CONTINUE_UNWIND; + + if (!lsda) + return(ret); + +#ifdef DEBUG + fprintf(stderr, + "handleLsda(...):lsda is non-zero.\n"); +#endif + + // Get the current instruction pointer and offset it before next + // instruction in the current frame which threw the exception. + uintptr_t pc = _Unwind_GetIP(context)-1; + + // Get beginning current frame's code (as defined by the + // emitted dwarf code) + uintptr_t funcStart = _Unwind_GetRegionStart(context); + uintptr_t pcOffset = pc - funcStart; + struct OurExceptionType_t** classInfo = NULL; + + // Note: See JITDwarfEmitter::EmitExceptionTable(...) for corresponding + // dwarf emission + + // Parse LSDA header. + uint8_t lpStartEncoding = *lsda++; + + if (lpStartEncoding != llvm::dwarf::DW_EH_PE_omit) { + readEncodedPointer(&lsda, lpStartEncoding); + } + + uint8_t ttypeEncoding = *lsda++; + uintptr_t classInfoOffset; + + if (ttypeEncoding != llvm::dwarf::DW_EH_PE_omit) { + // Calculate type info locations in emitted dwarf code which + // were flagged by type info arguments to llvm.eh.selector + // intrinsic + classInfoOffset = readULEB128(&lsda); + classInfo = (struct OurExceptionType_t**) (lsda + classInfoOffset); + } + + // Walk call-site table looking for range that + // includes current PC. 
+ + uint8_t callSiteEncoding = *lsda++; + uint32_t callSiteTableLength = readULEB128(&lsda); + const uint8_t* callSiteTableStart = lsda; + const uint8_t* callSiteTableEnd = callSiteTableStart + + callSiteTableLength; + const uint8_t* actionTableStart = callSiteTableEnd; + const uint8_t* callSitePtr = callSiteTableStart; + + bool foreignException = false; + + while (callSitePtr < callSiteTableEnd) { + uintptr_t start = readEncodedPointer(&callSitePtr, + callSiteEncoding); + uintptr_t length = readEncodedPointer(&callSitePtr, + callSiteEncoding); + uintptr_t landingPad = readEncodedPointer(&callSitePtr, + callSiteEncoding); + + // Note: Action value + uintptr_t actionEntry = readULEB128(&callSitePtr); + + if (exceptionClass != ourBaseExceptionClass) { + // We have been notified of a foreign exception being thrown, + // and we therefore need to execute cleanup landing pads + actionEntry = 0; + foreignException = true; + } + + if (landingPad == 0) { +#ifdef DEBUG + fprintf(stderr, + "handleLsda(...): No landing pad found.\n"); +#endif + + continue; // no landing pad for this entry + } + + if (actionEntry) { + actionEntry += ((uintptr_t) actionTableStart) - 1; + } + else { +#ifdef DEBUG + fprintf(stderr, + "handleLsda(...):No action table found.\n"); +#endif + } + + bool exceptionMatched = false; + + if ((start <= pcOffset) && (pcOffset < (start + length))) { +#ifdef DEBUG + fprintf(stderr, + "handleLsda(...): Landing pad found.\n"); +#endif + int64_t actionValue = 0; + + if (actionEntry) { + exceptionMatched = handleActionValue + ( + &actionValue, + classInfo, + actionEntry, + exceptionClass, + exceptionObject + ); + } + + if (!(actions & _UA_SEARCH_PHASE)) { +#ifdef DEBUG + fprintf(stderr, + "handleLsda(...): installed landing pad " + "context.\n"); +#endif + + // Found landing pad for the PC. + // Set Instruction Pointer to so we re-enter function + // at landing pad. The landing pad is created by the + // compiler to take two parameters in registers. 
+ _Unwind_SetGR(context, + __builtin_eh_return_data_regno(0), + (uintptr_t)exceptionObject); + + // Note: this virtual register directly corresponds + // to the return of the llvm.eh.selector intrinsic + if (!actionEntry || !exceptionMatched) { + // We indicate cleanup only + _Unwind_SetGR(context, + __builtin_eh_return_data_regno(1), + 0); + } + else { + // Matched type info index of llvm.eh.selector intrinsic + // passed here. + _Unwind_SetGR(context, + __builtin_eh_return_data_regno(1), + actionValue); + } + + // To execute landing pad set here + _Unwind_SetIP(context, funcStart + landingPad); + ret = _URC_INSTALL_CONTEXT; + } + else if (exceptionMatched) { +#ifdef DEBUG + fprintf(stderr, + "handleLsda(...): setting handler found.\n"); +#endif + ret = _URC_HANDLER_FOUND; + } + else { + // Note: Only non-clean up handlers are marked as + // found. Otherwise the clean up handlers will be + // re-found and executed during the clean up + // phase. +#ifdef DEBUG + fprintf(stderr, + "handleLsda(...): cleanup handler found.\n"); +#endif + } + + break; + } + } + + return(ret); +} + + +/// This is the personality function which is embedded (dwarf emitted), in the +/// dwarf unwind info block. Again see: JITDwarfEmitter.cpp. +/// See @link http://refspecs.freestandards.org/abi-eh-1.21.html @unlink +/// @param version unsupported (ignored), unwind version +/// @param _Unwind_Action actions minimally supported unwind stage +/// (forced specifically not supported) +/// @param exceptionClass exception class (_Unwind_Exception::exception_class) +/// of thrown exception. +/// @param exceptionObject thrown _Unwind_Exception instance. 
+/// @param context unwind system context +/// @returns minimally supported unwinding control indicator +_Unwind_Reason_Code ourPersonality(int version, + _Unwind_Action actions, + uint64_t exceptionClass, + struct _Unwind_Exception* exceptionObject, + _Unwind_Context_t context) { +#ifdef DEBUG + fprintf(stderr, + "We are in ourPersonality(...):actions is <%d>.\n", + actions); + + if (actions & _UA_SEARCH_PHASE) { + fprintf(stderr, "ourPersonality(...):In search phase.\n"); + } + else { + fprintf(stderr, "ourPersonality(...):In non-search phase.\n"); + } +#endif + + const uint8_t* lsda = (uint8_t*) + _Unwind_GetLanguageSpecificData(context); + +#ifdef DEBUG + fprintf(stderr, + "ourPersonality(...):lsda = <%p>.\n", + lsda); +#endif + + // The real work of the personality function is captured here + return(handleLsda(version, + lsda, + actions, + exceptionClass, + exceptionObject, + context)); +} + + +/// Generates our _Unwind_Exception class from a given character array. +/// thereby handling arbitrary lengths (not in standard), and handling +/// embedded \0s. +/// See @link http://refspecs.freestandards.org/abi-eh-1.21.html @unlink +/// @param classChars char array to encode. NULL values not checkedf +/// @param classCharsSize number of chars in classChars. Value is not checked. +/// @returns class value +uint64_t genClass(const unsigned char classChars[], size_t classCharsSize) +{ + uint64_t ret = classChars[0]; + + for (unsigned i = 1; i < classCharsSize; ++i) { + ret <<= 8; + ret += classChars[i]; + } + + return(ret); +} + +} // extern "C" + +// +// Runtime C Library functions End +// + +// +// Code generation functions +// + +/// Generates code to print given constant string +/// @param context llvm context +/// @param module code for module instance +/// @param builder builder instance +/// @param toPrint string to print +/// @param useGlobal A value of true (default) indicates a GlobalValue is +/// generated, and is used to hold the constant string. 
A value of +/// false indicates that the constant string will be stored on the +/// stack. +void generateStringPrint(llvm::LLVMContext& context, + llvm::Module& module, + llvm::IRBuilder<>& builder, + std::string toPrint, + bool useGlobal = true) { + llvm::Function *printFunct = module.getFunction("printStr"); + + llvm::Value *stringVar; + llvm::Constant* stringConstant = + llvm::ConstantArray::get(context, toPrint); + + if (useGlobal) { + // Note: Does not work without allocation + stringVar = + new llvm::GlobalVariable(module, + stringConstant->getType(), + true, + llvm::GlobalValue::LinkerPrivateLinkage, + stringConstant, + ""); + } + else { + stringVar = builder.CreateAlloca(stringConstant->getType()); + builder.CreateStore(stringConstant, stringVar); + } + + llvm::Value* cast = + builder.CreatePointerCast(stringVar, + builder.getInt8Ty()->getPointerTo()); + builder.CreateCall(printFunct, cast); +} + + +/// Generates code to print given runtime integer according to constant +/// string format, and a given print function. +/// @param context llvm context +/// @param module code for module instance +/// @param builder builder instance +/// @param printFunct function used to "print" integer +/// @param toPrint string to print +/// @param format printf like formating string for print +/// @param useGlobal A value of true (default) indicates a GlobalValue is +/// generated, and is used to hold the constant string. A value of +/// false indicates that the constant string will be stored on the +/// stack. 
+void generateIntegerPrint(llvm::LLVMContext& context, + llvm::Module& module, + llvm::IRBuilder<>& builder, + llvm::Function& printFunct, + llvm::Value& toPrint, + std::string format, + bool useGlobal = true) { + llvm::Constant *stringConstant = llvm::ConstantArray::get(context, format); + llvm::Value *stringVar; + + if (useGlobal) { + // Note: Does not seem to work without allocation + stringVar = + new llvm::GlobalVariable(module, + stringConstant->getType(), + true, + llvm::GlobalValue::LinkerPrivateLinkage, + stringConstant, + ""); + } + else { + stringVar = builder.CreateAlloca(stringConstant->getType()); + builder.CreateStore(stringConstant, stringVar); + } + + llvm::Value* cast = + builder.CreateBitCast(stringVar, + builder.getInt8Ty()->getPointerTo()); + builder.CreateCall2(&printFunct, &toPrint, cast); +} + + +/// Generates code to handle finally block type semantics: always runs +/// regardless of whether a thrown exception is passing through or the +/// parent function is simply exiting. In addition to printing some state +/// to stderr, this code will resume the exception handling--runs the +/// unwind resume block, if the exception has not been previously caught +/// by a catch clause, and will otherwise execute the end block (terminator +/// block). In addition this function creates the corresponding function's +/// stack storage for the exception pointer and catch flag status. +/// @param context llvm context +/// @param module code for module instance +/// @param builder builder instance +/// @param toAddTo parent function to add block to +/// @param blockName block name of new "finally" block. 
+/// @param functionId output id used for printing +/// @param terminatorBlock terminator "end" block +/// @param unwindResumeBlock unwind resume block +/// @param exceptionCaughtFlag reference exception caught/thrown status storage +/// @param exceptionStorage reference to exception pointer storage +/// @returns newly created block +static llvm::BasicBlock* createFinallyBlock(llvm::LLVMContext& context, + llvm::Module& module, + llvm::IRBuilder<>& builder, + llvm::Function& toAddTo, + std::string& blockName, + std::string& functionId, + llvm::BasicBlock& terminatorBlock, + llvm::BasicBlock& unwindResumeBlock, + llvm::Value** exceptionCaughtFlag, + llvm::Value** exceptionStorage) { + assert(exceptionCaughtFlag && + "ExceptionDemo::createFinallyBlock(...):exceptionCaughtFlag " + "is NULL"); + assert(exceptionStorage && + "ExceptionDemo::createFinallyBlock(...):exceptionStorage " + "is NULL"); + + *exceptionCaughtFlag = + createEntryBlockAlloca(toAddTo, + "exceptionCaught", + ourExceptionNotThrownState->getType(), + ourExceptionNotThrownState); + + const llvm::PointerType* exceptionStorageType = + builder.getInt8Ty()->getPointerTo(); + *exceptionStorage = + createEntryBlockAlloca(toAddTo, + "exceptionStorage", + exceptionStorageType, + llvm::ConstantPointerNull::get( + exceptionStorageType)); + + llvm::BasicBlock *ret = llvm::BasicBlock::Create(context, + blockName, + &toAddTo); + + builder.SetInsertPoint(ret); + + std::ostringstream bufferToPrint; + bufferToPrint << "Gen: Executing finally block " + << blockName + << " in " + << functionId + << std::endl; + generateStringPrint(context, + module, + builder, + bufferToPrint.str(), + USE_GLOBAL_STR_CONSTS); + + llvm::SwitchInst* theSwitch = + builder.CreateSwitch(builder.CreateLoad(*exceptionCaughtFlag), + &terminatorBlock, + 2); + theSwitch->addCase(ourExceptionCaughtState, &terminatorBlock); + theSwitch->addCase(ourExceptionThrownState, &unwindResumeBlock); + + return(ret); +} + + +/// Generates catch block semantics 
which print a string to indicate type of +/// catch executed, sets an exception caught flag, and executes passed in +/// end block (terminator block). +/// @param context llvm context +/// @param module code for module instance +/// @param builder builder instance +/// @param toAddTo parent function to add block to +/// @param blockName block name of new "catch" block. +/// @param functionId output id used for printing +/// @param terminatorBlock terminator "end" block +/// @param exceptionCaughtFlag exception caught/thrown status +/// @returns newly created block +static llvm::BasicBlock* createCatchBlock(llvm::LLVMContext& context, + llvm::Module& module, + llvm::IRBuilder<>& builder, + llvm::Function& toAddTo, + std::string& blockName, + std::string& functionId, + llvm::BasicBlock& terminatorBlock, + llvm::Value& exceptionCaughtFlag) { + + llvm::BasicBlock *ret = llvm::BasicBlock::Create(context, + blockName, + &toAddTo); + + builder.SetInsertPoint(ret); + + std::ostringstream bufferToPrint; + bufferToPrint << "Gen: Executing catch block " + << blockName + << " in " + << functionId + << std::endl; + generateStringPrint(context, + module, + builder, + bufferToPrint.str(), + USE_GLOBAL_STR_CONSTS); + builder.CreateStore(ourExceptionCaughtState, &exceptionCaughtFlag); + builder.CreateBr(&terminatorBlock); + + return(ret); +} + + +/// Generates a function which invokes a function (toInvoke) and, whose +/// unwind block will "catch" the type info types correspondingly held in the +/// exceptionTypesToCatch argument. If the toInvoke function throws an +/// exception which does not match any type info types contained in +/// exceptionTypesToCatch, the generated code will call _Unwind_Resume +/// with the raised exception. On the other hand the generated code will +/// normally exit if the toInvoke function does not throw an exception. +/// The generated "finally" block is always run regardless of the cause of +/// the generated function exit. 
+/// The generated function is returned after being verified. +/// @param module code for module instance +/// @param builder builder instance +/// @param fpm a function pass manager holding optional IR to IR +/// transformations +/// @param toInvoke inner function to invoke +/// @param ourId id used to printing purposes +/// @param numExceptionsToCatch length of exceptionTypesToCatch array +/// @param exceptionTypesToCatch array of type info types to "catch" +/// @returns generated function +static +llvm::Function* createCatchWrappedInvokeFunction(llvm::Module& module, + llvm::IRBuilder<>& builder, + llvm::FunctionPassManager& fpm, + llvm::Function& toInvoke, + std::string ourId, + unsigned numExceptionsToCatch, + unsigned exceptionTypesToCatch[]) { + + llvm::LLVMContext& context = module.getContext(); + llvm::Function *toPrint32Int = module.getFunction("print32Int"); + + ArgTypes argTypes; + argTypes.push_back(builder.getInt32Ty()); + + ArgNames argNames; + argNames.push_back("exceptTypeToThrow"); + + llvm::Function* ret = createFunction(module, + builder.getVoidTy(), + argTypes, + argNames, + ourId, + llvm::Function::ExternalLinkage, + false, + false); + + // Block which calls invoke + llvm::BasicBlock *entryBlock = llvm::BasicBlock::Create(context, + "entry", + ret); + // Normal block for invoke + llvm::BasicBlock *normalBlock = llvm::BasicBlock::Create(context, + "normal", + ret); + // Unwind block for invoke + llvm::BasicBlock *exceptionBlock = + llvm::BasicBlock::Create(context, "exception", ret); + + // Block which routes exception to correct catch handler block + llvm::BasicBlock *exceptionRouteBlock = + llvm::BasicBlock::Create(context, "exceptionRoute", ret); + + // Foreign exception handler + llvm::BasicBlock *externalExceptionBlock = + llvm::BasicBlock::Create(context, "externalException", ret); + + // Block which calls _Unwind_Resume + llvm::BasicBlock *unwindResumeBlock = + llvm::BasicBlock::Create(context, "unwindResume", ret); + + // Clean up block 
which delete exception if needed + llvm::BasicBlock *endBlock = + llvm::BasicBlock::Create(context, "end", ret); + + std::string nextName; + std::vector<llvm::BasicBlock*> catchBlocks(numExceptionsToCatch); + llvm::Value* exceptionCaughtFlag = NULL; + llvm::Value* exceptionStorage = NULL; + + // Finally block which will branch to unwindResumeBlock if + // exception is not caught. Initializes/allocates stack locations. + llvm::BasicBlock* finallyBlock = createFinallyBlock(context, + module, + builder, + *ret, + nextName = "finally", + ourId, + *endBlock, + *unwindResumeBlock, + &exceptionCaughtFlag, + &exceptionStorage); + + for (unsigned i = 0; i < numExceptionsToCatch; ++i) { + nextName = ourTypeInfoNames[exceptionTypesToCatch[i]]; + + // One catch block per type info to be caught + catchBlocks[i] = createCatchBlock(context, + module, + builder, + *ret, + nextName, + ourId, + *finallyBlock, + *exceptionCaughtFlag); + } + + // Entry Block + + builder.SetInsertPoint(entryBlock); + + std::vector<llvm::Value*> args; + args.push_back(namedValues["exceptTypeToThrow"]); + builder.CreateInvoke(&toInvoke, + normalBlock, + exceptionBlock, + args.begin(), + args.end()); + + // End Block + + builder.SetInsertPoint(endBlock); + + generateStringPrint(context, + module, + builder, + "Gen: In end block: exiting in " + ourId + ".\n", + USE_GLOBAL_STR_CONSTS); + llvm::Function *deleteOurException = + module.getFunction("deleteOurException"); + + // Note: function handles NULL exceptions + builder.CreateCall(deleteOurException, + builder.CreateLoad(exceptionStorage)); + builder.CreateRetVoid(); + + // Normal Block + + builder.SetInsertPoint(normalBlock); + + generateStringPrint(context, + module, + builder, + "Gen: No exception in " + ourId + "!\n", + USE_GLOBAL_STR_CONSTS); + + // Finally block is always called + builder.CreateBr(finallyBlock); + + // Unwind Resume Block + + builder.SetInsertPoint(unwindResumeBlock); + + llvm::Function *resumeOurException = + 
module.getFunction("_Unwind_Resume"); + builder.CreateCall(resumeOurException, + builder.CreateLoad(exceptionStorage)); + builder.CreateUnreachable(); + + // Exception Block + + builder.SetInsertPoint(exceptionBlock); + + llvm::Function *ehException = module.getFunction("llvm.eh.exception"); + + // Retrieve thrown exception + llvm::Value* unwindException = builder.CreateCall(ehException); + + // Store exception and flag + builder.CreateStore(unwindException, exceptionStorage); + builder.CreateStore(ourExceptionThrownState, exceptionCaughtFlag); + llvm::Function *personality = module.getFunction("ourPersonality"); + llvm::Value* functPtr = + builder.CreatePointerCast(personality, + builder.getInt8Ty()->getPointerTo()); + + args.clear(); + args.push_back(unwindException); + args.push_back(functPtr); + + // Note: Skipping index 0 + for (unsigned i = 0; i < numExceptionsToCatch; ++i) { + // Set up type infos to be caught + args.push_back( + module.getGlobalVariable( + ourTypeInfoNames[exceptionTypesToCatch[i]])); + } + + args.push_back(llvm::ConstantInt::get(builder.getInt32Ty(), 0)); + + llvm::Function *ehSelector = module.getFunction("llvm.eh.selector"); + + // Set up this exeption block as the landing pad which will handle + // given type infos. See case Intrinsic::eh_selector in + // SelectionDAGBuilder::visitIntrinsicCall(...) and AddCatchInfo(...) + // implemented in FunctionLoweringInfo.cpp to see how the implementation + // handles this call. This landing pad (this exception block), will be + // called either because it nees to cleanup (call finally) or a type + // info was found which matched the thrown exception. + llvm::Value* retTypeInfoIndex = builder.CreateCall(ehSelector, + args.begin(), + args.end()); + + // Retrieve exception_class member from thrown exception + // (_Unwind_Exception instance). This member tells us whether or not + // the exception is foreign. 
+ llvm::Value* unwindExceptionClass = + builder.CreateLoad( + builder.CreateStructGEP( + builder.CreatePointerCast( + unwindException, + ourUnwindExceptionType->getPointerTo()), + 0)); + + // Branch to the externalExceptionBlock if the exception is foreign or + // to a catch router if not. Either way the finally block will be run. + builder.CreateCondBr( + builder.CreateICmpEQ(unwindExceptionClass, + llvm::ConstantInt::get(builder.getInt64Ty(), + ourBaseExceptionClass)), + exceptionRouteBlock, + externalExceptionBlock); + + // External Exception Block + + builder.SetInsertPoint(externalExceptionBlock); + + generateStringPrint(context, + module, + builder, + "Gen: Foreign exception received.\n", + USE_GLOBAL_STR_CONSTS); + + // Branch to the finally block + builder.CreateBr(finallyBlock); + + // Exception Route Block + + builder.SetInsertPoint(exceptionRouteBlock); + + // Casts exception pointer (_Unwind_Exception instance) to parent + // (OurException instance). + // + // Note: ourBaseFromUnwindOffset is usually negative + llvm::Value* typeInfoThrown = + builder.CreatePointerCast( + builder.CreateConstGEP1_64(unwindException, + ourBaseFromUnwindOffset), + ourExceptionType->getPointerTo()); + + // Retrieve thrown exception type info type + // + // Note: Index is not relative to pointer but instead to structure + // unlike a true getelementptr (GEP) instruction + typeInfoThrown = builder.CreateStructGEP(typeInfoThrown, 0); + + llvm::Value* typeInfoThrownType = + builder.CreateStructGEP(typeInfoThrown, 0); + + generateIntegerPrint(context, + module, + builder, + *toPrint32Int, + *(builder.CreateLoad(typeInfoThrownType)), + "Gen: Exception type <%d> received (stack unwound) " + " in " + + ourId + + ".\n", + USE_GLOBAL_STR_CONSTS); + + // Route to matched type info catch block or run cleanup finally block + llvm::SwitchInst* switchToCatchBlock = + builder.CreateSwitch(retTypeInfoIndex, + finallyBlock, + numExceptionsToCatch); + + unsigned nextTypeToCatch; + + for 
(unsigned i = 1; i <= numExceptionsToCatch; ++i) { + nextTypeToCatch = i - 1; + switchToCatchBlock->addCase(llvm::ConstantInt::get( + llvm::Type::getInt32Ty(context), + i), + catchBlocks[nextTypeToCatch]); + } + + llvm::verifyFunction(*ret); + fpm.run(*ret); + + return(ret); +} + + +/// Generates function which throws either an exception matched to a runtime +/// determined type info type (argument to generated function), or if this +/// runtime value matches nativeThrowType, throws a foreign exception by +/// calling nativeThrowFunct. +/// @param module code for module instance +/// @param builder builder instance +/// @param fpm a function pass manager holding optional IR to IR +/// transformations +/// @param ourId id used to printing purposes +/// @param nativeThrowType a runtime argument of this value results in +/// nativeThrowFunct being called to generate/throw exception. +/// @param nativeThrowFunct function which will throw a foreign exception +/// if the above nativeThrowType matches generated function's arg. +/// @returns generated function +static +llvm::Function* createThrowExceptionFunction(llvm::Module& module, + llvm::IRBuilder<>& builder, + llvm::FunctionPassManager& fpm, + std::string ourId, + int32_t nativeThrowType, + llvm::Function& nativeThrowFunct) { + llvm::LLVMContext& context = module.getContext(); + namedValues.clear(); + ArgTypes unwindArgTypes; + unwindArgTypes.push_back(builder.getInt32Ty()); + ArgNames unwindArgNames; + unwindArgNames.push_back("exceptTypeToThrow"); + + llvm::Function *ret = createFunction(module, + builder.getVoidTy(), + unwindArgTypes, + unwindArgNames, + ourId, + llvm::Function::ExternalLinkage, + false, + false); + + // Throws either one of our exception or a native C++ exception depending + // on a runtime argument value containing a type info type. 
+ llvm::BasicBlock *entryBlock = llvm::BasicBlock::Create(context, + "entry", + ret); + // Throws a foreign exception + llvm::BasicBlock *nativeThrowBlock = + llvm::BasicBlock::Create(context, + "nativeThrow", + ret); + // Throws one of our Exceptions + llvm::BasicBlock *generatedThrowBlock = + llvm::BasicBlock::Create(context, + "generatedThrow", + ret); + // Retrieved runtime type info type to throw + llvm::Value* exceptionType = namedValues["exceptTypeToThrow"]; + + // nativeThrowBlock block + + builder.SetInsertPoint(nativeThrowBlock); + + // Throws foreign exception + builder.CreateCall(&nativeThrowFunct, exceptionType); + builder.CreateUnreachable(); + + // entry block + + builder.SetInsertPoint(entryBlock); + + llvm::Function *toPrint32Int = module.getFunction("print32Int"); + generateIntegerPrint(context, + module, + builder, + *toPrint32Int, + *exceptionType, + "\nGen: About to throw exception type <%d> in " + + ourId + + ".\n", + USE_GLOBAL_STR_CONSTS); + + // Switches on runtime type info type value to determine whether or not + // a foreign exception is thrown. Defaults to throwing one of our + // generated exceptions. + llvm::SwitchInst* theSwitch = builder.CreateSwitch(exceptionType, + generatedThrowBlock, + 1); + + theSwitch->addCase(llvm::ConstantInt::get(llvm::Type::getInt32Ty(context), + nativeThrowType), + nativeThrowBlock); + + // generatedThrow block + + builder.SetInsertPoint(generatedThrowBlock); + + llvm::Function *createOurException = + module.getFunction("createOurException"); + llvm::Function *raiseOurException = + module.getFunction("_Unwind_RaiseException"); + + // Creates exception to throw with runtime type info type. 
+ llvm::Value* exception = + builder.CreateCall(createOurException, + namedValues["exceptTypeToThrow"]); + + // Throw generated Exception + builder.CreateCall(raiseOurException, exception); + builder.CreateUnreachable(); + + llvm::verifyFunction(*ret); + fpm.run(*ret); + + return(ret); +} + +static void createStandardUtilityFunctions(unsigned numTypeInfos, + llvm::Module& module, + llvm::IRBuilder<>& builder); + +/// Creates test code by generating and organizing these functions into the +/// test case. The test case consists of an outer function setup to invoke +/// an inner function within an environment having multiple catch and single +/// finally blocks. This inner function is also setup to invoke a throw +/// function within an evironment similar in nature to the outer function's +/// catch and finally blocks. Each of these two functions catch mutually +/// exclusive subsets (even or odd) of the type info types configured +/// for this this. All generated functions have a runtime argument which +/// holds a type info type to throw that each function takes and passes it +/// to the inner one if such a inner function exists. This type info type is +/// looked at by the generated throw function to see whether or not it should +/// throw a generated exception with the same type info type, or instead call +/// a supplied a function which in turn will throw a foreign exception. +/// @param module code for module instance +/// @param builder builder instance +/// @param fpm a function pass manager holding optional IR to IR +/// transformations +/// @param nativeThrowFunctName name of external function which will throw +/// a foreign exception +/// @returns outermost generated test function. 
+llvm::Function* createUnwindExceptionTest(llvm::Module& module, + llvm::IRBuilder<>& builder, + llvm::FunctionPassManager& fpm, + std::string nativeThrowFunctName) { + // Number of type infos to generate + unsigned numTypeInfos = 6; + + // Initialze intrisics and external functions to use along with exception + // and type info globals. + createStandardUtilityFunctions(numTypeInfos, + module, + builder); + llvm::Function *nativeThrowFunct = + module.getFunction(nativeThrowFunctName); + + // Create exception throw function using the value ~0 to cause + // foreign exceptions to be thrown. + llvm::Function* throwFunct = + createThrowExceptionFunction(module, + builder, + fpm, + "throwFunct", + ~0, + *nativeThrowFunct); + // Inner function will catch even type infos + unsigned innerExceptionTypesToCatch[] = {6, 2, 4}; + size_t numExceptionTypesToCatch = sizeof(innerExceptionTypesToCatch) / + sizeof(unsigned); + + // Generate inner function. + llvm::Function* innerCatchFunct = + createCatchWrappedInvokeFunction(module, + builder, + fpm, + *throwFunct, + "innerCatchFunct", + numExceptionTypesToCatch, + innerExceptionTypesToCatch); + + // Outer function will catch odd type infos + unsigned outerExceptionTypesToCatch[] = {3, 1, 5}; + numExceptionTypesToCatch = sizeof(outerExceptionTypesToCatch) / + sizeof(unsigned); + + // Generate outer function + llvm::Function* outerCatchFunct = + createCatchWrappedInvokeFunction(module, + builder, + fpm, + *innerCatchFunct, + "outerCatchFunct", + numExceptionTypesToCatch, + outerExceptionTypesToCatch); + + // Return outer function to run + return(outerCatchFunct); +} + + +/// Represents our foreign exceptions +class OurCppRunException : public std::runtime_error { +public: + OurCppRunException(const std::string reason) : + std::runtime_error(reason) {} + + OurCppRunException (const OurCppRunException& toCopy) : + std::runtime_error(toCopy) {} + + OurCppRunException& operator = (const OurCppRunException& toCopy) { + 
return(reinterpret_cast<OurCppRunException&>( + std::runtime_error::operator = (toCopy) + )); + } + + ~OurCppRunException (void) throw () {}; +}; + + +/// Throws foreign C++ exception. +/// @param ignoreIt unused parameter that allows function to match implied +/// generated function contract. +extern "C" +void throwCppException (int32_t ignoreIt) { + throw(OurCppRunException("thrown by throwCppException(...)")); +} + +typedef void (*OurExceptionThrowFunctType) (int32_t typeToThrow); + +/// This is a test harness which runs test by executing generated +/// function with a type info type to throw. Harness wraps the excecution +/// of generated function in a C++ try catch clause. +/// @param engine execution engine to use for executing generated function. +/// This demo program expects this to be a JIT instance for demo +/// purposes. +/// @param function generated test function to run +/// @param typeToThrow type info type of generated exception to throw, or +/// indicator to cause foreign exception to be thrown. +static +void runExceptionThrow(llvm::ExecutionEngine* engine, + llvm::Function* function, + int32_t typeToThrow) { + + // Find test's function pointer + OurExceptionThrowFunctType functPtr = + reinterpret_cast<OurExceptionThrowFunctType>( + reinterpret_cast<intptr_t>( + engine->getPointerToFunction(function) + ) + ); + + try { + // Run test + (*functPtr)(typeToThrow); + } + catch (OurCppRunException exc) { + // Catch foreign C++ exception + fprintf(stderr, + "\nrunExceptionThrow(...):In C++ catch OurCppRunException " + "with reason: %s.\n", + exc.what()); + } + catch (...) { + // Catch all exceptions including our generated ones. I'm not sure + // why this latter functionality should work, as it seems that + // our exceptions should be foreign to C++ (the _Unwind_Exception:: + // exception_class should be different from the one used by C++), and + // therefore C++ should ignore the generated exceptions. 
+ + fprintf(stderr, + "\nrunExceptionThrow(...):In C++ catch all.\n"); + } +} + +// +// End test functions +// + +/// This initialization routine creates type info globals and +/// adds external function declarations to module. +/// @param numTypeInfos number of linear type info associated type info types +/// to create as GlobalVariable instances, starting with the value 1. +/// @param module code for module instance +/// @param builder builder instance +static void createStandardUtilityFunctions(unsigned numTypeInfos, + llvm::Module& module, + llvm::IRBuilder<>& builder) { + + llvm::LLVMContext& context = module.getContext(); + + // Exception initializations + + // Setup exception catch state + ourExceptionNotThrownState = + llvm::ConstantInt::get(llvm::Type::getInt8Ty(context), 0), + ourExceptionThrownState = + llvm::ConstantInt::get(llvm::Type::getInt8Ty(context), 1), + ourExceptionCaughtState = + llvm::ConstantInt::get(llvm::Type::getInt8Ty(context), 2), + + + // Create our type info type + ourTypeInfoType = llvm::StructType::get(context, + builder.getInt32Ty(), + NULL); + + // Create OurException type + ourExceptionType = llvm::StructType::get(context, + ourTypeInfoType, + NULL); + + // Create portion of _Unwind_Exception type + // + // Note: Declaring only a portion of the _Unwind_Exception struct. + // Does this cause problems? + ourUnwindExceptionType = llvm::StructType::get(context, + builder.getInt64Ty(), + NULL); + struct OurBaseException_t dummyException; + + // Calculate offset of OurException::unwindException member. 
+ ourBaseFromUnwindOffset = ((uintptr_t) &dummyException) - + ((uintptr_t) &(dummyException.unwindException)); + +#ifdef DEBUG + fprintf(stderr, + "createStandardUtilityFunctions(...):ourBaseFromUnwindOffset " + "= %lld, sizeof(struct OurBaseException_t) - " + "sizeof(struct _Unwind_Exception) = %lu.\n", + ourBaseFromUnwindOffset, + sizeof(struct OurBaseException_t) - + sizeof(struct _Unwind_Exception)); +#endif + + size_t numChars = sizeof(ourBaseExcpClassChars) / sizeof(char); + + // Create our _Unwind_Exception::exception_class value + ourBaseExceptionClass = genClass(ourBaseExcpClassChars, numChars); + + // Type infos + + std::string baseStr = "typeInfo", typeInfoName; + std::ostringstream typeInfoNameBuilder; + std::vector<llvm::Constant*> structVals; + + llvm::Constant *nextStruct; + llvm::GlobalVariable* nextGlobal = NULL; + + // Generate each type info + // + // Note: First type info is not used. + for (unsigned i = 0; i <= numTypeInfos; ++i) { + structVals.clear(); + structVals.push_back(llvm::ConstantInt::get(builder.getInt32Ty(), i)); + nextStruct = llvm::ConstantStruct::get(ourTypeInfoType, structVals); + + typeInfoNameBuilder.str(""); + typeInfoNameBuilder << baseStr << i; + typeInfoName = typeInfoNameBuilder.str(); + + // Note: Does not seem to work without allocation + nextGlobal = + new llvm::GlobalVariable(module, + ourTypeInfoType, + true, + llvm::GlobalValue::ExternalLinkage, + nextStruct, + typeInfoName); + + ourTypeInfoNames.push_back(typeInfoName); + ourTypeInfoNamesIndex[i] = typeInfoName; + } + + ArgNames argNames; + ArgTypes argTypes; + llvm::Function* funct = NULL; + + // print32Int + + const llvm::Type* retType = builder.getVoidTy(); + + argTypes.clear(); + argTypes.push_back(builder.getInt32Ty()); + argTypes.push_back(builder.getInt8Ty()->getPointerTo()); + + argNames.clear(); + + createFunction(module, + retType, + argTypes, + argNames, + "print32Int", + llvm::Function::ExternalLinkage, + true, + false); + + // print64Int + + retType = 
builder.getVoidTy(); + + argTypes.clear(); + argTypes.push_back(builder.getInt64Ty()); + argTypes.push_back(builder.getInt8Ty()->getPointerTo()); + + argNames.clear(); + + createFunction(module, + retType, + argTypes, + argNames, + "print64Int", + llvm::Function::ExternalLinkage, + true, + false); + + // printStr + + retType = builder.getVoidTy(); + + argTypes.clear(); + argTypes.push_back(builder.getInt8Ty()->getPointerTo()); + + argNames.clear(); + + createFunction(module, + retType, + argTypes, + argNames, + "printStr", + llvm::Function::ExternalLinkage, + true, + false); + + // throwCppException + + retType = builder.getVoidTy(); + + argTypes.clear(); + argTypes.push_back(builder.getInt32Ty()); + + argNames.clear(); + + createFunction(module, + retType, + argTypes, + argNames, + "throwCppException", + llvm::Function::ExternalLinkage, + true, + false); + + // deleteOurException + + retType = builder.getVoidTy(); + + argTypes.clear(); + argTypes.push_back(builder.getInt8Ty()->getPointerTo()); + + argNames.clear(); + + createFunction(module, + retType, + argTypes, + argNames, + "deleteOurException", + llvm::Function::ExternalLinkage, + true, + false); + + // createOurException + + retType = builder.getInt8Ty()->getPointerTo(); + + argTypes.clear(); + argTypes.push_back(builder.getInt32Ty()); + + argNames.clear(); + + createFunction(module, + retType, + argTypes, + argNames, + "createOurException", + llvm::Function::ExternalLinkage, + true, + false); + + // _Unwind_RaiseException + + retType = builder.getInt32Ty(); + + argTypes.clear(); + argTypes.push_back(builder.getInt8Ty()->getPointerTo()); + + argNames.clear(); + + funct = createFunction(module, + retType, + argTypes, + argNames, + "_Unwind_RaiseException", + llvm::Function::ExternalLinkage, + true, + false); + + funct->addFnAttr(llvm::Attribute::NoReturn); + + // _Unwind_Resume + + retType = builder.getInt32Ty(); + + argTypes.clear(); + argTypes.push_back(builder.getInt8Ty()->getPointerTo()); + + 
argNames.clear(); + + funct = createFunction(module, + retType, + argTypes, + argNames, + "_Unwind_Resume", + llvm::Function::ExternalLinkage, + true, + false); + + funct->addFnAttr(llvm::Attribute::NoReturn); + + // ourPersonality + + retType = builder.getInt32Ty(); + + argTypes.clear(); + argTypes.push_back(builder.getInt32Ty()); + argTypes.push_back(builder.getInt32Ty()); + argTypes.push_back(builder.getInt64Ty()); + argTypes.push_back(builder.getInt8Ty()->getPointerTo()); + argTypes.push_back(builder.getInt8Ty()->getPointerTo()); + + argNames.clear(); + + createFunction(module, + retType, + argTypes, + argNames, + "ourPersonality", + llvm::Function::ExternalLinkage, + true, + false); + + // llvm.eh.selector intrinsic + + getDeclaration(&module, llvm::Intrinsic::eh_selector); + + // llvm.eh.exception intrinsic + + getDeclaration(&module, llvm::Intrinsic::eh_exception); + + // llvm.eh.typeid.for intrinsic + + getDeclaration(&module, llvm::Intrinsic::eh_typeid_for); +} + + +//===---------------------------------------------------------------------===// +// Main test driver code. +//===---------------------------------------------------------------------===// + +/// Demo main routine which takes the type info types to throw. A test will +/// be run for each given type info type. While type info types with the value +/// of -1 will trigger a foreign C++ exception to be thrown; type info types +/// <= 6 and >= 1 will be caught by test functions; and type info types > 6 +/// will result in exceptions which pass through to the test harness. All other +/// type info types are not supported and could cause a crash. 
+int main(int argc, char* argv[]) { + if (argc == 1) { + fprintf(stderr, + "\nUsage: ExceptionDemo <exception type to throw> " + "[<type 2>...<type n>].\n" + " Each type must have the value of 1 - 6 for " + "generated exceptions to be caught;\n" + " the value -1 for foreign C++ exceptions to be " + "generated and thrown;\n" + " or the values > 6 for exceptions to be ignored.\n" + "\nTry: ExceptionDemo 2 3 7 -1\n" + " for a full test.\n\n"); + return(0); + } + + // If not set, exception handling will not be turned on + llvm::DwarfExceptionHandling = true; + + llvm::InitializeNativeTarget(); + llvm::LLVMContext& context = llvm::getGlobalContext(); + llvm::IRBuilder<> theBuilder(context); + + // Make the module, which holds all the code. + llvm::Module* module = new llvm::Module("my cool jit", context); + + // Build engine with JIT + llvm::EngineBuilder factory(module); + factory.setEngineKind(llvm::EngineKind::JIT); + factory.setAllocateGVsWithCode(false); + llvm::ExecutionEngine* executionEngine = factory.create(); + + { + llvm::FunctionPassManager fpm(module); + + // Set up the optimizer pipeline. + // Start with registering info about how the + // target lays out data structures. + fpm.add(new llvm::TargetData(*executionEngine->getTargetData())); + + // Optimizations turned on +#ifdef ADD_OPT_PASSES + + // Promote allocas to registers. + fpm.add(llvm::createPromoteMemoryToRegisterPass()); + + // Do simple "peephole" optimizations and bit-twiddling optzns. + fpm.add(llvm::createInstructionCombiningPass()); + + // Reassociate expressions. + fpm.add(llvm::createReassociatePass()); + + // Eliminate Common SubExpressions. + fpm.add(llvm::createGVNPass()); + + // Simplify the control flow graph (deleting unreachable + // blocks, etc). + fpm.add(llvm::createCFGSimplificationPass()); +#endif // ADD_OPT_PASSES + + fpm.doInitialization(); + + // Generate test code using function throwCppException(...) as + // the function which throws foreign exceptions. 
+ llvm::Function* toRun = + createUnwindExceptionTest(*module, + theBuilder, + fpm, + "throwCppException"); + + fprintf(stderr, "\nBegin module dump:\n\n"); + + module->dump(); + + fprintf(stderr, "\nEnd module dump:\n"); + + fprintf(stderr, "\n\nBegin Test:\n"); + + for (int i = 1; i < argc; ++i) { + // Run test for each argument whose value is the exception + // type to throw. + runExceptionThrow(executionEngine, + toRun, + (unsigned) strtoul(argv[i], NULL, 10)); + } + + fprintf(stderr, "\nEnd Test:\n\n"); + } + + delete executionEngine; + + return 0; +} + diff --git a/examples/ExceptionDemo/Makefile b/examples/ExceptionDemo/Makefile new file mode 100644 index 0000000..06bba66 --- /dev/null +++ b/examples/ExceptionDemo/Makefile @@ -0,0 +1,17 @@ +##===- examples/ExceptionDemo/Makefile --------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===---------------------------------------------------------------------===## +LEVEL = ../.. +TOOLNAME = ExceptionDemo +EXAMPLE_TOOL = 1 + +LINK_COMPONENTS := jit interpreter nativecodegen + +include $(LEVEL)/Makefile.common + +CXXFLAGS += -fexceptions diff --git a/examples/Fibonacci/CMakeLists.txt b/examples/Fibonacci/CMakeLists.txt new file mode 100644 index 0000000..6937612 --- /dev/null +++ b/examples/Fibonacci/CMakeLists.txt @@ -0,0 +1,5 @@ +set(LLVM_LINK_COMPONENTS jit interpreter nativecodegen) + +add_llvm_example(Fibonacci + fibonacci.cpp + ) diff --git a/examples/Fibonacci/Makefile b/examples/Fibonacci/Makefile new file mode 100644 index 0000000..71f6ba0 --- /dev/null +++ b/examples/Fibonacci/Makefile @@ -0,0 +1,17 @@ +##===- examples/Fibonacci/Makefile -------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. 
+# +##===----------------------------------------------------------------------===## + +LEVEL = ../.. +TOOLNAME = Fibonacci +EXAMPLE_TOOL = 1 + +# Link in JIT support +LINK_COMPONENTS := jit interpreter nativecodegen + +include $(LEVEL)/Makefile.common diff --git a/examples/Fibonacci/fibonacci.cpp b/examples/Fibonacci/fibonacci.cpp new file mode 100644 index 0000000..353e173 --- /dev/null +++ b/examples/Fibonacci/fibonacci.cpp @@ -0,0 +1,131 @@ +//===--- examples/Fibonacci/fibonacci.cpp - An example use of the JIT -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This small program provides an example of how to build quickly a small module +// with function Fibonacci and execute it with the JIT. +// +// The goal of this snippet is to create in the memory the LLVM module +// consisting of one function as follow: +// +// int fib(int x) { +// if(x<=2) return 1; +// return fib(x-1)+fib(x-2); +// } +// +// Once we have this, we compile the module via JIT, then execute the `fib' +// function and return result to a driver, i.e. to a "host program". +// +//===----------------------------------------------------------------------===// + +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Constants.h" +#include "llvm/Instructions.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/ExecutionEngine/JIT.h" +#include "llvm/ExecutionEngine/Interpreter.h" +#include "llvm/ExecutionEngine/GenericValue.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetSelect.h" +using namespace llvm; + +static Function *CreateFibFunction(Module *M, LLVMContext &Context) { + // Create the fib function and insert it into module M. This function is said + // to return an int and take an int parameter. 
+ Function *FibF = + cast<Function>(M->getOrInsertFunction("fib", Type::getInt32Ty(Context), + Type::getInt32Ty(Context), + (Type *)0)); + + // Add a basic block to the function. + BasicBlock *BB = BasicBlock::Create(Context, "EntryBlock", FibF); + + // Get pointers to the constants. + Value *One = ConstantInt::get(Type::getInt32Ty(Context), 1); + Value *Two = ConstantInt::get(Type::getInt32Ty(Context), 2); + + // Get pointer to the integer argument of the add1 function... + Argument *ArgX = FibF->arg_begin(); // Get the arg. + ArgX->setName("AnArg"); // Give it a nice symbolic name for fun. + + // Create the true_block. + BasicBlock *RetBB = BasicBlock::Create(Context, "return", FibF); + // Create an exit block. + BasicBlock* RecurseBB = BasicBlock::Create(Context, "recurse", FibF); + + // Create the "if (arg <= 2) goto exitbb" + Value *CondInst = new ICmpInst(*BB, ICmpInst::ICMP_SLE, ArgX, Two, "cond"); + BranchInst::Create(RetBB, RecurseBB, CondInst, BB); + + // Create: ret int 1 + ReturnInst::Create(Context, One, RetBB); + + // create fib(x-1) + Value *Sub = BinaryOperator::CreateSub(ArgX, One, "arg", RecurseBB); + CallInst *CallFibX1 = CallInst::Create(FibF, Sub, "fibx1", RecurseBB); + CallFibX1->setTailCall(); + + // create fib(x-2) + Sub = BinaryOperator::CreateSub(ArgX, Two, "arg", RecurseBB); + CallInst *CallFibX2 = CallInst::Create(FibF, Sub, "fibx2", RecurseBB); + CallFibX2->setTailCall(); + + + // fib(x-1)+fib(x-2) + Value *Sum = BinaryOperator::CreateAdd(CallFibX1, CallFibX2, + "addresult", RecurseBB); + + // Create the return instruction and add it to the basic block + ReturnInst::Create(Context, Sum, RecurseBB); + + return FibF; +} + + +int main(int argc, char **argv) { + int n = argc > 1 ? atol(argv[1]) : 24; + + InitializeNativeTarget(); + LLVMContext Context; + + // Create some module to put our function into it. 
+ Module *M = new Module("test", Context); + + // We are about to create the "fib" function: + Function *FibF = CreateFibFunction(M, Context); + + // Now we going to create JIT + std::string errStr; + ExecutionEngine *EE = EngineBuilder(M).setErrorStr(&errStr).setEngineKind(EngineKind::JIT).create(); + + if (!EE) { + errs() << argv[0] << ": Failed to construct ExecutionEngine: " << errStr << "\n"; + return 1; + } + + errs() << "verifying... "; + if (verifyModule(*M)) { + errs() << argv[0] << ": Error constructing function!\n"; + return 1; + } + + errs() << "OK\n"; + errs() << "We just constructed this LLVM module:\n\n---------\n" << *M; + errs() << "---------\nstarting fibonacci(" << n << ") with JIT...\n"; + + // Call the Fibonacci function with argument n: + std::vector<GenericValue> Args(1); + Args[0].IntVal = APInt(32, n); + GenericValue GV = EE->runFunction(FibF, Args); + + // import result of execution + outs() << "Result: " << GV.IntVal << "\n"; + return 0; +} diff --git a/examples/HowToUseJIT/CMakeLists.txt b/examples/HowToUseJIT/CMakeLists.txt new file mode 100644 index 0000000..428b53f --- /dev/null +++ b/examples/HowToUseJIT/CMakeLists.txt @@ -0,0 +1,5 @@ +set(LLVM_LINK_COMPONENTS jit interpreter nativecodegen) + +add_llvm_example(HowToUseJIT + HowToUseJIT.cpp + ) diff --git a/examples/HowToUseJIT/HowToUseJIT.cpp b/examples/HowToUseJIT/HowToUseJIT.cpp new file mode 100644 index 0000000..8e3b6dc --- /dev/null +++ b/examples/HowToUseJIT/HowToUseJIT.cpp @@ -0,0 +1,124 @@ +//===-- examples/HowToUseJIT/HowToUseJIT.cpp - An example use of the JIT --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This small program provides an example of how to quickly build a small +// module with two functions and execute it with the JIT. 
+// +// Goal: +// The goal of this snippet is to create in the memory +// the LLVM module consisting of two functions as follow: +// +// int add1(int x) { +// return x+1; +// } +// +// int foo() { +// return add1(10); +// } +// +// then compile the module via JIT, then execute the `foo' +// function and return result to a driver, i.e. to a "host program". +// +// Some remarks and questions: +// +// - could we invoke some code using noname functions too? +// e.g. evaluate "foo()+foo()" without fears to introduce +// conflict of temporary function name with some real +// existing function name? +// +//===----------------------------------------------------------------------===// + +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/ExecutionEngine/JIT.h" +#include "llvm/ExecutionEngine/Interpreter.h" +#include "llvm/ExecutionEngine/GenericValue.h" +#include "llvm/Target/TargetSelect.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +int main() { + + InitializeNativeTarget(); + + LLVMContext Context; + + // Create some module to put our function into it. + Module *M = new Module("test", Context); + + // Create the add1 function entry and insert this entry into module M. The + // function will have a return type of "int" and take an argument of "int". + // The '0' terminates the list of argument types. + Function *Add1F = + cast<Function>(M->getOrInsertFunction("add1", Type::getInt32Ty(Context), + Type::getInt32Ty(Context), + (Type *)0)); + + // Add a basic block to the function. As before, it automatically inserts + // because of the last argument. + BasicBlock *BB = BasicBlock::Create(Context, "EntryBlock", Add1F); + + // Get pointers to the constant `1'. + Value *One = ConstantInt::get(Type::getInt32Ty(Context), 1); + + // Get pointers to the integer argument of the add1 function... 
+ assert(Add1F->arg_begin() != Add1F->arg_end()); // Make sure there's an arg + Argument *ArgX = Add1F->arg_begin(); // Get the arg + ArgX->setName("AnArg"); // Give it a nice symbolic name for fun. + + // Create the add instruction, inserting it into the end of BB. + Instruction *Add = BinaryOperator::CreateAdd(One, ArgX, "addresult", BB); + + // Create the return instruction and add it to the basic block + ReturnInst::Create(Context, Add, BB); + + // Now, function add1 is ready. + + + // Now we going to create function `foo', which returns an int and takes no + // arguments. + Function *FooF = + cast<Function>(M->getOrInsertFunction("foo", Type::getInt32Ty(Context), + (Type *)0)); + + // Add a basic block to the FooF function. + BB = BasicBlock::Create(Context, "EntryBlock", FooF); + + // Get pointers to the constant `10'. + Value *Ten = ConstantInt::get(Type::getInt32Ty(Context), 10); + + // Pass Ten to the call call: + CallInst *Add1CallRes = CallInst::Create(Add1F, Ten, "add1", BB); + Add1CallRes->setTailCall(true); + + // Create the return instruction and add it to the basic block. + ReturnInst::Create(Context, Add1CallRes, BB); + + // Now we create the JIT. 
+ ExecutionEngine* EE = EngineBuilder(M).create(); + + outs() << "We just constructed this LLVM module:\n\n" << *M; + outs() << "\n\nRunning foo: "; + outs().flush(); + + // Call the `foo' function with no arguments: + std::vector<GenericValue> noargs; + GenericValue gv = EE->runFunction(FooF, noargs); + + // Import result of execution: + outs() << "Result: " << gv.IntVal << "\n"; + EE->freeMachineCodeForFunction(FooF); + delete EE; + llvm_shutdown(); + return 0; +} diff --git a/examples/HowToUseJIT/Makefile b/examples/HowToUseJIT/Makefile new file mode 100644 index 0000000..c8919db --- /dev/null +++ b/examples/HowToUseJIT/Makefile @@ -0,0 +1,15 @@ +##===- examples/HowToUseJIT/Makefile -----------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../.. +TOOLNAME = HowToUseJIT +EXAMPLE_TOOL = 1 + +LINK_COMPONENTS := jit interpreter nativecodegen + +include $(LEVEL)/Makefile.common diff --git a/examples/Kaleidoscope/CMakeLists.txt b/examples/Kaleidoscope/CMakeLists.txt new file mode 100644 index 0000000..8c87ac5 --- /dev/null +++ b/examples/Kaleidoscope/CMakeLists.txt @@ -0,0 +1,6 @@ +add_subdirectory(Chapter2) +add_subdirectory(Chapter3) +add_subdirectory(Chapter4) +add_subdirectory(Chapter5) +add_subdirectory(Chapter6) +add_subdirectory(Chapter7) diff --git a/examples/Kaleidoscope/Chapter2/CMakeLists.txt b/examples/Kaleidoscope/Chapter2/CMakeLists.txt new file mode 100644 index 0000000..79f2b17 --- /dev/null +++ b/examples/Kaleidoscope/Chapter2/CMakeLists.txt @@ -0,0 +1,3 @@ +add_llvm_example(Kaleidoscope-Ch2 + toy.cpp + ) diff --git a/examples/Kaleidoscope/Chapter2/Makefile b/examples/Kaleidoscope/Chapter2/Makefile new file mode 100644 index 0000000..1a9b94c --- /dev/null +++ b/examples/Kaleidoscope/Chapter2/Makefile @@ -0,0 +1,13 @@ 
+##===- examples/Kaleidoscope/Chapter2/Makefile -------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../.. +TOOLNAME = Kaleidoscope-Ch2 +EXAMPLE_TOOL = 1 + +include $(LEVEL)/Makefile.common diff --git a/examples/Kaleidoscope/Chapter2/toy.cpp b/examples/Kaleidoscope/Chapter2/toy.cpp new file mode 100644 index 0000000..f4f09d0 --- /dev/null +++ b/examples/Kaleidoscope/Chapter2/toy.cpp @@ -0,0 +1,398 @@ +#include <cstdio> +#include <cstdlib> +#include <string> +#include <map> +#include <vector> + +//===----------------------------------------------------------------------===// +// Lexer +//===----------------------------------------------------------------------===// + +// The lexer returns tokens [0-255] if it is an unknown character, otherwise one +// of these for known things. +enum Token { + tok_eof = -1, + + // commands + tok_def = -2, tok_extern = -3, + + // primary + tok_identifier = -4, tok_number = -5 +}; + +static std::string IdentifierStr; // Filled in if tok_identifier +static double NumVal; // Filled in if tok_number + +/// gettok - Return the next token from standard input. +static int gettok() { + static int LastChar = ' '; + + // Skip any whitespace. 
+ while (isspace(LastChar)) + LastChar = getchar(); + + if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* + IdentifierStr = LastChar; + while (isalnum((LastChar = getchar()))) + IdentifierStr += LastChar; + + if (IdentifierStr == "def") return tok_def; + if (IdentifierStr == "extern") return tok_extern; + return tok_identifier; + } + + if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ + std::string NumStr; + do { + NumStr += LastChar; + LastChar = getchar(); + } while (isdigit(LastChar) || LastChar == '.'); + + NumVal = strtod(NumStr.c_str(), 0); + return tok_number; + } + + if (LastChar == '#') { + // Comment until end of line. + do LastChar = getchar(); + while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); + + if (LastChar != EOF) + return gettok(); + } + + // Check for end of file. Don't eat the EOF. + if (LastChar == EOF) + return tok_eof; + + // Otherwise, just return the character as its ascii value. + int ThisChar = LastChar; + LastChar = getchar(); + return ThisChar; +} + +//===----------------------------------------------------------------------===// +// Abstract Syntax Tree (aka Parse Tree) +//===----------------------------------------------------------------------===// + +/// ExprAST - Base class for all expression nodes. +class ExprAST { +public: + virtual ~ExprAST() {} +}; + +/// NumberExprAST - Expression class for numeric literals like "1.0". +class NumberExprAST : public ExprAST { + double Val; +public: + NumberExprAST(double val) : Val(val) {} +}; + +/// VariableExprAST - Expression class for referencing a variable, like "a". +class VariableExprAST : public ExprAST { + std::string Name; +public: + VariableExprAST(const std::string &name) : Name(name) {} +}; + +/// BinaryExprAST - Expression class for a binary operator. 
+class BinaryExprAST : public ExprAST { + char Op; + ExprAST *LHS, *RHS; +public: + BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) + : Op(op), LHS(lhs), RHS(rhs) {} +}; + +/// CallExprAST - Expression class for function calls. +class CallExprAST : public ExprAST { + std::string Callee; + std::vector<ExprAST*> Args; +public: + CallExprAST(const std::string &callee, std::vector<ExprAST*> &args) + : Callee(callee), Args(args) {} +}; + +/// PrototypeAST - This class represents the "prototype" for a function, +/// which captures its name, and its argument names (thus implicitly the number +/// of arguments the function takes). +class PrototypeAST { + std::string Name; + std::vector<std::string> Args; +public: + PrototypeAST(const std::string &name, const std::vector<std::string> &args) + : Name(name), Args(args) {} + +}; + +/// FunctionAST - This class represents a function definition itself. +class FunctionAST { + PrototypeAST *Proto; + ExprAST *Body; +public: + FunctionAST(PrototypeAST *proto, ExprAST *body) + : Proto(proto), Body(body) {} + +}; + +//===----------------------------------------------------------------------===// +// Parser +//===----------------------------------------------------------------------===// + +/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current +/// token the parser is looking at. getNextToken reads another token from the +/// lexer and updates CurTok with its results. +static int CurTok; +static int getNextToken() { + return CurTok = gettok(); +} + +/// BinopPrecedence - This holds the precedence for each binary operator that is +/// defined. +static std::map<char, int> BinopPrecedence; + +/// GetTokPrecedence - Get the precedence of the pending binary operator token. +static int GetTokPrecedence() { + if (!isascii(CurTok)) + return -1; + + // Make sure it's a declared binop. 
+ int TokPrec = BinopPrecedence[CurTok]; + if (TokPrec <= 0) return -1; + return TokPrec; +} + +/// Error* - These are little helper functions for error handling. +ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;} +PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; } +FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; } + +static ExprAST *ParseExpression(); + +/// identifierexpr +/// ::= identifier +/// ::= identifier '(' expression* ')' +static ExprAST *ParseIdentifierExpr() { + std::string IdName = IdentifierStr; + + getNextToken(); // eat identifier. + + if (CurTok != '(') // Simple variable ref. + return new VariableExprAST(IdName); + + // Call. + getNextToken(); // eat ( + std::vector<ExprAST*> Args; + if (CurTok != ')') { + while (1) { + ExprAST *Arg = ParseExpression(); + if (!Arg) return 0; + Args.push_back(Arg); + + if (CurTok == ')') break; + + if (CurTok != ',') + return Error("Expected ')' or ',' in argument list"); + getNextToken(); + } + } + + // Eat the ')'. + getNextToken(); + + return new CallExprAST(IdName, Args); +} + +/// numberexpr ::= number +static ExprAST *ParseNumberExpr() { + ExprAST *Result = new NumberExprAST(NumVal); + getNextToken(); // consume the number + return Result; +} + +/// parenexpr ::= '(' expression ')' +static ExprAST *ParseParenExpr() { + getNextToken(); // eat (. + ExprAST *V = ParseExpression(); + if (!V) return 0; + + if (CurTok != ')') + return Error("expected ')'"); + getNextToken(); // eat ). 
+ return V; +} + +/// primary +/// ::= identifierexpr +/// ::= numberexpr +/// ::= parenexpr +static ExprAST *ParsePrimary() { + switch (CurTok) { + default: return Error("unknown token when expecting an expression"); + case tok_identifier: return ParseIdentifierExpr(); + case tok_number: return ParseNumberExpr(); + case '(': return ParseParenExpr(); + } +} + +/// binoprhs +/// ::= ('+' primary)* +static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { + // If this is a binop, find its precedence. + while (1) { + int TokPrec = GetTokPrecedence(); + + // If this is a binop that binds at least as tightly as the current binop, + // consume it, otherwise we are done. + if (TokPrec < ExprPrec) + return LHS; + + // Okay, we know this is a binop. + int BinOp = CurTok; + getNextToken(); // eat binop + + // Parse the primary expression after the binary operator. + ExprAST *RHS = ParsePrimary(); + if (!RHS) return 0; + + // If BinOp binds less tightly with RHS than the operator after RHS, let + // the pending operator take RHS as its LHS. + int NextPrec = GetTokPrecedence(); + if (TokPrec < NextPrec) { + RHS = ParseBinOpRHS(TokPrec+1, RHS); + if (RHS == 0) return 0; + } + + // Merge LHS/RHS. + LHS = new BinaryExprAST(BinOp, LHS, RHS); + } +} + +/// expression +/// ::= primary binoprhs +/// +static ExprAST *ParseExpression() { + ExprAST *LHS = ParsePrimary(); + if (!LHS) return 0; + + return ParseBinOpRHS(0, LHS); +} + +/// prototype +/// ::= id '(' id* ')' +static PrototypeAST *ParsePrototype() { + if (CurTok != tok_identifier) + return ErrorP("Expected function name in prototype"); + + std::string FnName = IdentifierStr; + getNextToken(); + + if (CurTok != '(') + return ErrorP("Expected '(' in prototype"); + + std::vector<std::string> ArgNames; + while (getNextToken() == tok_identifier) + ArgNames.push_back(IdentifierStr); + if (CurTok != ')') + return ErrorP("Expected ')' in prototype"); + + // success. + getNextToken(); // eat ')'. 
+ + return new PrototypeAST(FnName, ArgNames); +} + +/// definition ::= 'def' prototype expression +static FunctionAST *ParseDefinition() { + getNextToken(); // eat def. + PrototypeAST *Proto = ParsePrototype(); + if (Proto == 0) return 0; + + if (ExprAST *E = ParseExpression()) + return new FunctionAST(Proto, E); + return 0; +} + +/// toplevelexpr ::= expression +static FunctionAST *ParseTopLevelExpr() { + if (ExprAST *E = ParseExpression()) { + // Make an anonymous proto. + PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>()); + return new FunctionAST(Proto, E); + } + return 0; +} + +/// external ::= 'extern' prototype +static PrototypeAST *ParseExtern() { + getNextToken(); // eat extern. + return ParsePrototype(); +} + +//===----------------------------------------------------------------------===// +// Top-Level parsing +//===----------------------------------------------------------------------===// + +static void HandleDefinition() { + if (ParseDefinition()) { + fprintf(stderr, "Parsed a function definition.\n"); + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleExtern() { + if (ParseExtern()) { + fprintf(stderr, "Parsed an extern\n"); + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleTopLevelExpression() { + // Evaluate a top-level expression into an anonymous function. + if (ParseTopLevelExpr()) { + fprintf(stderr, "Parsed a top-level expr\n"); + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +/// top ::= definition | external | expression | ';' +static void MainLoop() { + while (1) { + fprintf(stderr, "ready> "); + switch (CurTok) { + case tok_eof: return; + case ';': getNextToken(); break; // ignore top-level semicolons. 
+ case tok_def: HandleDefinition(); break; + case tok_extern: HandleExtern(); break; + default: HandleTopLevelExpression(); break; + } + } +} + +//===----------------------------------------------------------------------===// +// Main driver code. +//===----------------------------------------------------------------------===// + +int main() { + // Install standard binary operators. + // 1 is lowest precedence. + BinopPrecedence['<'] = 10; + BinopPrecedence['+'] = 20; + BinopPrecedence['-'] = 20; + BinopPrecedence['*'] = 40; // highest. + + // Prime the first token. + fprintf(stderr, "ready> "); + getNextToken(); + + // Run the main "interpreter loop" now. + MainLoop(); + + return 0; +} diff --git a/examples/Kaleidoscope/Chapter3/CMakeLists.txt b/examples/Kaleidoscope/Chapter3/CMakeLists.txt new file mode 100644 index 0000000..1af8db0 --- /dev/null +++ b/examples/Kaleidoscope/Chapter3/CMakeLists.txt @@ -0,0 +1,5 @@ +set(LLVM_LINK_COMPONENTS core) + +add_llvm_example(Kaleidoscope-Ch3 + toy.cpp + ) diff --git a/examples/Kaleidoscope/Chapter3/Makefile b/examples/Kaleidoscope/Chapter3/Makefile new file mode 100644 index 0000000..4cc6948 --- /dev/null +++ b/examples/Kaleidoscope/Chapter3/Makefile @@ -0,0 +1,15 @@ +##===- examples/Kaleidoscope/Chapter3/Makefile -------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../.. 
+TOOLNAME = Kaleidoscope-Ch3 +EXAMPLE_TOOL = 1 + +LINK_COMPONENTS := core + +include $(LEVEL)/Makefile.common diff --git a/examples/Kaleidoscope/Chapter3/toy.cpp b/examples/Kaleidoscope/Chapter3/toy.cpp new file mode 100644 index 0000000..73520d8 --- /dev/null +++ b/examples/Kaleidoscope/Chapter3/toy.cpp @@ -0,0 +1,563 @@ +#include "llvm/DerivedTypes.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Support/IRBuilder.h" +#include <cstdio> +#include <string> +#include <map> +#include <vector> +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Lexer +//===----------------------------------------------------------------------===// + +// The lexer returns tokens [0-255] if it is an unknown character, otherwise one +// of these for known things. +enum Token { + tok_eof = -1, + + // commands + tok_def = -2, tok_extern = -3, + + // primary + tok_identifier = -4, tok_number = -5 +}; + +static std::string IdentifierStr; // Filled in if tok_identifier +static double NumVal; // Filled in if tok_number + +/// gettok - Return the next token from standard input. +static int gettok() { + static int LastChar = ' '; + + // Skip any whitespace. + while (isspace(LastChar)) + LastChar = getchar(); + + if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* + IdentifierStr = LastChar; + while (isalnum((LastChar = getchar()))) + IdentifierStr += LastChar; + + if (IdentifierStr == "def") return tok_def; + if (IdentifierStr == "extern") return tok_extern; + return tok_identifier; + } + + if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ + std::string NumStr; + do { + NumStr += LastChar; + LastChar = getchar(); + } while (isdigit(LastChar) || LastChar == '.'); + + NumVal = strtod(NumStr.c_str(), 0); + return tok_number; + } + + if (LastChar == '#') { + // Comment until end of line. 
+ do LastChar = getchar(); + while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); + + if (LastChar != EOF) + return gettok(); + } + + // Check for end of file. Don't eat the EOF. + if (LastChar == EOF) + return tok_eof; + + // Otherwise, just return the character as its ascii value. + int ThisChar = LastChar; + LastChar = getchar(); + return ThisChar; +} + +//===----------------------------------------------------------------------===// +// Abstract Syntax Tree (aka Parse Tree) +//===----------------------------------------------------------------------===// + +/// ExprAST - Base class for all expression nodes. +class ExprAST { +public: + virtual ~ExprAST() {} + virtual Value *Codegen() = 0; +}; + +/// NumberExprAST - Expression class for numeric literals like "1.0". +class NumberExprAST : public ExprAST { + double Val; +public: + NumberExprAST(double val) : Val(val) {} + virtual Value *Codegen(); +}; + +/// VariableExprAST - Expression class for referencing a variable, like "a". +class VariableExprAST : public ExprAST { + std::string Name; +public: + VariableExprAST(const std::string &name) : Name(name) {} + virtual Value *Codegen(); +}; + +/// BinaryExprAST - Expression class for a binary operator. +class BinaryExprAST : public ExprAST { + char Op; + ExprAST *LHS, *RHS; +public: + BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) + : Op(op), LHS(lhs), RHS(rhs) {} + virtual Value *Codegen(); +}; + +/// CallExprAST - Expression class for function calls. +class CallExprAST : public ExprAST { + std::string Callee; + std::vector<ExprAST*> Args; +public: + CallExprAST(const std::string &callee, std::vector<ExprAST*> &args) + : Callee(callee), Args(args) {} + virtual Value *Codegen(); +}; + +/// PrototypeAST - This class represents the "prototype" for a function, +/// which captures its name, and its argument names (thus implicitly the number +/// of arguments the function takes). 
+class PrototypeAST { + std::string Name; + std::vector<std::string> Args; +public: + PrototypeAST(const std::string &name, const std::vector<std::string> &args) + : Name(name), Args(args) {} + + Function *Codegen(); +}; + +/// FunctionAST - This class represents a function definition itself. +class FunctionAST { + PrototypeAST *Proto; + ExprAST *Body; +public: + FunctionAST(PrototypeAST *proto, ExprAST *body) + : Proto(proto), Body(body) {} + + Function *Codegen(); +}; + +//===----------------------------------------------------------------------===// +// Parser +//===----------------------------------------------------------------------===// + +/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current +/// token the parser is looking at. getNextToken reads another token from the +/// lexer and updates CurTok with its results. +static int CurTok; +static int getNextToken() { + return CurTok = gettok(); +} + +/// BinopPrecedence - This holds the precedence for each binary operator that is +/// defined. +static std::map<char, int> BinopPrecedence; + +/// GetTokPrecedence - Get the precedence of the pending binary operator token. +static int GetTokPrecedence() { + if (!isascii(CurTok)) + return -1; + + // Make sure it's a declared binop. + int TokPrec = BinopPrecedence[CurTok]; + if (TokPrec <= 0) return -1; + return TokPrec; +} + +/// Error* - These are little helper functions for error handling. +ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;} +PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; } +FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; } + +static ExprAST *ParseExpression(); + +/// identifierexpr +/// ::= identifier +/// ::= identifier '(' expression* ')' +static ExprAST *ParseIdentifierExpr() { + std::string IdName = IdentifierStr; + + getNextToken(); // eat identifier. + + if (CurTok != '(') // Simple variable ref. + return new VariableExprAST(IdName); + + // Call. 
+ getNextToken(); // eat ( + std::vector<ExprAST*> Args; + if (CurTok != ')') { + while (1) { + ExprAST *Arg = ParseExpression(); + if (!Arg) return 0; + Args.push_back(Arg); + + if (CurTok == ')') break; + + if (CurTok != ',') + return Error("Expected ')' or ',' in argument list"); + getNextToken(); + } + } + + // Eat the ')'. + getNextToken(); + + return new CallExprAST(IdName, Args); +} + +/// numberexpr ::= number +static ExprAST *ParseNumberExpr() { + ExprAST *Result = new NumberExprAST(NumVal); + getNextToken(); // consume the number + return Result; +} + +/// parenexpr ::= '(' expression ')' +static ExprAST *ParseParenExpr() { + getNextToken(); // eat (. + ExprAST *V = ParseExpression(); + if (!V) return 0; + + if (CurTok != ')') + return Error("expected ')'"); + getNextToken(); // eat ). + return V; +} + +/// primary +/// ::= identifierexpr +/// ::= numberexpr +/// ::= parenexpr +static ExprAST *ParsePrimary() { + switch (CurTok) { + default: return Error("unknown token when expecting an expression"); + case tok_identifier: return ParseIdentifierExpr(); + case tok_number: return ParseNumberExpr(); + case '(': return ParseParenExpr(); + } +} + +/// binoprhs +/// ::= ('+' primary)* +static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { + // If this is a binop, find its precedence. + while (1) { + int TokPrec = GetTokPrecedence(); + + // If this is a binop that binds at least as tightly as the current binop, + // consume it, otherwise we are done. + if (TokPrec < ExprPrec) + return LHS; + + // Okay, we know this is a binop. + int BinOp = CurTok; + getNextToken(); // eat binop + + // Parse the primary expression after the binary operator. + ExprAST *RHS = ParsePrimary(); + if (!RHS) return 0; + + // If BinOp binds less tightly with RHS than the operator after RHS, let + // the pending operator take RHS as its LHS. 
+ int NextPrec = GetTokPrecedence(); + if (TokPrec < NextPrec) { + RHS = ParseBinOpRHS(TokPrec+1, RHS); + if (RHS == 0) return 0; + } + + // Merge LHS/RHS. + LHS = new BinaryExprAST(BinOp, LHS, RHS); + } +} + +/// expression +/// ::= primary binoprhs +/// +static ExprAST *ParseExpression() { + ExprAST *LHS = ParsePrimary(); + if (!LHS) return 0; + + return ParseBinOpRHS(0, LHS); +} + +/// prototype +/// ::= id '(' id* ')' +static PrototypeAST *ParsePrototype() { + if (CurTok != tok_identifier) + return ErrorP("Expected function name in prototype"); + + std::string FnName = IdentifierStr; + getNextToken(); + + if (CurTok != '(') + return ErrorP("Expected '(' in prototype"); + + std::vector<std::string> ArgNames; + while (getNextToken() == tok_identifier) + ArgNames.push_back(IdentifierStr); + if (CurTok != ')') + return ErrorP("Expected ')' in prototype"); + + // success. + getNextToken(); // eat ')'. + + return new PrototypeAST(FnName, ArgNames); +} + +/// definition ::= 'def' prototype expression +static FunctionAST *ParseDefinition() { + getNextToken(); // eat def. + PrototypeAST *Proto = ParsePrototype(); + if (Proto == 0) return 0; + + if (ExprAST *E = ParseExpression()) + return new FunctionAST(Proto, E); + return 0; +} + +/// toplevelexpr ::= expression +static FunctionAST *ParseTopLevelExpr() { + if (ExprAST *E = ParseExpression()) { + // Make an anonymous proto. + PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>()); + return new FunctionAST(Proto, E); + } + return 0; +} + +/// external ::= 'extern' prototype +static PrototypeAST *ParseExtern() { + getNextToken(); // eat extern. 
+ return ParsePrototype(); +} + +//===----------------------------------------------------------------------===// +// Code Generation +//===----------------------------------------------------------------------===// + +static Module *TheModule; +static IRBuilder<> Builder(getGlobalContext()); +static std::map<std::string, Value*> NamedValues; + +Value *ErrorV(const char *Str) { Error(Str); return 0; } + +Value *NumberExprAST::Codegen() { + return ConstantFP::get(getGlobalContext(), APFloat(Val)); +} + +Value *VariableExprAST::Codegen() { + // Look this variable up in the function. + Value *V = NamedValues[Name]; + return V ? V : ErrorV("Unknown variable name"); +} + +Value *BinaryExprAST::Codegen() { + Value *L = LHS->Codegen(); + Value *R = RHS->Codegen(); + if (L == 0 || R == 0) return 0; + + switch (Op) { + case '+': return Builder.CreateAdd(L, R, "addtmp"); + case '-': return Builder.CreateSub(L, R, "subtmp"); + case '*': return Builder.CreateMul(L, R, "multmp"); + case '<': + L = Builder.CreateFCmpULT(L, R, "cmptmp"); + // Convert bool 0/1 to double 0.0 or 1.0 + return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()), + "booltmp"); + default: return ErrorV("invalid binary operator"); + } +} + +Value *CallExprAST::Codegen() { + // Look up the name in the global module table. + Function *CalleeF = TheModule->getFunction(Callee); + if (CalleeF == 0) + return ErrorV("Unknown function referenced"); + + // If argument mismatch error. + if (CalleeF->arg_size() != Args.size()) + return ErrorV("Incorrect # arguments passed"); + + std::vector<Value*> ArgsV; + for (unsigned i = 0, e = Args.size(); i != e; ++i) { + ArgsV.push_back(Args[i]->Codegen()); + if (ArgsV.back() == 0) return 0; + } + + return Builder.CreateCall(CalleeF, ArgsV.begin(), ArgsV.end(), "calltmp"); +} + +Function *PrototypeAST::Codegen() { + // Make the function type: double(double,double) etc. 
+ std::vector<const Type*> Doubles(Args.size(), + Type::getDoubleTy(getGlobalContext())); + FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()), + Doubles, false); + + Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule); + + // If F conflicted, there was already something named 'Name'. If it has a + // body, don't allow redefinition or reextern. + if (F->getName() != Name) { + // Delete the one we just made and get the existing one. + F->eraseFromParent(); + F = TheModule->getFunction(Name); + + // If F already has a body, reject this. + if (!F->empty()) { + ErrorF("redefinition of function"); + return 0; + } + + // If F took a different number of args, reject. + if (F->arg_size() != Args.size()) { + ErrorF("redefinition of function with different # args"); + return 0; + } + } + + // Set names for all arguments. + unsigned Idx = 0; + for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size(); + ++AI, ++Idx) { + AI->setName(Args[Idx]); + + // Add arguments to variable symbol table. + NamedValues[Args[Idx]] = AI; + } + + return F; +} + +Function *FunctionAST::Codegen() { + NamedValues.clear(); + + Function *TheFunction = Proto->Codegen(); + if (TheFunction == 0) + return 0; + + // Create a new basic block to start insertion into. + BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); + Builder.SetInsertPoint(BB); + + if (Value *RetVal = Body->Codegen()) { + // Finish off the function. + Builder.CreateRet(RetVal); + + // Validate the generated code, checking for consistency. + verifyFunction(*TheFunction); + + return TheFunction; + } + + // Error reading body, remove function. 
+ TheFunction->eraseFromParent(); + return 0; +} + +//===----------------------------------------------------------------------===// +// Top-Level parsing and JIT Driver +//===----------------------------------------------------------------------===// + +static void HandleDefinition() { + if (FunctionAST *F = ParseDefinition()) { + if (Function *LF = F->Codegen()) { + fprintf(stderr, "Read function definition:"); + LF->dump(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleExtern() { + if (PrototypeAST *P = ParseExtern()) { + if (Function *F = P->Codegen()) { + fprintf(stderr, "Read extern: "); + F->dump(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleTopLevelExpression() { + // Evaluate a top-level expression into an anonymous function. + if (FunctionAST *F = ParseTopLevelExpr()) { + if (Function *LF = F->Codegen()) { + fprintf(stderr, "Read top-level expression:"); + LF->dump(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +/// top ::= definition | external | expression | ';' +static void MainLoop() { + while (1) { + fprintf(stderr, "ready> "); + switch (CurTok) { + case tok_eof: return; + case ';': getNextToken(); break; // ignore top-level semicolons. + case tok_def: HandleDefinition(); break; + case tok_extern: HandleExtern(); break; + default: HandleTopLevelExpression(); break; + } + } +} + +//===----------------------------------------------------------------------===// +// "Library" functions that can be "extern'd" from user code. +//===----------------------------------------------------------------------===// + +/// putchard - putchar that takes a double and returns 0. +extern "C" +double putchard(double X) { + putchar((char)X); + return 0; +} + +//===----------------------------------------------------------------------===// +// Main driver code. 
+//===----------------------------------------------------------------------===// + +int main() { + LLVMContext &Context = getGlobalContext(); + + // Install standard binary operators. + // 1 is lowest precedence. + BinopPrecedence['<'] = 10; + BinopPrecedence['+'] = 20; + BinopPrecedence['-'] = 20; + BinopPrecedence['*'] = 40; // highest. + + // Prime the first token. + fprintf(stderr, "ready> "); + getNextToken(); + + // Make the module, which holds all the code. + TheModule = new Module("my cool jit", Context); + + // Run the main "interpreter loop" now. + MainLoop(); + + // Print out all of the generated code. + TheModule->dump(); + + return 0; +} diff --git a/examples/Kaleidoscope/Chapter4/CMakeLists.txt b/examples/Kaleidoscope/Chapter4/CMakeLists.txt new file mode 100644 index 0000000..0d1ac53 --- /dev/null +++ b/examples/Kaleidoscope/Chapter4/CMakeLists.txt @@ -0,0 +1,5 @@ +set(LLVM_LINK_COMPONENTS core jit interpreter native) + +add_llvm_example(Kaleidoscope-Ch4 + toy.cpp + ) diff --git a/examples/Kaleidoscope/Chapter4/Makefile b/examples/Kaleidoscope/Chapter4/Makefile new file mode 100644 index 0000000..7bc742f --- /dev/null +++ b/examples/Kaleidoscope/Chapter4/Makefile @@ -0,0 +1,15 @@ +##===- examples/Kaleidoscope/Chapter4/Makefile -------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../.. 
+TOOLNAME = Kaleidoscope-Ch4 +EXAMPLE_TOOL = 1 + +LINK_COMPONENTS := core jit interpreter native + +include $(LEVEL)/Makefile.common diff --git a/examples/Kaleidoscope/Chapter4/toy.cpp b/examples/Kaleidoscope/Chapter4/toy.cpp new file mode 100644 index 0000000..cdc9d74 --- /dev/null +++ b/examples/Kaleidoscope/Chapter4/toy.cpp @@ -0,0 +1,606 @@ +#include "llvm/DerivedTypes.h" +#include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/Interpreter.h" +#include "llvm/ExecutionEngine/JIT.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/PassManager.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetSelect.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Support/IRBuilder.h" +#include <cstdio> +#include <string> +#include <map> +#include <vector> +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Lexer +//===----------------------------------------------------------------------===// + +// The lexer returns tokens [0-255] if it is an unknown character, otherwise one +// of these for known things. +enum Token { + tok_eof = -1, + + // commands + tok_def = -2, tok_extern = -3, + + // primary + tok_identifier = -4, tok_number = -5 +}; + +static std::string IdentifierStr; // Filled in if tok_identifier +static double NumVal; // Filled in if tok_number + +/// gettok - Return the next token from standard input. +static int gettok() { + static int LastChar = ' '; + + // Skip any whitespace. 
+ while (isspace(LastChar)) + LastChar = getchar(); + + if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* + IdentifierStr = LastChar; + while (isalnum((LastChar = getchar()))) + IdentifierStr += LastChar; + + if (IdentifierStr == "def") return tok_def; + if (IdentifierStr == "extern") return tok_extern; + return tok_identifier; + } + + if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ + std::string NumStr; + do { + NumStr += LastChar; + LastChar = getchar(); + } while (isdigit(LastChar) || LastChar == '.'); + + NumVal = strtod(NumStr.c_str(), 0); + return tok_number; + } + + if (LastChar == '#') { + // Comment until end of line. + do LastChar = getchar(); + while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); + + if (LastChar != EOF) + return gettok(); + } + + // Check for end of file. Don't eat the EOF. + if (LastChar == EOF) + return tok_eof; + + // Otherwise, just return the character as its ascii value. + int ThisChar = LastChar; + LastChar = getchar(); + return ThisChar; +} + +//===----------------------------------------------------------------------===// +// Abstract Syntax Tree (aka Parse Tree) +//===----------------------------------------------------------------------===// + +/// ExprAST - Base class for all expression nodes. +class ExprAST { +public: + virtual ~ExprAST() {} + virtual Value *Codegen() = 0; +}; + +/// NumberExprAST - Expression class for numeric literals like "1.0". +class NumberExprAST : public ExprAST { + double Val; +public: + NumberExprAST(double val) : Val(val) {} + virtual Value *Codegen(); +}; + +/// VariableExprAST - Expression class for referencing a variable, like "a". +class VariableExprAST : public ExprAST { + std::string Name; +public: + VariableExprAST(const std::string &name) : Name(name) {} + virtual Value *Codegen(); +}; + +/// BinaryExprAST - Expression class for a binary operator. 
+class BinaryExprAST : public ExprAST { + char Op; + ExprAST *LHS, *RHS; +public: + BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) + : Op(op), LHS(lhs), RHS(rhs) {} + virtual Value *Codegen(); +}; + +/// CallExprAST - Expression class for function calls. +class CallExprAST : public ExprAST { + std::string Callee; + std::vector<ExprAST*> Args; +public: + CallExprAST(const std::string &callee, std::vector<ExprAST*> &args) + : Callee(callee), Args(args) {} + virtual Value *Codegen(); +}; + +/// PrototypeAST - This class represents the "prototype" for a function, +/// which captures its name, and its argument names (thus implicitly the number +/// of arguments the function takes). +class PrototypeAST { + std::string Name; + std::vector<std::string> Args; +public: + PrototypeAST(const std::string &name, const std::vector<std::string> &args) + : Name(name), Args(args) {} + + Function *Codegen(); +}; + +/// FunctionAST - This class represents a function definition itself. +class FunctionAST { + PrototypeAST *Proto; + ExprAST *Body; +public: + FunctionAST(PrototypeAST *proto, ExprAST *body) + : Proto(proto), Body(body) {} + + Function *Codegen(); +}; + +//===----------------------------------------------------------------------===// +// Parser +//===----------------------------------------------------------------------===// + +/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current +/// token the parser is looking at. getNextToken reads another token from the +/// lexer and updates CurTok with its results. +static int CurTok; +static int getNextToken() { + return CurTok = gettok(); +} + +/// BinopPrecedence - This holds the precedence for each binary operator that is +/// defined. +static std::map<char, int> BinopPrecedence; + +/// GetTokPrecedence - Get the precedence of the pending binary operator token. +static int GetTokPrecedence() { + if (!isascii(CurTok)) + return -1; + + // Make sure it's a declared binop. 
+ int TokPrec = BinopPrecedence[CurTok]; + if (TokPrec <= 0) return -1; + return TokPrec; +} + +/// Error* - These are little helper functions for error handling. +ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;} +PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; } +FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; } + +static ExprAST *ParseExpression(); + +/// identifierexpr +/// ::= identifier +/// ::= identifier '(' expression* ')' +static ExprAST *ParseIdentifierExpr() { + std::string IdName = IdentifierStr; + + getNextToken(); // eat identifier. + + if (CurTok != '(') // Simple variable ref. + return new VariableExprAST(IdName); + + // Call. + getNextToken(); // eat ( + std::vector<ExprAST*> Args; + if (CurTok != ')') { + while (1) { + ExprAST *Arg = ParseExpression(); + if (!Arg) return 0; + Args.push_back(Arg); + + if (CurTok == ')') break; + + if (CurTok != ',') + return Error("Expected ')' or ',' in argument list"); + getNextToken(); + } + } + + // Eat the ')'. + getNextToken(); + + return new CallExprAST(IdName, Args); +} + +/// numberexpr ::= number +static ExprAST *ParseNumberExpr() { + ExprAST *Result = new NumberExprAST(NumVal); + getNextToken(); // consume the number + return Result; +} + +/// parenexpr ::= '(' expression ')' +static ExprAST *ParseParenExpr() { + getNextToken(); // eat (. + ExprAST *V = ParseExpression(); + if (!V) return 0; + + if (CurTok != ')') + return Error("expected ')'"); + getNextToken(); // eat ). 
+ return V; +} + +/// primary +/// ::= identifierexpr +/// ::= numberexpr +/// ::= parenexpr +static ExprAST *ParsePrimary() { + switch (CurTok) { + default: return Error("unknown token when expecting an expression"); + case tok_identifier: return ParseIdentifierExpr(); + case tok_number: return ParseNumberExpr(); + case '(': return ParseParenExpr(); + } +} + +/// binoprhs +/// ::= ('+' primary)* +static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { + // If this is a binop, find its precedence. + while (1) { + int TokPrec = GetTokPrecedence(); + + // If this is a binop that binds at least as tightly as the current binop, + // consume it, otherwise we are done. + if (TokPrec < ExprPrec) + return LHS; + + // Okay, we know this is a binop. + int BinOp = CurTok; + getNextToken(); // eat binop + + // Parse the primary expression after the binary operator. + ExprAST *RHS = ParsePrimary(); + if (!RHS) return 0; + + // If BinOp binds less tightly with RHS than the operator after RHS, let + // the pending operator take RHS as its LHS. + int NextPrec = GetTokPrecedence(); + if (TokPrec < NextPrec) { + RHS = ParseBinOpRHS(TokPrec+1, RHS); + if (RHS == 0) return 0; + } + + // Merge LHS/RHS. + LHS = new BinaryExprAST(BinOp, LHS, RHS); + } +} + +/// expression +/// ::= primary binoprhs +/// +static ExprAST *ParseExpression() { + ExprAST *LHS = ParsePrimary(); + if (!LHS) return 0; + + return ParseBinOpRHS(0, LHS); +} + +/// prototype +/// ::= id '(' id* ')' +static PrototypeAST *ParsePrototype() { + if (CurTok != tok_identifier) + return ErrorP("Expected function name in prototype"); + + std::string FnName = IdentifierStr; + getNextToken(); + + if (CurTok != '(') + return ErrorP("Expected '(' in prototype"); + + std::vector<std::string> ArgNames; + while (getNextToken() == tok_identifier) + ArgNames.push_back(IdentifierStr); + if (CurTok != ')') + return ErrorP("Expected ')' in prototype"); + + // success. + getNextToken(); // eat ')'. 
+ + return new PrototypeAST(FnName, ArgNames); +} + +/// definition ::= 'def' prototype expression +static FunctionAST *ParseDefinition() { + getNextToken(); // eat def. + PrototypeAST *Proto = ParsePrototype(); + if (Proto == 0) return 0; + + if (ExprAST *E = ParseExpression()) + return new FunctionAST(Proto, E); + return 0; +} + +/// toplevelexpr ::= expression +static FunctionAST *ParseTopLevelExpr() { + if (ExprAST *E = ParseExpression()) { + // Make an anonymous proto. + PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>()); + return new FunctionAST(Proto, E); + } + return 0; +} + +/// external ::= 'extern' prototype +static PrototypeAST *ParseExtern() { + getNextToken(); // eat extern. + return ParsePrototype(); +} + +//===----------------------------------------------------------------------===// +// Code Generation +//===----------------------------------------------------------------------===// + +static Module *TheModule; +static IRBuilder<> Builder(getGlobalContext()); +static std::map<std::string, Value*> NamedValues; +static FunctionPassManager *TheFPM; + +Value *ErrorV(const char *Str) { Error(Str); return 0; } + +Value *NumberExprAST::Codegen() { + return ConstantFP::get(getGlobalContext(), APFloat(Val)); +} + +Value *VariableExprAST::Codegen() { + // Look this variable up in the function. + Value *V = NamedValues[Name]; + return V ? 
V : ErrorV("Unknown variable name"); +} + +Value *BinaryExprAST::Codegen() { + Value *L = LHS->Codegen(); + Value *R = RHS->Codegen(); + if (L == 0 || R == 0) return 0; + + switch (Op) { + case '+': return Builder.CreateAdd(L, R, "addtmp"); + case '-': return Builder.CreateSub(L, R, "subtmp"); + case '*': return Builder.CreateMul(L, R, "multmp"); + case '<': + L = Builder.CreateFCmpULT(L, R, "cmptmp"); + // Convert bool 0/1 to double 0.0 or 1.0 + return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()), + "booltmp"); + default: return ErrorV("invalid binary operator"); + } +} + +Value *CallExprAST::Codegen() { + // Look up the name in the global module table. + Function *CalleeF = TheModule->getFunction(Callee); + if (CalleeF == 0) + return ErrorV("Unknown function referenced"); + + // If argument mismatch error. + if (CalleeF->arg_size() != Args.size()) + return ErrorV("Incorrect # arguments passed"); + + std::vector<Value*> ArgsV; + for (unsigned i = 0, e = Args.size(); i != e; ++i) { + ArgsV.push_back(Args[i]->Codegen()); + if (ArgsV.back() == 0) return 0; + } + + return Builder.CreateCall(CalleeF, ArgsV.begin(), ArgsV.end(), "calltmp"); +} + +Function *PrototypeAST::Codegen() { + // Make the function type: double(double,double) etc. + std::vector<const Type*> Doubles(Args.size(), + Type::getDoubleTy(getGlobalContext())); + FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()), + Doubles, false); + + Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule); + + // If F conflicted, there was already something named 'Name'. If it has a + // body, don't allow redefinition or reextern. + if (F->getName() != Name) { + // Delete the one we just made and get the existing one. + F->eraseFromParent(); + F = TheModule->getFunction(Name); + + // If F already has a body, reject this. + if (!F->empty()) { + ErrorF("redefinition of function"); + return 0; + } + + // If F took a different number of args, reject. 
+ if (F->arg_size() != Args.size()) { + ErrorF("redefinition of function with different # args"); + return 0; + } + } + + // Set names for all arguments. + unsigned Idx = 0; + for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size(); + ++AI, ++Idx) { + AI->setName(Args[Idx]); + + // Add arguments to variable symbol table. + NamedValues[Args[Idx]] = AI; + } + + return F; +} + +Function *FunctionAST::Codegen() { + NamedValues.clear(); + + Function *TheFunction = Proto->Codegen(); + if (TheFunction == 0) + return 0; + + // Create a new basic block to start insertion into. + BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); + Builder.SetInsertPoint(BB); + + if (Value *RetVal = Body->Codegen()) { + // Finish off the function. + Builder.CreateRet(RetVal); + + // Validate the generated code, checking for consistency. + verifyFunction(*TheFunction); + + // Optimize the function. + TheFPM->run(*TheFunction); + + return TheFunction; + } + + // Error reading body, remove function. + TheFunction->eraseFromParent(); + return 0; +} + +//===----------------------------------------------------------------------===// +// Top-Level parsing and JIT Driver +//===----------------------------------------------------------------------===// + +static ExecutionEngine *TheExecutionEngine; + +static void HandleDefinition() { + if (FunctionAST *F = ParseDefinition()) { + if (Function *LF = F->Codegen()) { + fprintf(stderr, "Read function definition:"); + LF->dump(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleExtern() { + if (PrototypeAST *P = ParseExtern()) { + if (Function *F = P->Codegen()) { + fprintf(stderr, "Read extern: "); + F->dump(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleTopLevelExpression() { + // Evaluate a top-level expression into an anonymous function. 
+ if (FunctionAST *F = ParseTopLevelExpr()) { + if (Function *LF = F->Codegen()) { + // JIT the function, returning a function pointer. + void *FPtr = TheExecutionEngine->getPointerToFunction(LF); + + // Cast it to the right type (takes no arguments, returns a double) so we + // can call it as a native function. + double (*FP)() = (double (*)())(intptr_t)FPtr; + fprintf(stderr, "Evaluated to %f\n", FP()); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +/// top ::= definition | external | expression | ';' +static void MainLoop() { + while (1) { + fprintf(stderr, "ready> "); + switch (CurTok) { + case tok_eof: return; + case ';': getNextToken(); break; // ignore top-level semicolons. + case tok_def: HandleDefinition(); break; + case tok_extern: HandleExtern(); break; + default: HandleTopLevelExpression(); break; + } + } +} + +//===----------------------------------------------------------------------===// +// "Library" functions that can be "extern'd" from user code. +//===----------------------------------------------------------------------===// + +/// putchard - putchar that takes a double and returns 0. +extern "C" +double putchard(double X) { + putchar((char)X); + return 0; +} + +//===----------------------------------------------------------------------===// +// Main driver code. +//===----------------------------------------------------------------------===// + +int main() { + InitializeNativeTarget(); + LLVMContext &Context = getGlobalContext(); + + // Install standard binary operators. + // 1 is lowest precedence. + BinopPrecedence['<'] = 10; + BinopPrecedence['+'] = 20; + BinopPrecedence['-'] = 20; + BinopPrecedence['*'] = 40; // highest. + + // Prime the first token. + fprintf(stderr, "ready> "); + getNextToken(); + + // Make the module, which holds all the code. + TheModule = new Module("my cool jit", Context); + + // Create the JIT. This takes ownership of the module. 
+ TheExecutionEngine = EngineBuilder(TheModule).create(); + + FunctionPassManager OurFPM(TheModule); + + // Set up the optimizer pipeline. Start with registering info about how the + // target lays out data structures. + OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData())); + // Do simple "peephole" optimizations and bit-twiddling optzns. + OurFPM.add(createInstructionCombiningPass()); + // Reassociate expressions. + OurFPM.add(createReassociatePass()); + // Eliminate Common SubExpressions. + OurFPM.add(createGVNPass()); + // Simplify the control flow graph (deleting unreachable blocks, etc). + OurFPM.add(createCFGSimplificationPass()); + + OurFPM.doInitialization(); + + // Set the global so the code gen can use this. + TheFPM = &OurFPM; + + // Run the main "interpreter loop" now. + MainLoop(); + + TheFPM = 0; + + // Print out all of the generated code. + TheModule->dump(); + + return 0; +} diff --git a/examples/Kaleidoscope/Chapter5/CMakeLists.txt b/examples/Kaleidoscope/Chapter5/CMakeLists.txt new file mode 100644 index 0000000..2d75ad3 --- /dev/null +++ b/examples/Kaleidoscope/Chapter5/CMakeLists.txt @@ -0,0 +1,5 @@ +set(LLVM_LINK_COMPONENTS core jit interpreter native) + +add_llvm_example(Kaleidoscope-Ch5 + toy.cpp + ) diff --git a/examples/Kaleidoscope/Chapter5/Makefile b/examples/Kaleidoscope/Chapter5/Makefile new file mode 100644 index 0000000..5a8355d --- /dev/null +++ b/examples/Kaleidoscope/Chapter5/Makefile @@ -0,0 +1,15 @@ +##===- examples/Kaleidoscope/Chapter5/Makefile -------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../.. 
+TOOLNAME = Kaleidoscope-Ch5 +EXAMPLE_TOOL = 1 + +LINK_COMPONENTS := core jit interpreter native + +include $(LEVEL)/Makefile.common diff --git a/examples/Kaleidoscope/Chapter5/toy.cpp b/examples/Kaleidoscope/Chapter5/toy.cpp new file mode 100644 index 0000000..24f551f --- /dev/null +++ b/examples/Kaleidoscope/Chapter5/toy.cpp @@ -0,0 +1,851 @@ +#include "llvm/DerivedTypes.h" +#include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/Interpreter.h" +#include "llvm/ExecutionEngine/JIT.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/PassManager.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetSelect.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Support/IRBuilder.h" +#include <cstdio> +#include <string> +#include <map> +#include <vector> +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Lexer +//===----------------------------------------------------------------------===// + +// The lexer returns tokens [0-255] if it is an unknown character, otherwise one +// of these for known things. +enum Token { + tok_eof = -1, + + // commands + tok_def = -2, tok_extern = -3, + + // primary + tok_identifier = -4, tok_number = -5, + + // control + tok_if = -6, tok_then = -7, tok_else = -8, + tok_for = -9, tok_in = -10 +}; + +static std::string IdentifierStr; // Filled in if tok_identifier +static double NumVal; // Filled in if tok_number + +/// gettok - Return the next token from standard input. +static int gettok() { + static int LastChar = ' '; + + // Skip any whitespace. 
+ while (isspace(LastChar)) + LastChar = getchar(); + + if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* + IdentifierStr = LastChar; + while (isalnum((LastChar = getchar()))) + IdentifierStr += LastChar; + + if (IdentifierStr == "def") return tok_def; + if (IdentifierStr == "extern") return tok_extern; + if (IdentifierStr == "if") return tok_if; + if (IdentifierStr == "then") return tok_then; + if (IdentifierStr == "else") return tok_else; + if (IdentifierStr == "for") return tok_for; + if (IdentifierStr == "in") return tok_in; + return tok_identifier; + } + + if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ + std::string NumStr; + do { + NumStr += LastChar; + LastChar = getchar(); + } while (isdigit(LastChar) || LastChar == '.'); + + NumVal = strtod(NumStr.c_str(), 0); + return tok_number; + } + + if (LastChar == '#') { + // Comment until end of line. + do LastChar = getchar(); + while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); + + if (LastChar != EOF) + return gettok(); + } + + // Check for end of file. Don't eat the EOF. + if (LastChar == EOF) + return tok_eof; + + // Otherwise, just return the character as its ascii value. + int ThisChar = LastChar; + LastChar = getchar(); + return ThisChar; +} + +//===----------------------------------------------------------------------===// +// Abstract Syntax Tree (aka Parse Tree) +//===----------------------------------------------------------------------===// + +/// ExprAST - Base class for all expression nodes. +class ExprAST { +public: + virtual ~ExprAST() {} + virtual Value *Codegen() = 0; +}; + +/// NumberExprAST - Expression class for numeric literals like "1.0". +class NumberExprAST : public ExprAST { + double Val; +public: + NumberExprAST(double val) : Val(val) {} + virtual Value *Codegen(); +}; + +/// VariableExprAST - Expression class for referencing a variable, like "a". 
+class VariableExprAST : public ExprAST { + std::string Name; +public: + VariableExprAST(const std::string &name) : Name(name) {} + virtual Value *Codegen(); +}; + +/// BinaryExprAST - Expression class for a binary operator. +class BinaryExprAST : public ExprAST { + char Op; + ExprAST *LHS, *RHS; +public: + BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) + : Op(op), LHS(lhs), RHS(rhs) {} + virtual Value *Codegen(); +}; + +/// CallExprAST - Expression class for function calls. +class CallExprAST : public ExprAST { + std::string Callee; + std::vector<ExprAST*> Args; +public: + CallExprAST(const std::string &callee, std::vector<ExprAST*> &args) + : Callee(callee), Args(args) {} + virtual Value *Codegen(); +}; + +/// IfExprAST - Expression class for if/then/else. +class IfExprAST : public ExprAST { + ExprAST *Cond, *Then, *Else; +public: + IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else) + : Cond(cond), Then(then), Else(_else) {} + virtual Value *Codegen(); +}; + +/// ForExprAST - Expression class for for/in. +class ForExprAST : public ExprAST { + std::string VarName; + ExprAST *Start, *End, *Step, *Body; +public: + ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end, + ExprAST *step, ExprAST *body) + : VarName(varname), Start(start), End(end), Step(step), Body(body) {} + virtual Value *Codegen(); +}; + +/// PrototypeAST - This class represents the "prototype" for a function, +/// which captures its name, and its argument names (thus implicitly the number +/// of arguments the function takes). +class PrototypeAST { + std::string Name; + std::vector<std::string> Args; +public: + PrototypeAST(const std::string &name, const std::vector<std::string> &args) + : Name(name), Args(args) {} + + Function *Codegen(); +}; + +/// FunctionAST - This class represents a function definition itself. 
+class FunctionAST { + PrototypeAST *Proto; + ExprAST *Body; +public: + FunctionAST(PrototypeAST *proto, ExprAST *body) + : Proto(proto), Body(body) {} + + Function *Codegen(); +}; + +//===----------------------------------------------------------------------===// +// Parser +//===----------------------------------------------------------------------===// + +/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current +/// token the parser is looking at. getNextToken reads another token from the +/// lexer and updates CurTok with its results. +static int CurTok; +static int getNextToken() { + return CurTok = gettok(); +} + +/// BinopPrecedence - This holds the precedence for each binary operator that is +/// defined. +static std::map<char, int> BinopPrecedence; + +/// GetTokPrecedence - Get the precedence of the pending binary operator token. +static int GetTokPrecedence() { + if (!isascii(CurTok)) + return -1; + + // Make sure it's a declared binop. + int TokPrec = BinopPrecedence[CurTok]; + if (TokPrec <= 0) return -1; + return TokPrec; +} + +/// Error* - These are little helper functions for error handling. +ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;} +PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; } +FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; } + +static ExprAST *ParseExpression(); + +/// identifierexpr +/// ::= identifier +/// ::= identifier '(' expression* ')' +static ExprAST *ParseIdentifierExpr() { + std::string IdName = IdentifierStr; + + getNextToken(); // eat identifier. + + if (CurTok != '(') // Simple variable ref. + return new VariableExprAST(IdName); + + // Call. 
+ getNextToken(); // eat ( + std::vector<ExprAST*> Args; + if (CurTok != ')') { + while (1) { + ExprAST *Arg = ParseExpression(); + if (!Arg) return 0; + Args.push_back(Arg); + + if (CurTok == ')') break; + + if (CurTok != ',') + return Error("Expected ')' or ',' in argument list"); + getNextToken(); + } + } + + // Eat the ')'. + getNextToken(); + + return new CallExprAST(IdName, Args); +} + +/// numberexpr ::= number +static ExprAST *ParseNumberExpr() { + ExprAST *Result = new NumberExprAST(NumVal); + getNextToken(); // consume the number + return Result; +} + +/// parenexpr ::= '(' expression ')' +static ExprAST *ParseParenExpr() { + getNextToken(); // eat (. + ExprAST *V = ParseExpression(); + if (!V) return 0; + + if (CurTok != ')') + return Error("expected ')'"); + getNextToken(); // eat ). + return V; +} + +/// ifexpr ::= 'if' expression 'then' expression 'else' expression +static ExprAST *ParseIfExpr() { + getNextToken(); // eat the if. + + // condition. + ExprAST *Cond = ParseExpression(); + if (!Cond) return 0; + + if (CurTok != tok_then) + return Error("expected then"); + getNextToken(); // eat the then + + ExprAST *Then = ParseExpression(); + if (Then == 0) return 0; + + if (CurTok != tok_else) + return Error("expected else"); + + getNextToken(); + + ExprAST *Else = ParseExpression(); + if (!Else) return 0; + + return new IfExprAST(Cond, Then, Else); +} + +/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression +static ExprAST *ParseForExpr() { + getNextToken(); // eat the for. + + if (CurTok != tok_identifier) + return Error("expected identifier after for"); + + std::string IdName = IdentifierStr; + getNextToken(); // eat identifier. + + if (CurTok != '=') + return Error("expected '=' after for"); + getNextToken(); // eat '='. 
+ + + ExprAST *Start = ParseExpression(); + if (Start == 0) return 0; + if (CurTok != ',') + return Error("expected ',' after for start value"); + getNextToken(); + + ExprAST *End = ParseExpression(); + if (End == 0) return 0; + + // The step value is optional. + ExprAST *Step = 0; + if (CurTok == ',') { + getNextToken(); + Step = ParseExpression(); + if (Step == 0) return 0; + } + + if (CurTok != tok_in) + return Error("expected 'in' after for"); + getNextToken(); // eat 'in'. + + ExprAST *Body = ParseExpression(); + if (Body == 0) return 0; + + return new ForExprAST(IdName, Start, End, Step, Body); +} + +/// primary +/// ::= identifierexpr +/// ::= numberexpr +/// ::= parenexpr +/// ::= ifexpr +/// ::= forexpr +static ExprAST *ParsePrimary() { + switch (CurTok) { + default: return Error("unknown token when expecting an expression"); + case tok_identifier: return ParseIdentifierExpr(); + case tok_number: return ParseNumberExpr(); + case '(': return ParseParenExpr(); + case tok_if: return ParseIfExpr(); + case tok_for: return ParseForExpr(); + } +} + +/// binoprhs +/// ::= ('+' primary)* +static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { + // If this is a binop, find its precedence. + while (1) { + int TokPrec = GetTokPrecedence(); + + // If this is a binop that binds at least as tightly as the current binop, + // consume it, otherwise we are done. + if (TokPrec < ExprPrec) + return LHS; + + // Okay, we know this is a binop. + int BinOp = CurTok; + getNextToken(); // eat binop + + // Parse the primary expression after the binary operator. + ExprAST *RHS = ParsePrimary(); + if (!RHS) return 0; + + // If BinOp binds less tightly with RHS than the operator after RHS, let + // the pending operator take RHS as its LHS. + int NextPrec = GetTokPrecedence(); + if (TokPrec < NextPrec) { + RHS = ParseBinOpRHS(TokPrec+1, RHS); + if (RHS == 0) return 0; + } + + // Merge LHS/RHS. 
+ LHS = new BinaryExprAST(BinOp, LHS, RHS); + } +} + +/// expression +/// ::= primary binoprhs +/// +static ExprAST *ParseExpression() { + ExprAST *LHS = ParsePrimary(); + if (!LHS) return 0; + + return ParseBinOpRHS(0, LHS); +} + +/// prototype +/// ::= id '(' id* ')' +static PrototypeAST *ParsePrototype() { + if (CurTok != tok_identifier) + return ErrorP("Expected function name in prototype"); + + std::string FnName = IdentifierStr; + getNextToken(); + + if (CurTok != '(') + return ErrorP("Expected '(' in prototype"); + + std::vector<std::string> ArgNames; + while (getNextToken() == tok_identifier) + ArgNames.push_back(IdentifierStr); + if (CurTok != ')') + return ErrorP("Expected ')' in prototype"); + + // success. + getNextToken(); // eat ')'. + + return new PrototypeAST(FnName, ArgNames); +} + +/// definition ::= 'def' prototype expression +static FunctionAST *ParseDefinition() { + getNextToken(); // eat def. + PrototypeAST *Proto = ParsePrototype(); + if (Proto == 0) return 0; + + if (ExprAST *E = ParseExpression()) + return new FunctionAST(Proto, E); + return 0; +} + +/// toplevelexpr ::= expression +static FunctionAST *ParseTopLevelExpr() { + if (ExprAST *E = ParseExpression()) { + // Make an anonymous proto. + PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>()); + return new FunctionAST(Proto, E); + } + return 0; +} + +/// external ::= 'extern' prototype +static PrototypeAST *ParseExtern() { + getNextToken(); // eat extern. 
+ return ParsePrototype(); +} + +//===----------------------------------------------------------------------===// +// Code Generation +//===----------------------------------------------------------------------===// + +static Module *TheModule; +static IRBuilder<> Builder(getGlobalContext()); +static std::map<std::string, Value*> NamedValues; +static FunctionPassManager *TheFPM; + +Value *ErrorV(const char *Str) { Error(Str); return 0; } + +Value *NumberExprAST::Codegen() { + return ConstantFP::get(getGlobalContext(), APFloat(Val)); +} + +Value *VariableExprAST::Codegen() { + // Look this variable up in the function. + Value *V = NamedValues[Name]; + return V ? V : ErrorV("Unknown variable name"); +} + +Value *BinaryExprAST::Codegen() { + Value *L = LHS->Codegen(); + Value *R = RHS->Codegen(); + if (L == 0 || R == 0) return 0; + + switch (Op) { + case '+': return Builder.CreateAdd(L, R, "addtmp"); + case '-': return Builder.CreateSub(L, R, "subtmp"); + case '*': return Builder.CreateMul(L, R, "multmp"); + case '<': + L = Builder.CreateFCmpULT(L, R, "cmptmp"); + // Convert bool 0/1 to double 0.0 or 1.0 + return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()), + "booltmp"); + default: return ErrorV("invalid binary operator"); + } +} + +Value *CallExprAST::Codegen() { + // Look up the name in the global module table. + Function *CalleeF = TheModule->getFunction(Callee); + if (CalleeF == 0) + return ErrorV("Unknown function referenced"); + + // If argument mismatch error. 
+ if (CalleeF->arg_size() != Args.size()) + return ErrorV("Incorrect # arguments passed"); + + std::vector<Value*> ArgsV; + for (unsigned i = 0, e = Args.size(); i != e; ++i) { + ArgsV.push_back(Args[i]->Codegen()); + if (ArgsV.back() == 0) return 0; + } + + return Builder.CreateCall(CalleeF, ArgsV.begin(), ArgsV.end(), "calltmp"); +} + +Value *IfExprAST::Codegen() { + Value *CondV = Cond->Codegen(); + if (CondV == 0) return 0; + + // Convert condition to a bool by comparing equal to 0.0. + CondV = Builder.CreateFCmpONE(CondV, + ConstantFP::get(getGlobalContext(), APFloat(0.0)), + "ifcond"); + + Function *TheFunction = Builder.GetInsertBlock()->getParent(); + + // Create blocks for the then and else cases. Insert the 'then' block at the + // end of the function. + BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction); + BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else"); + BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont"); + + Builder.CreateCondBr(CondV, ThenBB, ElseBB); + + // Emit then value. + Builder.SetInsertPoint(ThenBB); + + Value *ThenV = Then->Codegen(); + if (ThenV == 0) return 0; + + Builder.CreateBr(MergeBB); + // Codegen of 'Then' can change the current block, update ThenBB for the PHI. + ThenBB = Builder.GetInsertBlock(); + + // Emit else block. + TheFunction->getBasicBlockList().push_back(ElseBB); + Builder.SetInsertPoint(ElseBB); + + Value *ElseV = Else->Codegen(); + if (ElseV == 0) return 0; + + Builder.CreateBr(MergeBB); + // Codegen of 'Else' can change the current block, update ElseBB for the PHI. + ElseBB = Builder.GetInsertBlock(); + + // Emit merge block. + TheFunction->getBasicBlockList().push_back(MergeBB); + Builder.SetInsertPoint(MergeBB); + PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), + "iftmp"); + + PN->addIncoming(ThenV, ThenBB); + PN->addIncoming(ElseV, ElseBB); + return PN; +} + +Value *ForExprAST::Codegen() { + // Output this as: + // ... 
+ // start = startexpr + // goto loop + // loop: + // variable = phi [start, loopheader], [nextvariable, loopend] + // ... + // bodyexpr + // ... + // loopend: + // step = stepexpr + // nextvariable = variable + step + // endcond = endexpr + // br endcond, loop, endloop + // outloop: + + // Emit the start code first, without 'variable' in scope. + Value *StartVal = Start->Codegen(); + if (StartVal == 0) return 0; + + // Make the new basic block for the loop header, inserting after current + // block. + Function *TheFunction = Builder.GetInsertBlock()->getParent(); + BasicBlock *PreheaderBB = Builder.GetInsertBlock(); + BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction); + + // Insert an explicit fall through from the current block to the LoopBB. + Builder.CreateBr(LoopBB); + + // Start insertion in LoopBB. + Builder.SetInsertPoint(LoopBB); + + // Start the PHI node with an entry for Start. + PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), VarName.c_str()); + Variable->addIncoming(StartVal, PreheaderBB); + + // Within the loop, the variable is defined equal to the PHI node. If it + // shadows an existing variable, we have to restore it, so save it now. + Value *OldVal = NamedValues[VarName]; + NamedValues[VarName] = Variable; + + // Emit the body of the loop. This, like any other expr, can change the + // current BB. Note that we ignore the value computed by the body, but don't + // allow an error. + if (Body->Codegen() == 0) + return 0; + + // Emit the step value. + Value *StepVal; + if (Step) { + StepVal = Step->Codegen(); + if (StepVal == 0) return 0; + } else { + // If not specified, use 1.0. + StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0)); + } + + Value *NextVar = Builder.CreateAdd(Variable, StepVal, "nextvar"); + + // Compute the end condition. + Value *EndCond = End->Codegen(); + if (EndCond == 0) return EndCond; + + // Convert condition to a bool by comparing equal to 0.0. 
+ EndCond = Builder.CreateFCmpONE(EndCond, + ConstantFP::get(getGlobalContext(), APFloat(0.0)), + "loopcond"); + + // Create the "after loop" block and insert it. + BasicBlock *LoopEndBB = Builder.GetInsertBlock(); + BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction); + + // Insert the conditional branch into the end of LoopEndBB. + Builder.CreateCondBr(EndCond, LoopBB, AfterBB); + + // Any new code will be inserted in AfterBB. + Builder.SetInsertPoint(AfterBB); + + // Add a new entry to the PHI node for the backedge. + Variable->addIncoming(NextVar, LoopEndBB); + + // Restore the unshadowed variable. + if (OldVal) + NamedValues[VarName] = OldVal; + else + NamedValues.erase(VarName); + + + // for expr always returns 0.0. + return Constant::getNullValue(Type::getDoubleTy(getGlobalContext())); +} + +Function *PrototypeAST::Codegen() { + // Make the function type: double(double,double) etc. + std::vector<const Type*> Doubles(Args.size(), + Type::getDoubleTy(getGlobalContext())); + FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()), + Doubles, false); + + Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule); + + // If F conflicted, there was already something named 'Name'. If it has a + // body, don't allow redefinition or reextern. + if (F->getName() != Name) { + // Delete the one we just made and get the existing one. + F->eraseFromParent(); + F = TheModule->getFunction(Name); + + // If F already has a body, reject this. + if (!F->empty()) { + ErrorF("redefinition of function"); + return 0; + } + + // If F took a different number of args, reject. + if (F->arg_size() != Args.size()) { + ErrorF("redefinition of function with different # args"); + return 0; + } + } + + // Set names for all arguments. + unsigned Idx = 0; + for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size(); + ++AI, ++Idx) { + AI->setName(Args[Idx]); + + // Add arguments to variable symbol table. 
+ NamedValues[Args[Idx]] = AI; + } + + return F; +} + +Function *FunctionAST::Codegen() { + NamedValues.clear(); + + Function *TheFunction = Proto->Codegen(); + if (TheFunction == 0) + return 0; + + // Create a new basic block to start insertion into. + BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); + Builder.SetInsertPoint(BB); + + if (Value *RetVal = Body->Codegen()) { + // Finish off the function. + Builder.CreateRet(RetVal); + + // Validate the generated code, checking for consistency. + verifyFunction(*TheFunction); + + // Optimize the function. + TheFPM->run(*TheFunction); + + return TheFunction; + } + + // Error reading body, remove function. + TheFunction->eraseFromParent(); + return 0; +} + +//===----------------------------------------------------------------------===// +// Top-Level parsing and JIT Driver +//===----------------------------------------------------------------------===// + +static ExecutionEngine *TheExecutionEngine; + +static void HandleDefinition() { + if (FunctionAST *F = ParseDefinition()) { + if (Function *LF = F->Codegen()) { + fprintf(stderr, "Read function definition:"); + LF->dump(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleExtern() { + if (PrototypeAST *P = ParseExtern()) { + if (Function *F = P->Codegen()) { + fprintf(stderr, "Read extern: "); + F->dump(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleTopLevelExpression() { + // Evaluate a top-level expression into an anonymous function. + if (FunctionAST *F = ParseTopLevelExpr()) { + if (Function *LF = F->Codegen()) { + // JIT the function, returning a function pointer. + void *FPtr = TheExecutionEngine->getPointerToFunction(LF); + + // Cast it to the right type (takes no arguments, returns a double) so we + // can call it as a native function. 
+ double (*FP)() = (double (*)())(intptr_t)FPtr; + fprintf(stderr, "Evaluated to %f\n", FP()); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +/// top ::= definition | external | expression | ';' +static void MainLoop() { + while (1) { + fprintf(stderr, "ready> "); + switch (CurTok) { + case tok_eof: return; + case ';': getNextToken(); break; // ignore top-level semicolons. + case tok_def: HandleDefinition(); break; + case tok_extern: HandleExtern(); break; + default: HandleTopLevelExpression(); break; + } + } +} + +//===----------------------------------------------------------------------===// +// "Library" functions that can be "extern'd" from user code. +//===----------------------------------------------------------------------===// + +/// putchard - putchar that takes a double and returns 0. +extern "C" +double putchard(double X) { + putchar((char)X); + return 0; +} + +//===----------------------------------------------------------------------===// +// Main driver code. +//===----------------------------------------------------------------------===// + +int main() { + InitializeNativeTarget(); + LLVMContext &Context = getGlobalContext(); + + // Install standard binary operators. + // 1 is lowest precedence. + BinopPrecedence['<'] = 10; + BinopPrecedence['+'] = 20; + BinopPrecedence['-'] = 20; + BinopPrecedence['*'] = 40; // highest. + + // Prime the first token. + fprintf(stderr, "ready> "); + getNextToken(); + + // Make the module, which holds all the code. + TheModule = new Module("my cool jit", Context); + + // Create the JIT. This takes ownership of the module. + TheExecutionEngine = EngineBuilder(TheModule).create(); + + FunctionPassManager OurFPM(TheModule); + + // Set up the optimizer pipeline. Start with registering info about how the + // target lays out data structures. + OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData())); + // Do simple "peephole" optimizations and bit-twiddling optzns. 
+ OurFPM.add(createInstructionCombiningPass()); + // Reassociate expressions. + OurFPM.add(createReassociatePass()); + // Eliminate Common SubExpressions. + OurFPM.add(createGVNPass()); + // Simplify the control flow graph (deleting unreachable blocks, etc). + OurFPM.add(createCFGSimplificationPass()); + + OurFPM.doInitialization(); + + // Set the global so the code gen can use this. + TheFPM = &OurFPM; + + // Run the main "interpreter loop" now. + MainLoop(); + + TheFPM = 0; + + // Print out all of the generated code. + TheModule->dump(); + + return 0; +} diff --git a/examples/Kaleidoscope/Chapter6/CMakeLists.txt b/examples/Kaleidoscope/Chapter6/CMakeLists.txt new file mode 100644 index 0000000..2e15a5f --- /dev/null +++ b/examples/Kaleidoscope/Chapter6/CMakeLists.txt @@ -0,0 +1,5 @@ +set(LLVM_LINK_COMPONENTS core jit interpreter native) + +add_llvm_example(Kaleidoscope-Ch6 + toy.cpp + ) diff --git a/examples/Kaleidoscope/Chapter6/Makefile b/examples/Kaleidoscope/Chapter6/Makefile new file mode 100644 index 0000000..de2d758 --- /dev/null +++ b/examples/Kaleidoscope/Chapter6/Makefile @@ -0,0 +1,15 @@ +##===- examples/Kaleidoscope/Chapter6/Makefile -------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../.. 
+TOOLNAME = Kaleidoscope-Ch6 +EXAMPLE_TOOL = 1 + +LINK_COMPONENTS := core jit interpreter native + +include $(LEVEL)/Makefile.common diff --git a/examples/Kaleidoscope/Chapter6/toy.cpp b/examples/Kaleidoscope/Chapter6/toy.cpp new file mode 100644 index 0000000..f4b5b8c --- /dev/null +++ b/examples/Kaleidoscope/Chapter6/toy.cpp @@ -0,0 +1,969 @@ +#include "llvm/DerivedTypes.h" +#include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/Interpreter.h" +#include "llvm/ExecutionEngine/JIT.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/PassManager.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetSelect.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Support/IRBuilder.h" +#include <cstdio> +#include <string> +#include <map> +#include <vector> +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Lexer +//===----------------------------------------------------------------------===// + +// The lexer returns tokens [0-255] if it is an unknown character, otherwise one +// of these for known things. +enum Token { + tok_eof = -1, + + // commands + tok_def = -2, tok_extern = -3, + + // primary + tok_identifier = -4, tok_number = -5, + + // control + tok_if = -6, tok_then = -7, tok_else = -8, + tok_for = -9, tok_in = -10, + + // operators + tok_binary = -11, tok_unary = -12 +}; + +static std::string IdentifierStr; // Filled in if tok_identifier +static double NumVal; // Filled in if tok_number + +/// gettok - Return the next token from standard input. +static int gettok() { + static int LastChar = ' '; + + // Skip any whitespace. 
+ while (isspace(LastChar)) + LastChar = getchar(); + + if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* + IdentifierStr = LastChar; + while (isalnum((LastChar = getchar()))) + IdentifierStr += LastChar; + + if (IdentifierStr == "def") return tok_def; + if (IdentifierStr == "extern") return tok_extern; + if (IdentifierStr == "if") return tok_if; + if (IdentifierStr == "then") return tok_then; + if (IdentifierStr == "else") return tok_else; + if (IdentifierStr == "for") return tok_for; + if (IdentifierStr == "in") return tok_in; + if (IdentifierStr == "binary") return tok_binary; + if (IdentifierStr == "unary") return tok_unary; + return tok_identifier; + } + + if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ + std::string NumStr; + do { + NumStr += LastChar; + LastChar = getchar(); + } while (isdigit(LastChar) || LastChar == '.'); + + NumVal = strtod(NumStr.c_str(), 0); + return tok_number; + } + + if (LastChar == '#') { + // Comment until end of line. + do LastChar = getchar(); + while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); + + if (LastChar != EOF) + return gettok(); + } + + // Check for end of file. Don't eat the EOF. + if (LastChar == EOF) + return tok_eof; + + // Otherwise, just return the character as its ascii value. + int ThisChar = LastChar; + LastChar = getchar(); + return ThisChar; +} + +//===----------------------------------------------------------------------===// +// Abstract Syntax Tree (aka Parse Tree) +//===----------------------------------------------------------------------===// + +/// ExprAST - Base class for all expression nodes. +class ExprAST { +public: + virtual ~ExprAST() {} + virtual Value *Codegen() = 0; +}; + +/// NumberExprAST - Expression class for numeric literals like "1.0". +class NumberExprAST : public ExprAST { + double Val; +public: + NumberExprAST(double val) : Val(val) {} + virtual Value *Codegen(); +}; + +/// VariableExprAST - Expression class for referencing a variable, like "a". 
+class VariableExprAST : public ExprAST { + std::string Name; +public: + VariableExprAST(const std::string &name) : Name(name) {} + virtual Value *Codegen(); +}; + +/// UnaryExprAST - Expression class for a unary operator. +class UnaryExprAST : public ExprAST { + char Opcode; + ExprAST *Operand; +public: + UnaryExprAST(char opcode, ExprAST *operand) + : Opcode(opcode), Operand(operand) {} + virtual Value *Codegen(); +}; + +/// BinaryExprAST - Expression class for a binary operator. +class BinaryExprAST : public ExprAST { + char Op; + ExprAST *LHS, *RHS; +public: + BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) + : Op(op), LHS(lhs), RHS(rhs) {} + virtual Value *Codegen(); +}; + +/// CallExprAST - Expression class for function calls. +class CallExprAST : public ExprAST { + std::string Callee; + std::vector<ExprAST*> Args; +public: + CallExprAST(const std::string &callee, std::vector<ExprAST*> &args) + : Callee(callee), Args(args) {} + virtual Value *Codegen(); +}; + +/// IfExprAST - Expression class for if/then/else. +class IfExprAST : public ExprAST { + ExprAST *Cond, *Then, *Else; +public: + IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else) + : Cond(cond), Then(then), Else(_else) {} + virtual Value *Codegen(); +}; + +/// ForExprAST - Expression class for for/in. +class ForExprAST : public ExprAST { + std::string VarName; + ExprAST *Start, *End, *Step, *Body; +public: + ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end, + ExprAST *step, ExprAST *body) + : VarName(varname), Start(start), End(end), Step(step), Body(body) {} + virtual Value *Codegen(); +}; + +/// PrototypeAST - This class represents the "prototype" for a function, +/// which captures its name, and its argument names (thus implicitly the number +/// of arguments the function takes), as well as if it is an operator. +class PrototypeAST { + std::string Name; + std::vector<std::string> Args; + bool isOperator; + unsigned Precedence; // Precedence if a binary op. 
+public: + PrototypeAST(const std::string &name, const std::vector<std::string> &args, + bool isoperator = false, unsigned prec = 0) + : Name(name), Args(args), isOperator(isoperator), Precedence(prec) {} + + bool isUnaryOp() const { return isOperator && Args.size() == 1; } + bool isBinaryOp() const { return isOperator && Args.size() == 2; } + + char getOperatorName() const { + assert(isUnaryOp() || isBinaryOp()); + return Name[Name.size()-1]; + } + + unsigned getBinaryPrecedence() const { return Precedence; } + + Function *Codegen(); +}; + +/// FunctionAST - This class represents a function definition itself. +class FunctionAST { + PrototypeAST *Proto; + ExprAST *Body; +public: + FunctionAST(PrototypeAST *proto, ExprAST *body) + : Proto(proto), Body(body) {} + + Function *Codegen(); +}; + +//===----------------------------------------------------------------------===// +// Parser +//===----------------------------------------------------------------------===// + +/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current +/// token the parser is looking at. getNextToken reads another token from the +/// lexer and updates CurTok with its results. +static int CurTok; +static int getNextToken() { + return CurTok = gettok(); +} + +/// BinopPrecedence - This holds the precedence for each binary operator that is +/// defined. +static std::map<char, int> BinopPrecedence; + +/// GetTokPrecedence - Get the precedence of the pending binary operator token. +static int GetTokPrecedence() { + if (!isascii(CurTok)) + return -1; + + // Make sure it's a declared binop. + int TokPrec = BinopPrecedence[CurTok]; + if (TokPrec <= 0) return -1; + return TokPrec; +} + +/// Error* - These are little helper functions for error handling. 
+ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;} +PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; } +FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; } + +static ExprAST *ParseExpression(); + +/// identifierexpr +/// ::= identifier +/// ::= identifier '(' expression* ')' +static ExprAST *ParseIdentifierExpr() { + std::string IdName = IdentifierStr; + + getNextToken(); // eat identifier. + + if (CurTok != '(') // Simple variable ref. + return new VariableExprAST(IdName); + + // Call. + getNextToken(); // eat ( + std::vector<ExprAST*> Args; + if (CurTok != ')') { + while (1) { + ExprAST *Arg = ParseExpression(); + if (!Arg) return 0; + Args.push_back(Arg); + + if (CurTok == ')') break; + + if (CurTok != ',') + return Error("Expected ')' or ',' in argument list"); + getNextToken(); + } + } + + // Eat the ')'. + getNextToken(); + + return new CallExprAST(IdName, Args); +} + +/// numberexpr ::= number +static ExprAST *ParseNumberExpr() { + ExprAST *Result = new NumberExprAST(NumVal); + getNextToken(); // consume the number + return Result; +} + +/// parenexpr ::= '(' expression ')' +static ExprAST *ParseParenExpr() { + getNextToken(); // eat (. + ExprAST *V = ParseExpression(); + if (!V) return 0; + + if (CurTok != ')') + return Error("expected ')'"); + getNextToken(); // eat ). + return V; +} + +/// ifexpr ::= 'if' expression 'then' expression 'else' expression +static ExprAST *ParseIfExpr() { + getNextToken(); // eat the if. + + // condition. + ExprAST *Cond = ParseExpression(); + if (!Cond) return 0; + + if (CurTok != tok_then) + return Error("expected then"); + getNextToken(); // eat the then + + ExprAST *Then = ParseExpression(); + if (Then == 0) return 0; + + if (CurTok != tok_else) + return Error("expected else"); + + getNextToken(); + + ExprAST *Else = ParseExpression(); + if (!Else) return 0; + + return new IfExprAST(Cond, Then, Else); +} + +/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 
'in' expression +static ExprAST *ParseForExpr() { + getNextToken(); // eat the for. + + if (CurTok != tok_identifier) + return Error("expected identifier after for"); + + std::string IdName = IdentifierStr; + getNextToken(); // eat identifier. + + if (CurTok != '=') + return Error("expected '=' after for"); + getNextToken(); // eat '='. + + + ExprAST *Start = ParseExpression(); + if (Start == 0) return 0; + if (CurTok != ',') + return Error("expected ',' after for start value"); + getNextToken(); + + ExprAST *End = ParseExpression(); + if (End == 0) return 0; + + // The step value is optional. + ExprAST *Step = 0; + if (CurTok == ',') { + getNextToken(); + Step = ParseExpression(); + if (Step == 0) return 0; + } + + if (CurTok != tok_in) + return Error("expected 'in' after for"); + getNextToken(); // eat 'in'. + + ExprAST *Body = ParseExpression(); + if (Body == 0) return 0; + + return new ForExprAST(IdName, Start, End, Step, Body); +} + +/// primary +/// ::= identifierexpr +/// ::= numberexpr +/// ::= parenexpr +/// ::= ifexpr +/// ::= forexpr +static ExprAST *ParsePrimary() { + switch (CurTok) { + default: return Error("unknown token when expecting an expression"); + case tok_identifier: return ParseIdentifierExpr(); + case tok_number: return ParseNumberExpr(); + case '(': return ParseParenExpr(); + case tok_if: return ParseIfExpr(); + case tok_for: return ParseForExpr(); + } +} + +/// unary +/// ::= primary +/// ::= '!' unary +static ExprAST *ParseUnary() { + // If the current token is not an operator, it must be a primary expr. + if (!isascii(CurTok) || CurTok == '(' || CurTok == ',') + return ParsePrimary(); + + // If this is a unary operator, read it. + int Opc = CurTok; + getNextToken(); + if (ExprAST *Operand = ParseUnary()) + return new UnaryExprAST(Opc, Operand); + return 0; +} + +/// binoprhs +/// ::= ('+' unary)* +static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { + // If this is a binop, find its precedence. 
+ while (1) { + int TokPrec = GetTokPrecedence(); + + // If this is a binop that binds at least as tightly as the current binop, + // consume it, otherwise we are done. + if (TokPrec < ExprPrec) + return LHS; + + // Okay, we know this is a binop. + int BinOp = CurTok; + getNextToken(); // eat binop + + // Parse the unary expression after the binary operator. + ExprAST *RHS = ParseUnary(); + if (!RHS) return 0; + + // If BinOp binds less tightly with RHS than the operator after RHS, let + // the pending operator take RHS as its LHS. + int NextPrec = GetTokPrecedence(); + if (TokPrec < NextPrec) { + RHS = ParseBinOpRHS(TokPrec+1, RHS); + if (RHS == 0) return 0; + } + + // Merge LHS/RHS. + LHS = new BinaryExprAST(BinOp, LHS, RHS); + } +} + +/// expression +/// ::= unary binoprhs +/// +static ExprAST *ParseExpression() { + ExprAST *LHS = ParseUnary(); + if (!LHS) return 0; + + return ParseBinOpRHS(0, LHS); +} + +/// prototype +/// ::= id '(' id* ')' +/// ::= binary LETTER number? (id, id) +/// ::= unary LETTER (id) +static PrototypeAST *ParsePrototype() { + std::string FnName; + + unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary. + unsigned BinaryPrecedence = 30; + + switch (CurTok) { + default: + return ErrorP("Expected function name in prototype"); + case tok_identifier: + FnName = IdentifierStr; + Kind = 0; + getNextToken(); + break; + case tok_unary: + getNextToken(); + if (!isascii(CurTok)) + return ErrorP("Expected unary operator"); + FnName = "unary"; + FnName += (char)CurTok; + Kind = 1; + getNextToken(); + break; + case tok_binary: + getNextToken(); + if (!isascii(CurTok)) + return ErrorP("Expected binary operator"); + FnName = "binary"; + FnName += (char)CurTok; + Kind = 2; + getNextToken(); + + // Read the precedence if present. 
+ if (CurTok == tok_number) { + if (NumVal < 1 || NumVal > 100) + return ErrorP("Invalid precedecnce: must be 1..100"); + BinaryPrecedence = (unsigned)NumVal; + getNextToken(); + } + break; + } + + if (CurTok != '(') + return ErrorP("Expected '(' in prototype"); + + std::vector<std::string> ArgNames; + while (getNextToken() == tok_identifier) + ArgNames.push_back(IdentifierStr); + if (CurTok != ')') + return ErrorP("Expected ')' in prototype"); + + // success. + getNextToken(); // eat ')'. + + // Verify right number of names for operator. + if (Kind && ArgNames.size() != Kind) + return ErrorP("Invalid number of operands for operator"); + + return new PrototypeAST(FnName, ArgNames, Kind != 0, BinaryPrecedence); +} + +/// definition ::= 'def' prototype expression +static FunctionAST *ParseDefinition() { + getNextToken(); // eat def. + PrototypeAST *Proto = ParsePrototype(); + if (Proto == 0) return 0; + + if (ExprAST *E = ParseExpression()) + return new FunctionAST(Proto, E); + return 0; +} + +/// toplevelexpr ::= expression +static FunctionAST *ParseTopLevelExpr() { + if (ExprAST *E = ParseExpression()) { + // Make an anonymous proto. + PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>()); + return new FunctionAST(Proto, E); + } + return 0; +} + +/// external ::= 'extern' prototype +static PrototypeAST *ParseExtern() { + getNextToken(); // eat extern. 
+ return ParsePrototype(); +} + +//===----------------------------------------------------------------------===// +// Code Generation +//===----------------------------------------------------------------------===// + +static Module *TheModule; +static IRBuilder<> Builder(getGlobalContext()); +static std::map<std::string, Value*> NamedValues; +static FunctionPassManager *TheFPM; + +Value *ErrorV(const char *Str) { Error(Str); return 0; } + +Value *NumberExprAST::Codegen() { + return ConstantFP::get(getGlobalContext(), APFloat(Val)); +} + +Value *VariableExprAST::Codegen() { + // Look this variable up in the function. + Value *V = NamedValues[Name]; + return V ? V : ErrorV("Unknown variable name"); +} + +Value *UnaryExprAST::Codegen() { + Value *OperandV = Operand->Codegen(); + if (OperandV == 0) return 0; + + Function *F = TheModule->getFunction(std::string("unary")+Opcode); + if (F == 0) + return ErrorV("Unknown unary operator"); + + return Builder.CreateCall(F, OperandV, "unop"); +} + +Value *BinaryExprAST::Codegen() { + Value *L = LHS->Codegen(); + Value *R = RHS->Codegen(); + if (L == 0 || R == 0) return 0; + + switch (Op) { + case '+': return Builder.CreateAdd(L, R, "addtmp"); + case '-': return Builder.CreateSub(L, R, "subtmp"); + case '*': return Builder.CreateMul(L, R, "multmp"); + case '<': + L = Builder.CreateFCmpULT(L, R, "cmptmp"); + // Convert bool 0/1 to double 0.0 or 1.0 + return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()), + "booltmp"); + default: break; + } + + // If it wasn't a builtin binary operator, it must be a user defined one. Emit + // a call to it. + Function *F = TheModule->getFunction(std::string("binary")+Op); + assert(F && "binary operator not found!"); + + Value *Ops[] = { L, R }; + return Builder.CreateCall(F, Ops, Ops+2, "binop"); +} + +Value *CallExprAST::Codegen() { + // Look up the name in the global module table. 
+ Function *CalleeF = TheModule->getFunction(Callee); + if (CalleeF == 0) + return ErrorV("Unknown function referenced"); + + // If argument mismatch error. + if (CalleeF->arg_size() != Args.size()) + return ErrorV("Incorrect # arguments passed"); + + std::vector<Value*> ArgsV; + for (unsigned i = 0, e = Args.size(); i != e; ++i) { + ArgsV.push_back(Args[i]->Codegen()); + if (ArgsV.back() == 0) return 0; + } + + return Builder.CreateCall(CalleeF, ArgsV.begin(), ArgsV.end(), "calltmp"); +} + +Value *IfExprAST::Codegen() { + Value *CondV = Cond->Codegen(); + if (CondV == 0) return 0; + + // Convert condition to a bool by comparing equal to 0.0. + CondV = Builder.CreateFCmpONE(CondV, + ConstantFP::get(getGlobalContext(), APFloat(0.0)), + "ifcond"); + + Function *TheFunction = Builder.GetInsertBlock()->getParent(); + + // Create blocks for the then and else cases. Insert the 'then' block at the + // end of the function. + BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction); + BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else"); + BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont"); + + Builder.CreateCondBr(CondV, ThenBB, ElseBB); + + // Emit then value. + Builder.SetInsertPoint(ThenBB); + + Value *ThenV = Then->Codegen(); + if (ThenV == 0) return 0; + + Builder.CreateBr(MergeBB); + // Codegen of 'Then' can change the current block, update ThenBB for the PHI. + ThenBB = Builder.GetInsertBlock(); + + // Emit else block. + TheFunction->getBasicBlockList().push_back(ElseBB); + Builder.SetInsertPoint(ElseBB); + + Value *ElseV = Else->Codegen(); + if (ElseV == 0) return 0; + + Builder.CreateBr(MergeBB); + // Codegen of 'Else' can change the current block, update ElseBB for the PHI. + ElseBB = Builder.GetInsertBlock(); + + // Emit merge block. 
+ TheFunction->getBasicBlockList().push_back(MergeBB); + Builder.SetInsertPoint(MergeBB); + PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), + "iftmp"); + + PN->addIncoming(ThenV, ThenBB); + PN->addIncoming(ElseV, ElseBB); + return PN; +} + +Value *ForExprAST::Codegen() { + // Output this as: + // ... + // start = startexpr + // goto loop + // loop: + // variable = phi [start, loopheader], [nextvariable, loopend] + // ... + // bodyexpr + // ... + // loopend: + // step = stepexpr + // nextvariable = variable + step + // endcond = endexpr + // br endcond, loop, endloop + // outloop: + + // Emit the start code first, without 'variable' in scope. + Value *StartVal = Start->Codegen(); + if (StartVal == 0) return 0; + + // Make the new basic block for the loop header, inserting after current + // block. + Function *TheFunction = Builder.GetInsertBlock()->getParent(); + BasicBlock *PreheaderBB = Builder.GetInsertBlock(); + BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction); + + // Insert an explicit fall through from the current block to the LoopBB. + Builder.CreateBr(LoopBB); + + // Start insertion in LoopBB. + Builder.SetInsertPoint(LoopBB); + + // Start the PHI node with an entry for Start. + PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), VarName.c_str()); + Variable->addIncoming(StartVal, PreheaderBB); + + // Within the loop, the variable is defined equal to the PHI node. If it + // shadows an existing variable, we have to restore it, so save it now. + Value *OldVal = NamedValues[VarName]; + NamedValues[VarName] = Variable; + + // Emit the body of the loop. This, like any other expr, can change the + // current BB. Note that we ignore the value computed by the body, but don't + // allow an error. + if (Body->Codegen() == 0) + return 0; + + // Emit the step value. 
+ Value *StepVal; + if (Step) { + StepVal = Step->Codegen(); + if (StepVal == 0) return 0; + } else { + // If not specified, use 1.0. + StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0)); + } + + Value *NextVar = Builder.CreateAdd(Variable, StepVal, "nextvar"); + + // Compute the end condition. + Value *EndCond = End->Codegen(); + if (EndCond == 0) return EndCond; + + // Convert condition to a bool by comparing equal to 0.0. + EndCond = Builder.CreateFCmpONE(EndCond, + ConstantFP::get(getGlobalContext(), APFloat(0.0)), + "loopcond"); + + // Create the "after loop" block and insert it. + BasicBlock *LoopEndBB = Builder.GetInsertBlock(); + BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction); + + // Insert the conditional branch into the end of LoopEndBB. + Builder.CreateCondBr(EndCond, LoopBB, AfterBB); + + // Any new code will be inserted in AfterBB. + Builder.SetInsertPoint(AfterBB); + + // Add a new entry to the PHI node for the backedge. + Variable->addIncoming(NextVar, LoopEndBB); + + // Restore the unshadowed variable. + if (OldVal) + NamedValues[VarName] = OldVal; + else + NamedValues.erase(VarName); + + + // for expr always returns 0.0. + return Constant::getNullValue(Type::getDoubleTy(getGlobalContext())); +} + +Function *PrototypeAST::Codegen() { + // Make the function type: double(double,double) etc. + std::vector<const Type*> Doubles(Args.size(), + Type::getDoubleTy(getGlobalContext())); + FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()), + Doubles, false); + + Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule); + + // If F conflicted, there was already something named 'Name'. If it has a + // body, don't allow redefinition or reextern. + if (F->getName() != Name) { + // Delete the one we just made and get the existing one. + F->eraseFromParent(); + F = TheModule->getFunction(Name); + + // If F already has a body, reject this. 
+ if (!F->empty()) { + ErrorF("redefinition of function"); + return 0; + } + + // If F took a different number of args, reject. + if (F->arg_size() != Args.size()) { + ErrorF("redefinition of function with different # args"); + return 0; + } + } + + // Set names for all arguments. + unsigned Idx = 0; + for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size(); + ++AI, ++Idx) { + AI->setName(Args[Idx]); + + // Add arguments to variable symbol table. + NamedValues[Args[Idx]] = AI; + } + + return F; +} + +Function *FunctionAST::Codegen() { + NamedValues.clear(); + + Function *TheFunction = Proto->Codegen(); + if (TheFunction == 0) + return 0; + + // If this is an operator, install it. + if (Proto->isBinaryOp()) + BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence(); + + // Create a new basic block to start insertion into. + BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); + Builder.SetInsertPoint(BB); + + if (Value *RetVal = Body->Codegen()) { + // Finish off the function. + Builder.CreateRet(RetVal); + + // Validate the generated code, checking for consistency. + verifyFunction(*TheFunction); + + // Optimize the function. + TheFPM->run(*TheFunction); + + return TheFunction; + } + + // Error reading body, remove function. + TheFunction->eraseFromParent(); + + if (Proto->isBinaryOp()) + BinopPrecedence.erase(Proto->getOperatorName()); + return 0; +} + +//===----------------------------------------------------------------------===// +// Top-Level parsing and JIT Driver +//===----------------------------------------------------------------------===// + +static ExecutionEngine *TheExecutionEngine; + +static void HandleDefinition() { + if (FunctionAST *F = ParseDefinition()) { + if (Function *LF = F->Codegen()) { + fprintf(stderr, "Read function definition:"); + LF->dump(); + } + } else { + // Skip token for error recovery. 
+ getNextToken(); + } +} + +static void HandleExtern() { + if (PrototypeAST *P = ParseExtern()) { + if (Function *F = P->Codegen()) { + fprintf(stderr, "Read extern: "); + F->dump(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleTopLevelExpression() { + // Evaluate a top-level expression into an anonymous function. + if (FunctionAST *F = ParseTopLevelExpr()) { + if (Function *LF = F->Codegen()) { + // JIT the function, returning a function pointer. + void *FPtr = TheExecutionEngine->getPointerToFunction(LF); + + // Cast it to the right type (takes no arguments, returns a double) so we + // can call it as a native function. + double (*FP)() = (double (*)())(intptr_t)FPtr; + fprintf(stderr, "Evaluated to %f\n", FP()); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +/// top ::= definition | external | expression | ';' +static void MainLoop() { + while (1) { + fprintf(stderr, "ready> "); + switch (CurTok) { + case tok_eof: return; + case ';': getNextToken(); break; // ignore top-level semicolons. + case tok_def: HandleDefinition(); break; + case tok_extern: HandleExtern(); break; + default: HandleTopLevelExpression(); break; + } + } +} + +//===----------------------------------------------------------------------===// +// "Library" functions that can be "extern'd" from user code. +//===----------------------------------------------------------------------===// + +/// putchard - putchar that takes a double and returns 0. +extern "C" +double putchard(double X) { + putchar((char)X); + return 0; +} + +/// printd - printf that takes a double prints it as "%f\n", returning 0. +extern "C" +double printd(double X) { + printf("%f\n", X); + return 0; +} + +//===----------------------------------------------------------------------===// +// Main driver code. 
+//===----------------------------------------------------------------------===// + +int main() { + InitializeNativeTarget(); + LLVMContext &Context = getGlobalContext(); + + // Install standard binary operators. + // 1 is lowest precedence. + BinopPrecedence['<'] = 10; + BinopPrecedence['+'] = 20; + BinopPrecedence['-'] = 20; + BinopPrecedence['*'] = 40; // highest. + + // Prime the first token. + fprintf(stderr, "ready> "); + getNextToken(); + + // Make the module, which holds all the code. + TheModule = new Module("my cool jit", Context); + + // Create the JIT. This takes ownership of the module. + TheExecutionEngine = EngineBuilder(TheModule).create(); + + FunctionPassManager OurFPM(TheModule); + + // Set up the optimizer pipeline. Start with registering info about how the + // target lays out data structures. + OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData())); + // Do simple "peephole" optimizations and bit-twiddling optzns. + OurFPM.add(createInstructionCombiningPass()); + // Reassociate expressions. + OurFPM.add(createReassociatePass()); + // Eliminate Common SubExpressions. + OurFPM.add(createGVNPass()); + // Simplify the control flow graph (deleting unreachable blocks, etc). + OurFPM.add(createCFGSimplificationPass()); + + OurFPM.doInitialization(); + + // Set the global so the code gen can use this. + TheFPM = &OurFPM; + + // Run the main "interpreter loop" now. + MainLoop(); + + TheFPM = 0; + + // Print out all of the generated code. 
+ TheModule->dump(); + + return 0; +} diff --git a/examples/Kaleidoscope/Chapter7/CMakeLists.txt b/examples/Kaleidoscope/Chapter7/CMakeLists.txt new file mode 100644 index 0000000..9b8227c --- /dev/null +++ b/examples/Kaleidoscope/Chapter7/CMakeLists.txt @@ -0,0 +1,5 @@ +set(LLVM_LINK_COMPONENTS core jit interpreter native) + +add_llvm_example(Kaleidoscope-Ch7 + toy.cpp + ) diff --git a/examples/Kaleidoscope/Chapter7/Makefile b/examples/Kaleidoscope/Chapter7/Makefile new file mode 100644 index 0000000..8911d52 --- /dev/null +++ b/examples/Kaleidoscope/Chapter7/Makefile @@ -0,0 +1,16 @@ +##===- examples/Kaleidoscope/Chapter7/Makefile -------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../.. +TOOLNAME = Kaleidoscope-Ch7 +EXAMPLE_TOOL = 1 +REQUIRES_RTTI := 1 + +LINK_COMPONENTS := core jit interpreter native + +include $(LEVEL)/Makefile.common diff --git a/examples/Kaleidoscope/Chapter7/toy.cpp b/examples/Kaleidoscope/Chapter7/toy.cpp new file mode 100644 index 0000000..951dfd8 --- /dev/null +++ b/examples/Kaleidoscope/Chapter7/toy.cpp @@ -0,0 +1,1135 @@ +#include "llvm/DerivedTypes.h" +#include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/Interpreter.h" +#include "llvm/ExecutionEngine/JIT.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/PassManager.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetSelect.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Support/IRBuilder.h" +#include <cstdio> +#include <string> +#include <map> +#include <vector> +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Lexer 
+//===----------------------------------------------------------------------===// + +// The lexer returns tokens [0-255] if it is an unknown character, otherwise one +// of these for known things. +enum Token { + tok_eof = -1, + + // commands + tok_def = -2, tok_extern = -3, + + // primary + tok_identifier = -4, tok_number = -5, + + // control + tok_if = -6, tok_then = -7, tok_else = -8, + tok_for = -9, tok_in = -10, + + // operators + tok_binary = -11, tok_unary = -12, + + // var definition + tok_var = -13 +}; + +static std::string IdentifierStr; // Filled in if tok_identifier +static double NumVal; // Filled in if tok_number + +/// gettok - Return the next token from standard input. +static int gettok() { + static int LastChar = ' '; + + // Skip any whitespace. + while (isspace(LastChar)) + LastChar = getchar(); + + if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* + IdentifierStr = LastChar; + while (isalnum((LastChar = getchar()))) + IdentifierStr += LastChar; + + if (IdentifierStr == "def") return tok_def; + if (IdentifierStr == "extern") return tok_extern; + if (IdentifierStr == "if") return tok_if; + if (IdentifierStr == "then") return tok_then; + if (IdentifierStr == "else") return tok_else; + if (IdentifierStr == "for") return tok_for; + if (IdentifierStr == "in") return tok_in; + if (IdentifierStr == "binary") return tok_binary; + if (IdentifierStr == "unary") return tok_unary; + if (IdentifierStr == "var") return tok_var; + return tok_identifier; + } + + if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ + std::string NumStr; + do { + NumStr += LastChar; + LastChar = getchar(); + } while (isdigit(LastChar) || LastChar == '.'); + + NumVal = strtod(NumStr.c_str(), 0); + return tok_number; + } + + if (LastChar == '#') { + // Comment until end of line. + do LastChar = getchar(); + while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); + + if (LastChar != EOF) + return gettok(); + } + + // Check for end of file. 
Don't eat the EOF. + if (LastChar == EOF) + return tok_eof; + + // Otherwise, just return the character as its ascii value. + int ThisChar = LastChar; + LastChar = getchar(); + return ThisChar; +} + +//===----------------------------------------------------------------------===// +// Abstract Syntax Tree (aka Parse Tree) +//===----------------------------------------------------------------------===// + +/// ExprAST - Base class for all expression nodes. +class ExprAST { +public: + virtual ~ExprAST() {} + virtual Value *Codegen() = 0; +}; + +/// NumberExprAST - Expression class for numeric literals like "1.0". +class NumberExprAST : public ExprAST { + double Val; +public: + NumberExprAST(double val) : Val(val) {} + virtual Value *Codegen(); +}; + +/// VariableExprAST - Expression class for referencing a variable, like "a". +class VariableExprAST : public ExprAST { + std::string Name; +public: + VariableExprAST(const std::string &name) : Name(name) {} + const std::string &getName() const { return Name; } + virtual Value *Codegen(); +}; + +/// UnaryExprAST - Expression class for a unary operator. +class UnaryExprAST : public ExprAST { + char Opcode; + ExprAST *Operand; +public: + UnaryExprAST(char opcode, ExprAST *operand) + : Opcode(opcode), Operand(operand) {} + virtual Value *Codegen(); +}; + +/// BinaryExprAST - Expression class for a binary operator. +class BinaryExprAST : public ExprAST { + char Op; + ExprAST *LHS, *RHS; +public: + BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) + : Op(op), LHS(lhs), RHS(rhs) {} + virtual Value *Codegen(); +}; + +/// CallExprAST - Expression class for function calls. +class CallExprAST : public ExprAST { + std::string Callee; + std::vector<ExprAST*> Args; +public: + CallExprAST(const std::string &callee, std::vector<ExprAST*> &args) + : Callee(callee), Args(args) {} + virtual Value *Codegen(); +}; + +/// IfExprAST - Expression class for if/then/else. 
+class IfExprAST : public ExprAST { + ExprAST *Cond, *Then, *Else; +public: + IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else) + : Cond(cond), Then(then), Else(_else) {} + virtual Value *Codegen(); +}; + +/// ForExprAST - Expression class for for/in. +class ForExprAST : public ExprAST { + std::string VarName; + ExprAST *Start, *End, *Step, *Body; +public: + ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end, + ExprAST *step, ExprAST *body) + : VarName(varname), Start(start), End(end), Step(step), Body(body) {} + virtual Value *Codegen(); +}; + +/// VarExprAST - Expression class for var/in +class VarExprAST : public ExprAST { + std::vector<std::pair<std::string, ExprAST*> > VarNames; + ExprAST *Body; +public: + VarExprAST(const std::vector<std::pair<std::string, ExprAST*> > &varnames, + ExprAST *body) + : VarNames(varnames), Body(body) {} + + virtual Value *Codegen(); +}; + +/// PrototypeAST - This class represents the "prototype" for a function, +/// which captures its argument names as well as if it is an operator. +class PrototypeAST { + std::string Name; + std::vector<std::string> Args; + bool isOperator; + unsigned Precedence; // Precedence if a binary op. +public: + PrototypeAST(const std::string &name, const std::vector<std::string> &args, + bool isoperator = false, unsigned prec = 0) + : Name(name), Args(args), isOperator(isoperator), Precedence(prec) {} + + bool isUnaryOp() const { return isOperator && Args.size() == 1; } + bool isBinaryOp() const { return isOperator && Args.size() == 2; } + + char getOperatorName() const { + assert(isUnaryOp() || isBinaryOp()); + return Name[Name.size()-1]; + } + + unsigned getBinaryPrecedence() const { return Precedence; } + + Function *Codegen(); + + void CreateArgumentAllocas(Function *F); +}; + +/// FunctionAST - This class represents a function definition itself. 
+class FunctionAST { + PrototypeAST *Proto; + ExprAST *Body; +public: + FunctionAST(PrototypeAST *proto, ExprAST *body) + : Proto(proto), Body(body) {} + + Function *Codegen(); +}; + +//===----------------------------------------------------------------------===// +// Parser +//===----------------------------------------------------------------------===// + +/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current +/// token the parser is looking at. getNextToken reads another token from the +/// lexer and updates CurTok with its results. +static int CurTok; +static int getNextToken() { + return CurTok = gettok(); +} + +/// BinopPrecedence - This holds the precedence for each binary operator that is +/// defined. +static std::map<char, int> BinopPrecedence; + +/// GetTokPrecedence - Get the precedence of the pending binary operator token. +static int GetTokPrecedence() { + if (!isascii(CurTok)) + return -1; + + // Make sure it's a declared binop. + int TokPrec = BinopPrecedence[CurTok]; + if (TokPrec <= 0) return -1; + return TokPrec; +} + +/// Error* - These are little helper functions for error handling. +ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;} +PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; } +FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; } + +static ExprAST *ParseExpression(); + +/// identifierexpr +/// ::= identifier +/// ::= identifier '(' expression* ')' +static ExprAST *ParseIdentifierExpr() { + std::string IdName = IdentifierStr; + + getNextToken(); // eat identifier. + + if (CurTok != '(') // Simple variable ref. + return new VariableExprAST(IdName); + + // Call. 
+ getNextToken(); // eat ( + std::vector<ExprAST*> Args; + if (CurTok != ')') { + while (1) { + ExprAST *Arg = ParseExpression(); + if (!Arg) return 0; + Args.push_back(Arg); + + if (CurTok == ')') break; + + if (CurTok != ',') + return Error("Expected ')' or ',' in argument list"); + getNextToken(); + } + } + + // Eat the ')'. + getNextToken(); + + return new CallExprAST(IdName, Args); +} + +/// numberexpr ::= number +static ExprAST *ParseNumberExpr() { + ExprAST *Result = new NumberExprAST(NumVal); + getNextToken(); // consume the number + return Result; +} + +/// parenexpr ::= '(' expression ')' +static ExprAST *ParseParenExpr() { + getNextToken(); // eat (. + ExprAST *V = ParseExpression(); + if (!V) return 0; + + if (CurTok != ')') + return Error("expected ')'"); + getNextToken(); // eat ). + return V; +} + +/// ifexpr ::= 'if' expression 'then' expression 'else' expression +static ExprAST *ParseIfExpr() { + getNextToken(); // eat the if. + + // condition. + ExprAST *Cond = ParseExpression(); + if (!Cond) return 0; + + if (CurTok != tok_then) + return Error("expected then"); + getNextToken(); // eat the then + + ExprAST *Then = ParseExpression(); + if (Then == 0) return 0; + + if (CurTok != tok_else) + return Error("expected else"); + + getNextToken(); + + ExprAST *Else = ParseExpression(); + if (!Else) return 0; + + return new IfExprAST(Cond, Then, Else); +} + +/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression +static ExprAST *ParseForExpr() { + getNextToken(); // eat the for. + + if (CurTok != tok_identifier) + return Error("expected identifier after for"); + + std::string IdName = IdentifierStr; + getNextToken(); // eat identifier. + + if (CurTok != '=') + return Error("expected '=' after for"); + getNextToken(); // eat '='. 
+ + + ExprAST *Start = ParseExpression(); + if (Start == 0) return 0; + if (CurTok != ',') + return Error("expected ',' after for start value"); + getNextToken(); + + ExprAST *End = ParseExpression(); + if (End == 0) return 0; + + // The step value is optional. + ExprAST *Step = 0; + if (CurTok == ',') { + getNextToken(); + Step = ParseExpression(); + if (Step == 0) return 0; + } + + if (CurTok != tok_in) + return Error("expected 'in' after for"); + getNextToken(); // eat 'in'. + + ExprAST *Body = ParseExpression(); + if (Body == 0) return 0; + + return new ForExprAST(IdName, Start, End, Step, Body); +} + +/// varexpr ::= 'var' identifier ('=' expression)? +// (',' identifier ('=' expression)?)* 'in' expression +static ExprAST *ParseVarExpr() { + getNextToken(); // eat the var. + + std::vector<std::pair<std::string, ExprAST*> > VarNames; + + // At least one variable name is required. + if (CurTok != tok_identifier) + return Error("expected identifier after var"); + + while (1) { + std::string Name = IdentifierStr; + getNextToken(); // eat identifier. + + // Read the optional initializer. + ExprAST *Init = 0; + if (CurTok == '=') { + getNextToken(); // eat the '='. + + Init = ParseExpression(); + if (Init == 0) return 0; + } + + VarNames.push_back(std::make_pair(Name, Init)); + + // End of var list, exit loop. + if (CurTok != ',') break; + getNextToken(); // eat the ','. + + if (CurTok != tok_identifier) + return Error("expected identifier list after var"); + } + + // At this point, we have to have 'in'. + if (CurTok != tok_in) + return Error("expected 'in' keyword after 'var'"); + getNextToken(); // eat 'in'. 
+ + ExprAST *Body = ParseExpression(); + if (Body == 0) return 0; + + return new VarExprAST(VarNames, Body); +} + +/// primary +/// ::= identifierexpr +/// ::= numberexpr +/// ::= parenexpr +/// ::= ifexpr +/// ::= forexpr +/// ::= varexpr +static ExprAST *ParsePrimary() { + switch (CurTok) { + default: return Error("unknown token when expecting an expression"); + case tok_identifier: return ParseIdentifierExpr(); + case tok_number: return ParseNumberExpr(); + case '(': return ParseParenExpr(); + case tok_if: return ParseIfExpr(); + case tok_for: return ParseForExpr(); + case tok_var: return ParseVarExpr(); + } +} + +/// unary +/// ::= primary +/// ::= '!' unary +static ExprAST *ParseUnary() { + // If the current token is not an operator, it must be a primary expr. + if (!isascii(CurTok) || CurTok == '(' || CurTok == ',') + return ParsePrimary(); + + // If this is a unary operator, read it. + int Opc = CurTok; + getNextToken(); + if (ExprAST *Operand = ParseUnary()) + return new UnaryExprAST(Opc, Operand); + return 0; +} + +/// binoprhs +/// ::= ('+' unary)* +static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { + // If this is a binop, find its precedence. + while (1) { + int TokPrec = GetTokPrecedence(); + + // If this is a binop that binds at least as tightly as the current binop, + // consume it, otherwise we are done. + if (TokPrec < ExprPrec) + return LHS; + + // Okay, we know this is a binop. + int BinOp = CurTok; + getNextToken(); // eat binop + + // Parse the unary expression after the binary operator. + ExprAST *RHS = ParseUnary(); + if (!RHS) return 0; + + // If BinOp binds less tightly with RHS than the operator after RHS, let + // the pending operator take RHS as its LHS. + int NextPrec = GetTokPrecedence(); + if (TokPrec < NextPrec) { + RHS = ParseBinOpRHS(TokPrec+1, RHS); + if (RHS == 0) return 0; + } + + // Merge LHS/RHS. 
+ LHS = new BinaryExprAST(BinOp, LHS, RHS); + } +} + +/// expression +/// ::= unary binoprhs +/// +static ExprAST *ParseExpression() { + ExprAST *LHS = ParseUnary(); + if (!LHS) return 0; + + return ParseBinOpRHS(0, LHS); +} + +/// prototype +/// ::= id '(' id* ')' +/// ::= binary LETTER number? (id, id) +/// ::= unary LETTER (id) +static PrototypeAST *ParsePrototype() { + std::string FnName; + + unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary. + unsigned BinaryPrecedence = 30; + + switch (CurTok) { + default: + return ErrorP("Expected function name in prototype"); + case tok_identifier: + FnName = IdentifierStr; + Kind = 0; + getNextToken(); + break; + case tok_unary: + getNextToken(); + if (!isascii(CurTok)) + return ErrorP("Expected unary operator"); + FnName = "unary"; + FnName += (char)CurTok; + Kind = 1; + getNextToken(); + break; + case tok_binary: + getNextToken(); + if (!isascii(CurTok)) + return ErrorP("Expected binary operator"); + FnName = "binary"; + FnName += (char)CurTok; + Kind = 2; + getNextToken(); + + // Read the precedence if present. + if (CurTok == tok_number) { + if (NumVal < 1 || NumVal > 100) + return ErrorP("Invalid precedecnce: must be 1..100"); + BinaryPrecedence = (unsigned)NumVal; + getNextToken(); + } + break; + } + + if (CurTok != '(') + return ErrorP("Expected '(' in prototype"); + + std::vector<std::string> ArgNames; + while (getNextToken() == tok_identifier) + ArgNames.push_back(IdentifierStr); + if (CurTok != ')') + return ErrorP("Expected ')' in prototype"); + + // success. + getNextToken(); // eat ')'. + + // Verify right number of names for operator. + if (Kind && ArgNames.size() != Kind) + return ErrorP("Invalid number of operands for operator"); + + return new PrototypeAST(FnName, ArgNames, Kind != 0, BinaryPrecedence); +} + +/// definition ::= 'def' prototype expression +static FunctionAST *ParseDefinition() { + getNextToken(); // eat def. 
+ PrototypeAST *Proto = ParsePrototype(); + if (Proto == 0) return 0; + + if (ExprAST *E = ParseExpression()) + return new FunctionAST(Proto, E); + return 0; +} + +/// toplevelexpr ::= expression +static FunctionAST *ParseTopLevelExpr() { + if (ExprAST *E = ParseExpression()) { + // Make an anonymous proto. + PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>()); + return new FunctionAST(Proto, E); + } + return 0; +} + +/// external ::= 'extern' prototype +static PrototypeAST *ParseExtern() { + getNextToken(); // eat extern. + return ParsePrototype(); +} + +//===----------------------------------------------------------------------===// +// Code Generation +//===----------------------------------------------------------------------===// + +static Module *TheModule; +static IRBuilder<> Builder(getGlobalContext()); +static std::map<std::string, AllocaInst*> NamedValues; +static FunctionPassManager *TheFPM; + +Value *ErrorV(const char *Str) { Error(Str); return 0; } + +/// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of +/// the function. This is used for mutable variables etc. +static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction, + const std::string &VarName) { + IRBuilder<> TmpB(&TheFunction->getEntryBlock(), + TheFunction->getEntryBlock().begin()); + return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), 0, + VarName.c_str()); +} + +Value *NumberExprAST::Codegen() { + return ConstantFP::get(getGlobalContext(), APFloat(Val)); +} + +Value *VariableExprAST::Codegen() { + // Look this variable up in the function. + Value *V = NamedValues[Name]; + if (V == 0) return ErrorV("Unknown variable name"); + + // Load the value. 
+ return Builder.CreateLoad(V, Name.c_str()); +} + +Value *UnaryExprAST::Codegen() { + Value *OperandV = Operand->Codegen(); + if (OperandV == 0) return 0; + + Function *F = TheModule->getFunction(std::string("unary")+Opcode); + if (F == 0) + return ErrorV("Unknown unary operator"); + + return Builder.CreateCall(F, OperandV, "unop"); +} + +Value *BinaryExprAST::Codegen() { + // Special case '=' because we don't want to emit the LHS as an expression. + if (Op == '=') { + // Assignment requires the LHS to be an identifier. + VariableExprAST *LHSE = dynamic_cast<VariableExprAST*>(LHS); + if (!LHSE) + return ErrorV("destination of '=' must be a variable"); + // Codegen the RHS. + Value *Val = RHS->Codegen(); + if (Val == 0) return 0; + + // Look up the name. + Value *Variable = NamedValues[LHSE->getName()]; + if (Variable == 0) return ErrorV("Unknown variable name"); + + Builder.CreateStore(Val, Variable); + return Val; + } + + Value *L = LHS->Codegen(); + Value *R = RHS->Codegen(); + if (L == 0 || R == 0) return 0; + + switch (Op) { + case '+': return Builder.CreateAdd(L, R, "addtmp"); + case '-': return Builder.CreateSub(L, R, "subtmp"); + case '*': return Builder.CreateMul(L, R, "multmp"); + case '<': + L = Builder.CreateFCmpULT(L, R, "cmptmp"); + // Convert bool 0/1 to double 0.0 or 1.0 + return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()), + "booltmp"); + default: break; + } + + // If it wasn't a builtin binary operator, it must be a user defined one. Emit + // a call to it. + Function *F = TheModule->getFunction(std::string("binary")+Op); + assert(F && "binary operator not found!"); + + Value *Ops[] = { L, R }; + return Builder.CreateCall(F, Ops, Ops+2, "binop"); +} + +Value *CallExprAST::Codegen() { + // Look up the name in the global module table. + Function *CalleeF = TheModule->getFunction(Callee); + if (CalleeF == 0) + return ErrorV("Unknown function referenced"); + + // If argument mismatch error. 
+ if (CalleeF->arg_size() != Args.size()) + return ErrorV("Incorrect # arguments passed"); + + std::vector<Value*> ArgsV; + for (unsigned i = 0, e = Args.size(); i != e; ++i) { + ArgsV.push_back(Args[i]->Codegen()); + if (ArgsV.back() == 0) return 0; + } + + return Builder.CreateCall(CalleeF, ArgsV.begin(), ArgsV.end(), "calltmp"); +} + +Value *IfExprAST::Codegen() { + Value *CondV = Cond->Codegen(); + if (CondV == 0) return 0; + + // Convert condition to a bool by comparing equal to 0.0. + CondV = Builder.CreateFCmpONE(CondV, + ConstantFP::get(getGlobalContext(), APFloat(0.0)), + "ifcond"); + + Function *TheFunction = Builder.GetInsertBlock()->getParent(); + + // Create blocks for the then and else cases. Insert the 'then' block at the + // end of the function. + BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction); + BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else"); + BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont"); + + Builder.CreateCondBr(CondV, ThenBB, ElseBB); + + // Emit then value. + Builder.SetInsertPoint(ThenBB); + + Value *ThenV = Then->Codegen(); + if (ThenV == 0) return 0; + + Builder.CreateBr(MergeBB); + // Codegen of 'Then' can change the current block, update ThenBB for the PHI. + ThenBB = Builder.GetInsertBlock(); + + // Emit else block. + TheFunction->getBasicBlockList().push_back(ElseBB); + Builder.SetInsertPoint(ElseBB); + + Value *ElseV = Else->Codegen(); + if (ElseV == 0) return 0; + + Builder.CreateBr(MergeBB); + // Codegen of 'Else' can change the current block, update ElseBB for the PHI. + ElseBB = Builder.GetInsertBlock(); + + // Emit merge block. 
+ TheFunction->getBasicBlockList().push_back(MergeBB); + Builder.SetInsertPoint(MergeBB); + PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), + "iftmp"); + + PN->addIncoming(ThenV, ThenBB); + PN->addIncoming(ElseV, ElseBB); + return PN; +} + +Value *ForExprAST::Codegen() { + // Output this as: + // var = alloca double + // ... + // start = startexpr + // store start -> var + // goto loop + // loop: + // ... + // bodyexpr + // ... + // loopend: + // step = stepexpr + // endcond = endexpr + // + // curvar = load var + // nextvar = curvar + step + // store nextvar -> var + // br endcond, loop, endloop + // outloop: + + Function *TheFunction = Builder.GetInsertBlock()->getParent(); + + // Create an alloca for the variable in the entry block. + AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); + + // Emit the start code first, without 'variable' in scope. + Value *StartVal = Start->Codegen(); + if (StartVal == 0) return 0; + + // Store the value into the alloca. + Builder.CreateStore(StartVal, Alloca); + + // Make the new basic block for the loop header, inserting after current + // block. + BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction); + + // Insert an explicit fall through from the current block to the LoopBB. + Builder.CreateBr(LoopBB); + + // Start insertion in LoopBB. + Builder.SetInsertPoint(LoopBB); + + // Within the loop, the variable is defined equal to the PHI node. If it + // shadows an existing variable, we have to restore it, so save it now. + AllocaInst *OldVal = NamedValues[VarName]; + NamedValues[VarName] = Alloca; + + // Emit the body of the loop. This, like any other expr, can change the + // current BB. Note that we ignore the value computed by the body, but don't + // allow an error. + if (Body->Codegen() == 0) + return 0; + + // Emit the step value. 
+ Value *StepVal; + if (Step) { + StepVal = Step->Codegen(); + if (StepVal == 0) return 0; + } else { + // If not specified, use 1.0. + StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0)); + } + + // Compute the end condition. + Value *EndCond = End->Codegen(); + if (EndCond == 0) return EndCond; + + // Reload, increment, and restore the alloca. This handles the case where + // the body of the loop mutates the variable. + Value *CurVar = Builder.CreateLoad(Alloca, VarName.c_str()); + Value *NextVar = Builder.CreateAdd(CurVar, StepVal, "nextvar"); + Builder.CreateStore(NextVar, Alloca); + + // Convert condition to a bool by comparing equal to 0.0. + EndCond = Builder.CreateFCmpONE(EndCond, + ConstantFP::get(getGlobalContext(), APFloat(0.0)), + "loopcond"); + + // Create the "after loop" block and insert it. + BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction); + + // Insert the conditional branch into the end of LoopEndBB. + Builder.CreateCondBr(EndCond, LoopBB, AfterBB); + + // Any new code will be inserted in AfterBB. + Builder.SetInsertPoint(AfterBB); + + // Restore the unshadowed variable. + if (OldVal) + NamedValues[VarName] = OldVal; + else + NamedValues.erase(VarName); + + + // for expr always returns 0.0. + return Constant::getNullValue(Type::getDoubleTy(getGlobalContext())); +} + +Value *VarExprAST::Codegen() { + std::vector<AllocaInst *> OldBindings; + + Function *TheFunction = Builder.GetInsertBlock()->getParent(); + + // Register all variables and emit their initializer. + for (unsigned i = 0, e = VarNames.size(); i != e; ++i) { + const std::string &VarName = VarNames[i].first; + ExprAST *Init = VarNames[i].second; + + // Emit the initializer before adding the variable to scope, this prevents + // the initializer from referencing the variable itself, and permits stuff + // like this: + // var a = 1 in + // var a = a in ... # refers to outer 'a'. 
+ Value *InitVal; + if (Init) { + InitVal = Init->Codegen(); + if (InitVal == 0) return 0; + } else { // If not specified, use 0.0. + InitVal = ConstantFP::get(getGlobalContext(), APFloat(0.0)); + } + + AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); + Builder.CreateStore(InitVal, Alloca); + + // Remember the old variable binding so that we can restore the binding when + // we unrecurse. + OldBindings.push_back(NamedValues[VarName]); + + // Remember this binding. + NamedValues[VarName] = Alloca; + } + + // Codegen the body, now that all vars are in scope. + Value *BodyVal = Body->Codegen(); + if (BodyVal == 0) return 0; + + // Pop all our variables from scope. + for (unsigned i = 0, e = VarNames.size(); i != e; ++i) + NamedValues[VarNames[i].first] = OldBindings[i]; + + // Return the body computation. + return BodyVal; +} + +Function *PrototypeAST::Codegen() { + // Make the function type: double(double,double) etc. + std::vector<const Type*> Doubles(Args.size(), + Type::getDoubleTy(getGlobalContext())); + FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()), + Doubles, false); + + Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule); + + // If F conflicted, there was already something named 'Name'. If it has a + // body, don't allow redefinition or reextern. + if (F->getName() != Name) { + // Delete the one we just made and get the existing one. + F->eraseFromParent(); + F = TheModule->getFunction(Name); + + // If F already has a body, reject this. + if (!F->empty()) { + ErrorF("redefinition of function"); + return 0; + } + + // If F took a different number of args, reject. + if (F->arg_size() != Args.size()) { + ErrorF("redefinition of function with different # args"); + return 0; + } + } + + // Set names for all arguments. 
+ unsigned Idx = 0; + for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size(); + ++AI, ++Idx) + AI->setName(Args[Idx]); + + return F; +} + +/// CreateArgumentAllocas - Create an alloca for each argument and register the +/// argument in the symbol table so that references to it will succeed. +void PrototypeAST::CreateArgumentAllocas(Function *F) { + Function::arg_iterator AI = F->arg_begin(); + for (unsigned Idx = 0, e = Args.size(); Idx != e; ++Idx, ++AI) { + // Create an alloca for this variable. + AllocaInst *Alloca = CreateEntryBlockAlloca(F, Args[Idx]); + + // Store the initial value into the alloca. + Builder.CreateStore(AI, Alloca); + + // Add arguments to variable symbol table. + NamedValues[Args[Idx]] = Alloca; + } +} + +Function *FunctionAST::Codegen() { + NamedValues.clear(); + + Function *TheFunction = Proto->Codegen(); + if (TheFunction == 0) + return 0; + + // If this is an operator, install it. + if (Proto->isBinaryOp()) + BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence(); + + // Create a new basic block to start insertion into. + BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); + Builder.SetInsertPoint(BB); + + // Add all arguments to the symbol table and create their allocas. + Proto->CreateArgumentAllocas(TheFunction); + + if (Value *RetVal = Body->Codegen()) { + // Finish off the function. + Builder.CreateRet(RetVal); + + // Validate the generated code, checking for consistency. + verifyFunction(*TheFunction); + + // Optimize the function. + TheFPM->run(*TheFunction); + + return TheFunction; + } + + // Error reading body, remove function. 
+ TheFunction->eraseFromParent(); + + if (Proto->isBinaryOp()) + BinopPrecedence.erase(Proto->getOperatorName()); + return 0; +} + +//===----------------------------------------------------------------------===// +// Top-Level parsing and JIT Driver +//===----------------------------------------------------------------------===// + +static ExecutionEngine *TheExecutionEngine; + +static void HandleDefinition() { + if (FunctionAST *F = ParseDefinition()) { + if (Function *LF = F->Codegen()) { + fprintf(stderr, "Read function definition:"); + LF->dump(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleExtern() { + if (PrototypeAST *P = ParseExtern()) { + if (Function *F = P->Codegen()) { + fprintf(stderr, "Read extern: "); + F->dump(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleTopLevelExpression() { + // Evaluate a top-level expression into an anonymous function. + if (FunctionAST *F = ParseTopLevelExpr()) { + if (Function *LF = F->Codegen()) { + // JIT the function, returning a function pointer. + void *FPtr = TheExecutionEngine->getPointerToFunction(LF); + + // Cast it to the right type (takes no arguments, returns a double) so we + // can call it as a native function. + double (*FP)() = (double (*)())(intptr_t)FPtr; + fprintf(stderr, "Evaluated to %f\n", FP()); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +/// top ::= definition | external | expression | ';' +static void MainLoop() { + while (1) { + fprintf(stderr, "ready> "); + switch (CurTok) { + case tok_eof: return; + case ';': getNextToken(); break; // ignore top-level semicolons. + case tok_def: HandleDefinition(); break; + case tok_extern: HandleExtern(); break; + default: HandleTopLevelExpression(); break; + } + } +} + +//===----------------------------------------------------------------------===// +// "Library" functions that can be "extern'd" from user code. 
+//===----------------------------------------------------------------------===// + +/// putchard - putchar that takes a double and returns 0. +extern "C" +double putchard(double X) { + putchar((char)X); + return 0; +} + +/// printd - printf that takes a double prints it as "%f\n", returning 0. +extern "C" +double printd(double X) { + printf("%f\n", X); + return 0; +} + +//===----------------------------------------------------------------------===// +// Main driver code. +//===----------------------------------------------------------------------===// + +int main() { + InitializeNativeTarget(); + LLVMContext &Context = getGlobalContext(); + + // Install standard binary operators. + // 1 is lowest precedence. + BinopPrecedence['='] = 2; + BinopPrecedence['<'] = 10; + BinopPrecedence['+'] = 20; + BinopPrecedence['-'] = 20; + BinopPrecedence['*'] = 40; // highest. + + // Prime the first token. + fprintf(stderr, "ready> "); + getNextToken(); + + // Make the module, which holds all the code. + TheModule = new Module("my cool jit", Context); + + // Create the JIT. This takes ownership of the module. + TheExecutionEngine = EngineBuilder(TheModule).create(); + + FunctionPassManager OurFPM(TheModule); + + // Set up the optimizer pipeline. Start with registering info about how the + // target lays out data structures. + OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData())); + // Promote allocas to registers. + OurFPM.add(createPromoteMemoryToRegisterPass()); + // Do simple "peephole" optimizations and bit-twiddling optzns. + OurFPM.add(createInstructionCombiningPass()); + // Reassociate expressions. + OurFPM.add(createReassociatePass()); + // Eliminate Common SubExpressions. + OurFPM.add(createGVNPass()); + // Simplify the control flow graph (deleting unreachable blocks, etc). + OurFPM.add(createCFGSimplificationPass()); + + OurFPM.doInitialization(); + + // Set the global so the code gen can use this. 
+ TheFPM = &OurFPM; + + // Run the main "interpreter loop" now. + MainLoop(); + + TheFPM = 0; + + // Print out all of the generated code. + TheModule->dump(); + + return 0; +} diff --git a/examples/Kaleidoscope/Makefile b/examples/Kaleidoscope/Makefile new file mode 100644 index 0000000..bd0c252 --- /dev/null +++ b/examples/Kaleidoscope/Makefile @@ -0,0 +1,15 @@ +##===- examples/Kaleidoscope/Makefile ----------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL=../.. + +include $(LEVEL)/Makefile.config + +PARALLEL_DIRS:= Chapter2 Chapter3 Chapter4 Chapter5 Chapter6 Chapter7 + +include $(LEVEL)/Makefile.common diff --git a/examples/Makefile b/examples/Makefile new file mode 100644 index 0000000..fc3a7d4 --- /dev/null +++ b/examples/Makefile @@ -0,0 +1,23 @@ +##===- examples/Makefile -----------------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL=.. 
+ +include $(LEVEL)/Makefile.config + +PARALLEL_DIRS:= BrainF Fibonacci HowToUseJIT Kaleidoscope ModuleMaker + +ifeq ($(HAVE_PTHREAD),1) +PARALLEL_DIRS += ParallelJIT +endif + +ifeq ($(LLVM_ON_UNIX),1) +PARALLEL_DIRS += ExceptionDemo +endif + +include $(LEVEL)/Makefile.common diff --git a/examples/ModuleMaker/CMakeLists.txt b/examples/ModuleMaker/CMakeLists.txt new file mode 100644 index 0000000..81e9115 --- /dev/null +++ b/examples/ModuleMaker/CMakeLists.txt @@ -0,0 +1,5 @@ +set(LLVM_LINK_COMPONENTS bitwriter) + +add_llvm_example(ModuleMaker + ModuleMaker.cpp + ) diff --git a/examples/ModuleMaker/Makefile b/examples/ModuleMaker/Makefile new file mode 100644 index 0000000..9454cf5 --- /dev/null +++ b/examples/ModuleMaker/Makefile @@ -0,0 +1,14 @@ +##===- examples/ModuleMaker/Makefile -----------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL=../.. +TOOLNAME=ModuleMaker +EXAMPLE_TOOL = 1 +LINK_COMPONENTS := bitwriter + +include $(LEVEL)/Makefile.common diff --git a/examples/ModuleMaker/ModuleMaker.cpp b/examples/ModuleMaker/ModuleMaker.cpp new file mode 100644 index 0000000..6bc52c1 --- /dev/null +++ b/examples/ModuleMaker/ModuleMaker.cpp @@ -0,0 +1,64 @@ +//===- examples/ModuleMaker/ModuleMaker.cpp - Example project ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This program is a simple example that creates an LLVM module "from scratch", +// emitting it as a bitcode file to standard out. This is just to show how +// LLVM projects work and to demonstrate some of the LLVM APIs. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Constants.h" +#include "llvm/Instructions.h" +#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +// Builds a module named "test" containing a single function `main' that +// returns the result of adding the constants 2 and 3, then writes the module +// to stdout as bitcode. +int main() { + LLVMContext Context; + + // Create the "module" or "program" or "translation unit" to hold the + // function + Module *M = new Module("test", Context); + + // Create the main function: first create the type 'int ()' + FunctionType *FT = + FunctionType::get(Type::getInt32Ty(Context), /*not vararg*/false); + + // By passing a module as the last parameter to the Function constructor, + // it automatically gets appended to the Module. + Function *F = Function::Create(FT, Function::ExternalLinkage, "main", M); + + // Add a basic block to the function... again, it automatically inserts + // because of the last argument. + BasicBlock *BB = BasicBlock::Create(Context, "EntryBlock", F); + + // Get pointers to the constant integers... + Value *Two = ConstantInt::get(Type::getInt32Ty(Context), 2); + Value *Three = ConstantInt::get(Type::getInt32Ty(Context), 3); + + // Create the add instruction... does not insert... + Instruction *Add = BinaryOperator::Create(Instruction::Add, Two, Three, + "addresult"); + + // explicitly insert it into the basic block... + BB->getInstList().push_back(Add); + + // Create the return instruction and add it to the basic block + BB->getInstList().push_back(ReturnInst::Create(Context, Add)); + + // Output the bitcode file to stdout + WriteBitcodeToFile(M, outs()); + + // Delete the module and all of its contents. 
+ delete M; + return 0; +} diff --git a/examples/ModuleMaker/README.txt b/examples/ModuleMaker/README.txt new file mode 100644 index 0000000..ecbe30e --- /dev/null +++ b/examples/ModuleMaker/README.txt @@ -0,0 +1,8 @@ +//===----------------------------------------------------------------------===// +// ModuleMaker Sample project +//===----------------------------------------------------------------------===// + +This project is an extremely simple example of using some simple pieces of the +LLVM API. The actual executable generated by this project simply emits an +LLVM bitcode file to standard output. It is designed to show some basic +usage of LLVM APIs, and how to link to LLVM libraries. diff --git a/examples/ParallelJIT/CMakeLists.txt b/examples/ParallelJIT/CMakeLists.txt new file mode 100644 index 0000000..fbdc6e5 --- /dev/null +++ b/examples/ParallelJIT/CMakeLists.txt @@ -0,0 +1,9 @@ +set(LLVM_LINK_COMPONENTS jit interpreter nativecodegen) + +add_llvm_example(ParallelJIT + ParallelJIT.cpp + ) + +if(HAVE_LIBPTHREAD) + target_link_libraries(ParallelJIT pthread) +endif(HAVE_LIBPTHREAD) diff --git a/examples/ParallelJIT/Makefile b/examples/ParallelJIT/Makefile new file mode 100644 index 0000000..8a49d42 --- /dev/null +++ b/examples/ParallelJIT/Makefile @@ -0,0 +1,17 @@ +##===- examples/ParallelJIT/Makefile -----------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../.. 
+TOOLNAME = ParallelJIT +EXAMPLE_TOOL = 1 + +LINK_COMPONENTS := jit interpreter nativecodegen + +include $(LEVEL)/Makefile.common + +LIBS += -lpthread diff --git a/examples/ParallelJIT/ParallelJIT.cpp b/examples/ParallelJIT/ParallelJIT.cpp new file mode 100644 index 0000000..9231abf --- /dev/null +++ b/examples/ParallelJIT/ParallelJIT.cpp @@ -0,0 +1,304 @@ +//===-- examples/ParallelJIT/ParallelJIT.cpp - Exercise threaded-safe JIT -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Parallel JIT +// +// This test program creates two LLVM functions then calls them from three +// separate threads. It requires the pthreads library. +// The three threads are created and then block waiting on a condition variable. +// Once all threads are blocked on the condition variable, the main thread +// wakes them up. This complicated work is performed so that all three threads +// call into the JIT at the same time (or the best possible approximation of the +// same time). This test had assertion errors until I got the locking right. + +#include <pthread.h> +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/ExecutionEngine/JIT.h" +#include "llvm/ExecutionEngine/Interpreter.h" +#include "llvm/ExecutionEngine/GenericValue.h" +#include "llvm/Target/TargetSelect.h" +#include <iostream> +using namespace llvm; + +static Function* createAdd1(Module *M) { + // Create the add1 function entry and insert this entry into module M. The + // function will have a return type of "int" and take an argument of "int". + // The '0' terminates the list of argument types. 
+ Function *Add1F = + cast<Function>(M->getOrInsertFunction("add1", + Type::getInt32Ty(M->getContext()), + Type::getInt32Ty(M->getContext()), + (Type *)0)); + + // Add a basic block to the function. It is automatically inserted + // because of the last argument. + BasicBlock *BB = BasicBlock::Create(M->getContext(), "EntryBlock", Add1F); + + // Get a pointer to the constant `1'. + Value *One = ConstantInt::get(Type::getInt32Ty(M->getContext()), 1); + + // Get a pointer to the integer argument of the add1 function... + assert(Add1F->arg_begin() != Add1F->arg_end()); // Make sure there's an arg + Argument *ArgX = Add1F->arg_begin(); // Get the arg + ArgX->setName("AnArg"); // Give it a nice symbolic name for fun. + + // Create the add instruction, inserting it into the end of BB. + Instruction *Add = BinaryOperator::CreateAdd(One, ArgX, "addresult", BB); + + // Create the return instruction and add it to the basic block + ReturnInst::Create(M->getContext(), Add, BB); + + // Now, function add1 is ready. + return Add1F; +} + +static Function *CreateFibFunction(Module *M) { + // Create the fib function and insert it into module M. This function is said + // to return an int and take an int parameter. + Function *FibF = + cast<Function>(M->getOrInsertFunction("fib", + Type::getInt32Ty(M->getContext()), + Type::getInt32Ty(M->getContext()), + (Type *)0)); + + // Add a basic block to the function. + BasicBlock *BB = BasicBlock::Create(M->getContext(), "EntryBlock", FibF); + + // Get pointers to the constants. + Value *One = ConstantInt::get(Type::getInt32Ty(M->getContext()), 1); + Value *Two = ConstantInt::get(Type::getInt32Ty(M->getContext()), 2); + + // Get a pointer to the integer argument of the fib function... + Argument *ArgX = FibF->arg_begin(); // Get the arg. + ArgX->setName("AnArg"); // Give it a nice symbolic name for fun. + + // Create the block taken when the condition is true (the base case). + BasicBlock *RetBB = BasicBlock::Create(M->getContext(), "return", FibF); + // Create the block that handles the recursive case. 
+ BasicBlock* RecurseBB = BasicBlock::Create(M->getContext(), "recurse", FibF); + + // Create the "if (arg <= 2) goto return" + Value *CondInst = new ICmpInst(*BB, ICmpInst::ICMP_SLE, ArgX, Two, "cond"); + BranchInst::Create(RetBB, RecurseBB, CondInst, BB); + + // Create: ret int 1 + ReturnInst::Create(M->getContext(), One, RetBB); + + // create fib(x-1) + Value *Sub = BinaryOperator::CreateSub(ArgX, One, "arg", RecurseBB); + Value *CallFibX1 = CallInst::Create(FibF, Sub, "fibx1", RecurseBB); + + // create fib(x-2) + Sub = BinaryOperator::CreateSub(ArgX, Two, "arg", RecurseBB); + Value *CallFibX2 = CallInst::Create(FibF, Sub, "fibx2", RecurseBB); + + // fib(x-1)+fib(x-2) + Value *Sum = + BinaryOperator::CreateAdd(CallFibX1, CallFibX2, "addresult", RecurseBB); + + // Create the return instruction and add it to the basic block + ReturnInst::Create(M->getContext(), Sum, RecurseBB); + + return FibF; +} + +// Per-thread arguments: the engine to run on, the function to call, and the +// integer passed as that function's single argument. +struct threadParams { + ExecutionEngine* EE; + Function* F; + int value; +}; + +// We block the subthreads just before they begin to execute: +// we want all of them to call into the JIT at the same time, +// to verify that the locking is working correctly. 
+class WaitForThreads +{ +public: + WaitForThreads() + { + n = 0; + waitFor = 0; + + int result = pthread_cond_init( &condition, NULL ); + assert( result == 0 ); + + result = pthread_mutex_init( &mutex, NULL ); + assert( result == 0 ); + } + + ~WaitForThreads() + { + int result = pthread_cond_destroy( &condition ); + assert( result == 0 ); + + result = pthread_mutex_destroy( &mutex ); + assert( result == 0 ); + } + + // All threads will stop here until another thread calls releaseThreads + void block() + { + int result = pthread_mutex_lock( &mutex ); + assert( result == 0 ); + n ++; + //~ std::cout << "block() n " << n << " waitFor " << waitFor << std::endl; + + assert( waitFor == 0 || n <= waitFor ); + if ( waitFor > 0 && n == waitFor ) + { + // There are enough threads blocked that we can release all of them + std::cout << "Unblocking threads from block()" << std::endl; + unblockThreads(); + } + else + { + // We just need to wait until someone unblocks us + result = pthread_cond_wait( &condition, &mutex ); + assert( result == 0 ); + } + + // unlock the mutex before returning + result = pthread_mutex_unlock( &mutex ); + assert( result == 0 ); + } + + // If there are num or more threads blocked, it will signal them all + // Otherwise, this thread blocks until there are enough OTHER threads + // blocked + void releaseThreads( size_t num ) + { + int result = pthread_mutex_lock( &mutex ); + assert( result == 0 ); + + if ( n >= num ) { + std::cout << "Unblocking threads from releaseThreads()" << std::endl; + unblockThreads(); + } + else + { + waitFor = num; + pthread_cond_wait( &condition, &mutex ); + } + + // unlock the mutex before returning + result = pthread_mutex_unlock( &mutex ); + assert( result == 0 ); + } + +private: + void unblockThreads() + { + // Reset the counters to zero: this way, if any new threads + // enter while threads are exiting, they will block instead + // of triggering a new release of threads + n = 0; + + // Reset waitFor to zero: this way, if 
waitFor threads enter + // while threads are exiting, they will block instead of + // triggering a new release of threads + waitFor = 0; + + int result = pthread_cond_broadcast( &condition ); + assert(result == 0); result=result; + } + + size_t n; + size_t waitFor; + pthread_cond_t condition; + pthread_mutex_t mutex; +}; + +static WaitForThreads synchronize; + +void* callFunc( void* param ) +{ + struct threadParams* p = (struct threadParams*) param; + + // Call the `foo' function with no arguments: + std::vector<GenericValue> Args(1); + Args[0].IntVal = APInt(32, p->value); + + synchronize.block(); // wait until other threads are at this point + GenericValue gv = p->EE->runFunction(p->F, Args); + + return (void*)(intptr_t)gv.IntVal.getZExtValue(); +} + +int main() { + InitializeNativeTarget(); + LLVMContext Context; + + // Create some module to put our function into it. + Module *M = new Module("test", Context); + + Function* add1F = createAdd1( M ); + Function* fibF = CreateFibFunction( M ); + + // Now we create the JIT. 
+ ExecutionEngine* EE = EngineBuilder(M).create(); + + //~ std::cout << "We just constructed this LLVM module:\n\n" << *M; + //~ std::cout << "\n\nRunning foo: " << std::flush; + + // Create one thread for add1 and two threads for fib + struct threadParams add1 = { EE, add1F, 1000 }; + struct threadParams fib1 = { EE, fibF, 39 }; + struct threadParams fib2 = { EE, fibF, 42 }; + + pthread_t add1Thread; + int result = pthread_create( &add1Thread, NULL, callFunc, &add1 ); + if ( result != 0 ) { + std::cerr << "Could not create thread" << std::endl; + return 1; + } + + pthread_t fibThread1; + result = pthread_create( &fibThread1, NULL, callFunc, &fib1 ); + if ( result != 0 ) { + std::cerr << "Could not create thread" << std::endl; + return 1; + } + + pthread_t fibThread2; + result = pthread_create( &fibThread2, NULL, callFunc, &fib2 ); + if ( result != 0 ) { + std::cerr << "Could not create thread" << std::endl; + return 1; + } + + synchronize.releaseThreads(3); // wait until other threads are at this point + + void* returnValue; + result = pthread_join( add1Thread, &returnValue ); + if ( result != 0 ) { + std::cerr << "Could not join thread" << std::endl; + return 1; + } + std::cout << "Add1 returned " << intptr_t(returnValue) << std::endl; + + result = pthread_join( fibThread1, &returnValue ); + if ( result != 0 ) { + std::cerr << "Could not join thread" << std::endl; + return 1; + } + std::cout << "Fib1 returned " << intptr_t(returnValue) << std::endl; + + result = pthread_join( fibThread2, &returnValue ); + if ( result != 0 ) { + std::cerr << "Could not join thread" << std::endl; + return 1; + } + std::cout << "Fib2 returned " << intptr_t(returnValue) << std::endl; + + return 0; +} |