| //===-- godumpspec.cpp - C->Go helper utility for llvm --------------------===// |
| // |
| // Copyright 2018 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This program is a helper for the libgo build. Given an object file |
| // and macros file derived from a given C source file, emit Go translations |
| // for the types/constants/macros in the C file. |
| // |
| // Expected usage mode looks something like this: |
| // |
| // % cc -E -dM -o somefile-macros.txt somefile.c |
| // % cc -g -c -o somefile.o somefile.c |
| // % llvm-godumpspec -object somefile.o \ |
| // -macrotmp somefile-macros.txt \ |
| // -output somefile-types-and-macros.go |
| // |
| // The tool reads DWARF from 'somefile.o' and combines the type/var/constant |
| // info from the DWARF with macro definitions from 'somefile-macros.txt' |
| // to produce Go equivalents for the type/var/constant info in the original |
| // C source file. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "llvm/DebugInfo/DIContext.h" |
| #include "llvm/DebugInfo/DWARF/DWARFContext.h" |
| #include "llvm/Object/Binary.h" |
| #include "llvm/Object/ObjectFile.h" |
| #include "llvm/Support/CommandLine.h" |
| #include "llvm/Support/Debug.h" |
| #include "llvm/Support/FileSystem.h" |
| #include "llvm/Support/Format.h" |
| #include "llvm/Support/ManagedStatic.h" |
| #include "llvm/Support/MemoryBuffer.h" |
| #include "llvm/Support/Path.h" |
| #include "llvm/Support/PrettyStackTrace.h" |
| #include "llvm/Support/Signals.h" |
| #include "llvm/Support/TargetSelect.h" |
| #include "llvm/Support/ToolOutputFile.h" |
| #include "llvm/Support/raw_ostream.h" |
| |
| #include "macro-parser.h" |
| |
| #include <unordered_set> |
| #include <unordered_map> |
| #include <iostream> |
| #include <fstream> |
| #include <sstream> |
| #include <string> |
| |
| using namespace llvm; |
| using namespace object; |
| |
| namespace { |
| |
| static cl::opt<std::string> |
| InputObjectFile("object", cl::desc("Object file for *.c file")); |
| |
| static cl::opt<std::string> |
| InputMacrosFile("macrotmp", cl::desc("Macros file for *.c file")); |
| |
| static cl::opt<std::string> |
| OutputFilename("output", cl::desc("Output file to write.")); |
| |
| static cl::opt<unsigned> |
| PointerSize("pointersize", cl::desc("Size of a pointer in bytes for " |
| "the target architecture of interest. " |
| "Defaults to host pointer size."), |
| cl::init(sizeof(void*))); |
| |
| static cl::opt<bool> |
| Trace("trace", cl::desc("Enable debug trace output.")); |
| |
| } // namespace |
| |
// Disposition used when deciding whether to refer to a type by its
// previously established DWARF name or to emit its definition inline.
enum TypeNameDisp {
  TN_SelectDefault, // no explicit preference; built-in heuristics decide
  TN_PreferName,    // caller would like the name to be used
  TN_AvoidName      // caller would like the definition emitted inline
};
| |
| // This helper / mix-in class provides helper routines for capturing |
| // intermediate results via a buffer. |
| |
| class DumpManager { |
| public: |
| |
| // Create a new dump manager, passing it an output ostream. |
| explicit DumpManager(raw_ostream &os); |
| |
| protected: |
| |
| // Output stream |
| raw_ostream &os() { return os_; } |
| |
| // Set up a temporary string buffer (accessed via 'buf()' for |
| // building up intermediate results). |
| void initBuf() { |
| if (buf_.get()) |
| buf_->flush(); |
| str_.reset(new std::string); |
| buf_.reset(new llvm::raw_string_ostream(*str_.get())); |
| } |
| |
| // Return a reference to the current intermediate results buffer. |
| llvm::raw_string_ostream &buf() { |
| assert(buf_.get() != nullptr); |
| return *buf_.get(); |
| } |
| |
| // Save/restore the current intermediate results buffer. |
| // Occasionally when processing a given type there is a need to |
| // pause emission while visiting a sub-type or field, which these |
| // methods enable. Here 'pauseBuf' returns the current intermediate |
| // buffer state (which the caller can then cache away somewhere) and |
| // re-initializes things with a call to initBuf(); restoreBuf takes |
| // the specified string/buf and resets the buffer using those |
| // objects (any current contents of the intermediate results buffer |
| // are lost). |
| std::pair<raw_string_ostream *, std::string *> pauseBuf(); |
| void restoreBuf(raw_string_ostream *stream, std::string *str); |
| |
| // Determines whether a given token is a Go language keyword. |
| bool isGoKeyWord(const char *str) { |
| return keywords_.find(str) != keywords_.end(); |
| } |
| |
| private: |
| std::unordered_set<std::string> keywords_; |
| std::unique_ptr<std::string> str_; |
| std::unique_ptr<llvm::raw_string_ostream> buf_; |
| raw_ostream &os_; // output file we're writing |
| }; |
| |
| DumpManager::DumpManager(raw_ostream &os) |
| : keywords_({"break", "default", "func", "interface", "select", |
| "case", "defer", "go", "map", "struct", "chan", "else", |
| "goto", "package", "switch", "const", "fallthrough", "if", |
| "range", "type", "continue", "for", "import", "return", "var"}), |
| os_(os) |
| { |
| } |
| |
| // Pause output buffering, saving off current state to 'saveTo'. Returns |
| // the state of the current buffer (which the client can presumbably |
| // stash away and later pass to restoreBuf). |
| |
| std::pair<raw_string_ostream *, std::string *> DumpManager::pauseBuf() |
| { |
| raw_string_ostream *r1 = buf_.release(); |
| std::string *r2 = str_.release(); |
| initBuf(); |
| return std::make_pair(r1, r2); |
| } |
| |
| // Restore buffer using previously capturede state from pauseBuf. |
| |
| void DumpManager::restoreBuf(raw_string_ostream *stream, std::string *str) |
| { |
| buf_.reset(stream); |
| str_.reset(str); |
| } |
| |
| // This class manages the overall process of generating Go code from |
| // DWARF and macro info derived from a C compilation. It walks the |
| // DWARF DIE chain from an object file we're looking at, and manages |
| // the process of combining DWARF type info with definitions from a |
| // macro temp file. Expected use here is to construct a helper, then |
| // call the readDwarf() method, then read + process any macro |
| // definitions, and finally called the emit() method. |
| |
| class GoDumpHelper : public MacroParser, public DumpManager { |
| public: |
| explicit GoDumpHelper(raw_ostream &os); |
| void readDwarf(DWARFCompileUnit *cu); |
| void emit(); |
| |
| private: |
| // Visit a type. Each type should be visited twice, first as part of |
| // a discovery/analysis phase (with emit_ == false) and then as part |
| // of an output phase (eith emit_ == true). |
| void visitType(const DWARFDie &die); |
| |
| // Emit a variable. |
| void emitVariable(const DWARFDie &die); |
| |
| // Record a given DWARF DIE for additional processing. |
| void enqueueType(const DWARFDie &die); |
| void enqueueVariable(const DWARFDie &die); |
| |
| // Visit the specified DWARF type DIE, generateing a Go version |
| // of the type into the intermediate results buffer. |
| bool generateType(const DWARFDie &die, TypeNameDisp disp = TN_SelectDefault); |
| |
| // Similar to the above, but returns the Go code as a string without |
| // appending anything to the current buffer. |
| std::pair<bool, std::string> generateTypeToString(const DWARFDie &die); |
| |
| // Helpers to take care of specific type flavors. |
| bool generateBaseType(const DWARFDie &die); |
| bool generateStructType(const DWARFDie &die); |
| bool generateUnionType(const DWARFDie &die); |
| bool generateFcnType(const DWARFDie &die); |
| bool generateArrayType(const DWARFDie &die); |
| bool generateEnumType(const DWARFDie &die); |
| |
| // Postprocess a struct member. |
| bool generateMember(const DWARFDie &die); |
| |
| // Assorted helpers. |
| bool useTypeName(const DWARFDie &die, TypeNameDisp disp); |
| bool isPtrToFunctionType(const DWARFDie &die); |
| bool isSuitableArrayDimTyp(const DWARFDie &die); |
| bool isSpuriousTypedef(const DWARFDie &die); |
| bool isBitField(const DWARFDie &die); |
| bool isAggregate(const DWARFDie &die); |
| const char *dieName(DWARFDie die); |
| DWARFDie forwardedType(DWARFDie die); |
| std::string enumLitString(DWARFFormValue &fvalue); |
| |
| bool isInvalidType(const DWARFDie &die) { |
| return invalidTypes_.find(die.getOffset()) != invalidTypes_.end(); |
| } |
| bool isBaseType(const DWARFDie &die) { |
| return die.getTag() == dwarf::DW_TAG_base_type; |
| } |
| |
| bool typeSizeKnown(const DWARFDie &die) { |
| return typeSize_.find(die.getOffset()) != typeSize_.end(); |
| } |
| |
| uint64_t typeSize(const DWARFDie &die) { |
| auto it = typeSize_.find(die.getOffset()); |
| assert(it != typeSize_.end()); |
| return it->second; |
| } |
| uint64_t typeOfSize(const DWARFDie &die) { |
| DWARFDie typ = die.getAttributeValueAsReferencedDie(dwarf::DW_AT_type); |
| assert(typ.isValid()); |
| auto it = typeSize_.find(typ.getOffset()); |
| assert(it != typeSize_.end()); |
| return it->second; |
| } |
| |
| void setTypeSize(const DWARFDie &die, uint64_t siz) { |
| auto it = typeSize_.find(die.getOffset()); |
| if (it != typeSize_.end()) { |
| assert(siz == it->second); |
| } else { |
| typeSize_[die.getOffset()] = siz; |
| } |
| } |
| |
| bool typeAlignKnown(const DWARFDie &die) { |
| return typeAlign_.find(die.getOffset()) != typeAlign_.end(); |
| } |
| |
| uint64_t typeAlign(const DWARFDie &die) { |
| auto it = typeAlign_.find(die.getOffset()); |
| if (it != typeAlign_.end()) |
| return it->second; |
| assert(isInvalidType(die)); |
| return 1; |
| } |
| |
| void setTypeAlign(const DWARFDie &die, uint64_t aln) { |
| auto it = typeAlign_.find(die.getOffset()); |
| if (it != typeAlign_.end()) { |
| assert(aln == it->second); |
| } else { |
| typeAlign_[die.getOffset()] = aln; |
| } |
| } |
| |
| private: |
| // Names of types emitted. To avoid clases between macros + types. |
| std::unordered_set<std::string> emittedTypeNames_; |
| |
| // To detect cycles in a type graph. Indexed by DIE offset. |
| std::unordered_set<uint32_t> visited_; |
| |
| // Records types unrepresentable in Go. Indexed by DIE offset. |
| std::unordered_set<uint32_t> invalidTypes_; |
| |
| // Referenced structs with no defined body (eg: "struct X;"). |
| // Indexed by DIE offset. |
| std::unordered_set<uint32_t> externalStructs_; |
| |
| // Anonymous sub-structure types within unions. Indexed by DIE offset. |
| std::unordered_set<uint32_t> anonSubstructure_; |
| |
| // Enumerated type literals. Indexed by enum literal name. |
| std::unordered_map<std::string, std::string> enumLiterals_; |
| |
| // Type size and alignment requirement. Indexed by DIE offset. |
| std::unordered_map<uint32_t, uint64_t> typeSize_; |
| std::unordered_map<uint32_t, uint32_t> typeAlign_; |
| |
| // Queue of interesting DIEs to examine. |
| std::vector<uint32_t> queue_; |
| |
| // Current DWARF compilation unit. |
| DWARFCompileUnit *cu_; // current compilation unit |
| |
| // DWARF die offset of top-level type DIE being visited. |
| uint32_t curDieOffset_; |
| |
| // Count of pad bytes used while processing bitfields. |
| uint32_t padcount_; |
| |
| // Pointer size in bytes. |
| uint32_t ptrSize_; |
| |
| // Set initially to false while we examine all type info, then set to |
| // true for a second pass through to emit types. |
| bool emit_; |
| }; |
| |
// Sentinel DIE offset meaning "no top-level DIE is being visited".
constexpr uint32_t invalidOffset = static_cast<uint32_t>(-1);
| |
| GoDumpHelper::GoDumpHelper(raw_ostream &os) |
| : DumpManager(os), |
| curDieOffset_(invalidOffset), |
| padcount_(0), |
| ptrSize_(PointerSize), |
| emit_(false) |
| { |
| } |
| |
| const char *GoDumpHelper::dieName(DWARFDie die) |
| { |
| auto formval = die.find(dwarf::DW_AT_name); |
| if (!formval) |
| return nullptr; |
| auto cstr = formval->getAsCString(); |
| if (!cstr) |
| return nullptr; |
| return *cstr; |
| } |
| |
| void GoDumpHelper::enqueueType(const DWARFDie &die) |
| { |
| queue_.push_back(cu_->getDIEIndex(die)); |
| visitType(die); |
| } |
| |
| void GoDumpHelper::enqueueVariable(const DWARFDie &die) |
| { |
| queue_.push_back(cu_->getDIEIndex(die)); |
| DWARFDie typ = die.getAttributeValueAsReferencedDie(dwarf::DW_AT_type); |
| assert(typ.isValid()); |
| visitType(typ); |
| } |
| |
| // Walk the DWARF DIE chain for the specified compilation unit, |
| // queuing up interesting DIEs for later post-processing. As each type |
| // or variable is enqueued we'll visit the type associated with it, so |
| // as to discover invalid types and establish the size/alignment of |
| // all interesting types. |
| |
| void GoDumpHelper::readDwarf(DWARFCompileUnit *cu) |
| { |
| assert(cu); |
| cu_ = cu; |
| for (const auto &entry : cu_->dies()) { |
| DWARFDie die(cu_, &entry); |
| if (isType(die.getTag()) && |
| (dieName(die) != nullptr || |
| die.getTag() == dwarf::DW_TAG_enumeration_type)) |
| enqueueType(die); |
| else if (die.getTag() == dwarf::DW_TAG_variable) |
| enqueueVariable(die); |
| if (Trace) |
| die.dump(); |
| } |
| } |
| |
| void GoDumpHelper::visitType(const DWARFDie &die) |
| { |
| // Skip base types at the top level (they will be emitted inline |
| // where needed). |
| if (isBaseType(die)) |
| return; |
| |
| // Skip spurious typedefs ("type X X"), which crop up a fair |
| // amount with structs (ex: "typedef struct A { ... } A;"). |
| if (isSpuriousTypedef(die)) |
| return; |
| |
| if (Trace) { |
| std::cerr << "visit offset " << std::hex << die.getOffset(); |
| if (dieName(die)) |
| std::cerr << " " << dieName(die); |
| std::cerr << "\n"; |
| } |
| |
| initBuf(); |
| const char *cname = dieName(die); |
| curDieOffset_ = die.getOffset(); |
| padcount_ = 0; |
| if (emit_) |
| visited_.clear(); |
| bool ok = generateType(die); |
| curDieOffset_ = invalidOffset; |
| |
| if (emit_ && cname != nullptr) { |
| if (emittedTypeNames_.find(cname) != emittedTypeNames_.end()) |
| return; |
| if (! ok) |
| os() << "// "; |
| else |
| emittedTypeNames_.insert(cname); |
| os() << "type _" << cname << " " << buf().str(); |
| os() << "\n"; |
| |
| if (ok) { |
| // For struct and union types, emit a size constant |
| DWARFDie fwd(forwardedType(die)); |
| if (fwd.getTag() == dwarf::DW_TAG_structure_type || |
| fwd.getTag() == dwarf::DW_TAG_union_type) { |
| assert(typeSizeKnown(fwd)); |
| os() << "const _sizeof_" << cname << " = " << typeSize(fwd) << "\n"; |
| } |
| } |
| } |
| } |
| |
// Map a size in bytes (1, 2, 4, 8 or 16) to the decimal string for the
// corresponding number of bits; any other size yields nullptr.
static const char *bitsTag(unsigned byteSize) {
  if (byteSize == 1)
    return "8";
  if (byteSize == 2)
    return "16";
  if (byteSize == 4)
    return "32";
  if (byteSize == 8)
    return "64";
  if (byteSize == 16)
    return "128";
  return nullptr;
}
| |
| std::string GoDumpHelper::enumLitString(DWARFFormValue &fvalue) |
| { |
| std::stringstream ss; |
| auto uval = fvalue.getAsUnsignedConstant(); |
| auto sval = fvalue.getAsSignedConstant(); |
| if (uval) { |
| ss << *uval; |
| } else if (sval) { |
| ss << *sval; |
| } |
| return ss.str(); |
| } |
| |
| bool GoDumpHelper::generateEnumType(const DWARFDie &die) |
| { |
| // Enumerated types wind up as simple uint's in Go. |
| auto byteSize = dwarf::toUnsigned(die.find(dwarf::DW_AT_byte_size)); |
| assert(byteSize); |
| const char *bits = bitsTag(*byteSize); |
| if (!bits) |
| return false; |
| setTypeAlign(die, *byteSize); |
| buf() << "uint" << bits; |
| |
| // Our overall goal is to have enumeration types trump macro |
| // definitions; to enable this, macros and enum literals are |
| // buffered up and then combined/reconciled as part of the |
| // emit process. |
| bool rval = true; |
| DWARFDie child = die.getFirstChild(); |
| while (child && !child.isNULL()) { |
| if (child.getTag() == dwarf::DW_TAG_enumerator) { |
| const char *name = dieName(child); |
| // FIXME: avoid clash with Go keywords here? |
| auto val = child.find(dwarf::DW_AT_const_value); |
| assert(val); |
| std::string s = enumLitString(*val); |
| if (s.empty()) |
| rval = false; |
| else { |
| std::string n(name); |
| if (enumLiterals_.find(n) == enumLiterals_.end()) { |
| enumLiterals_[n] = s; |
| addEnumLiteralPseudoMacro(n, s); |
| } |
| } |
| } |
| child = child.getSibling(); |
| } |
| |
| return rval; |
| } |
| |
| bool GoDumpHelper::isSpuriousTypedef(const DWARFDie &die) |
| { |
| if (die.getTag() != dwarf::DW_TAG_typedef) |
| return false; |
| |
| // For C constructs such as "typedef struct X { ... } X;" in the |
| // DWARF we'll see first a struct type with named type X, followed |
| // by a typedef type with name X, which would result in "type X X", |
| // which is not what we want. |
| DWARFDie tgtDie = die.getAttributeValueAsReferencedDie(dwarf::DW_AT_type); |
| if (!tgtDie.isValid()) |
| return false; |
| const char *toname = dieName(tgtDie); |
| if (toname) { |
| const char *fromname = dieName(die); |
| if (fromname && !strcmp(fromname, toname)) |
| return true; |
| } |
| return false; |
| } |
| |
| bool GoDumpHelper::isPtrToFunctionType(const DWARFDie &die) |
| { |
| if (die.getTag() != dwarf::DW_TAG_pointer_type) |
| return false; |
| DWARFDie toDie = die.getAttributeValueAsReferencedDie(dwarf::DW_AT_type); |
| if (! toDie.isValid()) |
| return false; |
| return toDie.getTag() == dwarf::DW_TAG_subroutine_type; |
| } |
| |
| bool GoDumpHelper::isSuitableArrayDimTyp(const DWARFDie &die) |
| { |
| // FIXME: no support yet for enumerated type as array dim. |
| if (!isBaseType(die)) |
| return false; |
| auto byteSize = dwarf::toUnsigned(die.find(dwarf::DW_AT_byte_size)); |
| assert(byteSize); |
| if (*byteSize < 1 || *byteSize > 8) |
| return false; |
| auto encoding = dwarf::toUnsigned(die.find(dwarf::DW_AT_encoding)); |
| assert(encoding); |
| if (*encoding != dwarf::DW_ATE_signed && |
| *encoding != dwarf::DW_ATE_unsigned_char && |
| *encoding != dwarf::DW_ATE_signed_char && |
| *encoding != dwarf::DW_ATE_unsigned) |
| return false; |
| return true; |
| } |
| |
| bool GoDumpHelper::generateArrayType(const DWARFDie &die) |
| { |
| bool rval = true; |
| |
| DWARFDie eltyp = die.getAttributeValueAsReferencedDie(dwarf::DW_AT_type); |
| assert(eltyp.isValid()); |
| std::pair<bool, std::string> eresult = generateTypeToString(eltyp); |
| if (! eresult.first) |
| rval = false; |
| std::string etgen(eresult.second); |
| setTypeAlign(die, typeAlign(eltyp)); |
| |
| DWARFDie child = die.getFirstChild(); |
| uint64_t totElements = 1; |
| bool zeroDim = false; |
| while (child && !child.isNULL()) { |
| if (child.getTag() == dwarf::DW_TAG_subrange_type) { |
| DWARFDie ctyp = child.getAttributeValueAsReferencedDie(dwarf::DW_AT_type); |
| if (!ctyp.isValid()) { |
| // This corresponds to "[0]" |
| buf() << "[0]"; |
| zeroDim = true; |
| } else { |
| if (! isSuitableArrayDimTyp(ctyp)) |
| rval = false; |
| // NB: don't expect to see a lower bound here or non-constant |
| // upper bound. |
| auto ubval = child.find(dwarf::DW_AT_upper_bound); |
| auto count = child.find(dwarf::DW_AT_count); |
| if (ubval) { |
| auto cval = ubval->getAsUnsignedConstant(); |
| assert(cval); |
| buf() << "[" << *cval << "+1]"; |
| totElements = (*cval+1) * totElements; |
| } else if (count) { |
| auto cval = count->getAsUnsignedConstant(); |
| assert(cval); |
| buf() << "[" << *cval << "]"; |
| totElements = *cval * totElements; |
| } else { |
| // This corresponds to "[0]" |
| buf() << "[0]"; |
| zeroDim = true; |
| } |
| } |
| } |
| child = child.getSibling(); |
| } |
| if (zeroDim) |
| totElements = 0; |
| |
| buf() << etgen; |
| |
| // NB: array types may be lacking a byte size attribute. If so, set |
| // size manually. |
| auto byteSize = dwarf::toUnsigned(die.find(dwarf::DW_AT_byte_size)); |
| if (!byteSize) |
| setTypeSize(die, totElements * typeSize(eltyp)); |
| |
| return rval; |
| } |
| |
| bool GoDumpHelper::generateFcnType(const DWARFDie &die) |
| { |
| bool rval = true; |
| |
| // Params |
| buf() << "func("; |
| bool com = false; |
| for (DWARFDie child : die.children()) { |
| if (com) |
| buf() << ", "; |
| DWARFDie ctyp = child.getAttributeValueAsReferencedDie(dwarf::DW_AT_type); |
| assert(ctyp.isValid()); |
| if (!generateType(ctyp)) |
| rval = false; |
| com = true; |
| } |
| buf() << ") "; |
| |
| // Return type |
| DWARFDie rtyp = die.getAttributeValueAsReferencedDie(dwarf::DW_AT_type); |
| if (rtyp.isValid()) { |
| if (!generateType(rtyp)) |
| rval = false; |
| } |
| |
| // From a practical perspective the only function types of interest |
| // for us are pointer-to-function types, so here we create fictional |
| // values for size and alignment of this type (this makes things easier |
| // if this DIE is the target of a typedef). |
| setTypeSize(die, 0); |
| setTypeAlign(die, 0); |
| |
| return rval; |
| } |
| |
| bool GoDumpHelper::generateMember(const DWARFDie &die) |
| { |
| bool rval = true; |
| |
| const char *name = dieName(die); |
| bool anonSub = false; |
| if (!name) { |
| // This corresponds to an anonymous sub-union, e.g. something like |
| // |
| // struct x { |
| // union { int q; double z; }; |
| // ... |
| // } |
| // |
| // From the compiler's point of view, "q" and "z" are effectively |
| // fields within x, which has to be reflected in the generated Go code. |
| // Note: to make matters more complicated, anonymous structures are |
| // also allowed. Example: |
| // |
| // union { struct { int x; double z; int y; }; |
| // struct { char c4[4]; |
| // struct { double quix; char kkk; }; double k; }; }; |
| // |
| // For the oddball above, each of the nested fields (ex: kkk) is |
| // considered by the compiler to be a child of the top-level union (in |
| // terms of how a user would reference it). |
| DWARFDie ctyp = die.getAttributeValueAsReferencedDie(dwarf::DW_AT_type); |
| assert(ctyp.isValid()); |
| assert(ctyp.getTag() == dwarf::DW_TAG_union_type || |
| ctyp.getTag() == dwarf::DW_TAG_structure_type); |
| anonSubstructure_.insert(ctyp.getOffset()); |
| anonSub = true; |
| } else { |
| assert(name); |
| if (isGoKeyWord(name)) |
| buf() << "_"; |
| buf() << name << " "; |
| } |
| auto bitSize = die.find(dwarf::DW_AT_bit_size); |
| if (bitSize) { |
| // This corresponds to the case of a bitfield whose size/alignment |
| // happens to make it appear to be an integral field, e.g. |
| // |
| // struct { |
| // unsigned x:16; |
| // } |
| // |
| // Here we want to treat 'x' as if it were a simple "unsigned |
| // short" and not a bitfield. |
| // |
| auto bsval = bitSize->getAsUnsignedConstant(); |
| assert(bsval); |
| DWARFDie ctyp = die.getAttributeValueAsReferencedDie(dwarf::DW_AT_type); |
| assert(ctyp.isValid()); |
| assert(isBaseType(ctyp)); |
| auto encoding = dwarf::toUnsigned(ctyp.find(dwarf::DW_AT_encoding)); |
| assert(encoding); |
| if (*encoding == dwarf::DW_ATE_signed_char || |
| *encoding == dwarf::DW_ATE_signed) |
| buf() << "int" << *bsval; |
| else |
| buf() << "uint" << *bsval; |
| } else { |
| DWARFDie ctyp = die.getAttributeValueAsReferencedDie(dwarf::DW_AT_type); |
| assert(ctyp.isValid()); |
| if (!generateType(ctyp)) |
| rval = false; |
| } |
| if (! anonSub) |
| buf() << "; "; |
| return rval; |
| } |
| |
| bool GoDumpHelper::generateUnionType(const DWARFDie &die) |
| { |
| bool rval = true; |
| if (anonSubstructure_.find(die.getOffset()) == anonSubstructure_.end()) |
| buf() << "struct { "; |
| std::pair<raw_string_ostream *, std::string *> pauseState; |
| |
| // Walk the union members. We want to emit only the first field |
| // (since Go has no unions), so pause buffering after the first |
| // field and resume after we are done. |
| DWARFDie child = die.getFirstChild(); |
| uint64_t csiz = 0; |
| uint64_t calign = 0; |
| uint64_t maxalign = 0; |
| bool firstchild = true; |
| auto padcountsave = 0; |
| while (child && !child.isNULL()) { |
| if (child.getTag() == dwarf::DW_TAG_member) { |
| // Replace bitfields with padding. |
| auto bitsize = child.find(dwarf::DW_AT_bit_size); |
| if (bitsize) |
| continue; |
| |
| rval &= generateMember(child); |
| DWARFDie ctyp = child.getAttributeValueAsReferencedDie(dwarf::DW_AT_type); |
| if (firstchild) { |
| calign = typeAlign(ctyp); |
| csiz = typeSize(ctyp); |
| pauseState = pauseBuf(); |
| padcountsave = padcount_; |
| firstchild = false; |
| } |
| maxalign = std::max(typeAlign(ctyp), maxalign); |
| } |
| child = child.getSibling(); |
| } |
| if (pauseState.first != nullptr) { |
| padcount_ = padcountsave; |
| restoreBuf(pauseState.first, pauseState.second); |
| } |
| |
| // Pad out to the required size |
| auto byteSize = dwarf::toUnsigned(die.find(dwarf::DW_AT_byte_size)); |
| assert(byteSize); |
| if (csiz < *byteSize) { |
| unsigned padAmt = *byteSize - csiz; |
| buf() << "Godump_" << padcount_++ << "_pad [" << padAmt << "]byte; "; |
| } |
| |
| // Enforce alignment |
| if (maxalign > calign && maxalign > 1) { |
| buf() << "Godump_" << padcount_++ << "_align [0]int" |
| << bitsTag(maxalign) << "; "; |
| } |
| setTypeAlign(die, maxalign); |
| |
| if (anonSubstructure_.find(die.getOffset()) == anonSubstructure_.end()) |
| buf() << "}"; |
| return rval; |
| } |
| |
| // isBitField returns TRUE if a given member or field is a bitfield. |
| // |
| // Notes: |
| // - some compilers emit DW_AT_bit_offset for bitfields and others use the |
| // more recent DW_AT_data_bit_offset; we need to handle both. |
| // - older versions of GCC emit DW_AT_byte_size for all fields; newer |
| // versions leave this out (presumably assuming that the size can be |
| // derived from the underlying type). Use the byte size attribute if |
| // present, otherwise fall back on the type ref. |
| bool GoDumpHelper::isBitField(const DWARFDie &die) |
| { |
| auto bitSize = die.find(dwarf::DW_AT_bit_size); |
| if (!bitSize) |
| return false; |
| uint64_t tsz = 0; |
| auto byteSize = die.find(dwarf::DW_AT_byte_size); |
| if (!byteSize) { |
| tsz = typeOfSize(die); |
| } else { |
| auto byval = byteSize->getAsUnsignedConstant(); |
| assert(byval); |
| tsz = *byval; |
| } |
| auto bitOffset = die.find(dwarf::DW_AT_data_bit_offset); |
| if (!bitOffset) { |
| bitOffset = die.find(dwarf::DW_AT_bit_offset); |
| } |
| assert(bitOffset); |
| auto bsval = bitSize->getAsUnsignedConstant(); |
| auto boval = bitOffset->getAsUnsignedConstant(); |
| assert(bsval && boval); |
| if (*boval % *bsval == 0 && |
| *bsval % tsz == 0 && |
| (*bsval == 8 || *bsval == 16 || *bsval == 32 || *bsval == 64)) |
| return false; |
| return true; |
| } |
| |
| bool GoDumpHelper::generateStructType(const DWARFDie &die) |
| { |
| if (anonSubstructure_.find(die.getOffset()) == anonSubstructure_.end()) |
| buf() << "struct { "; |
| |
| // Collect members. Note that DWARF allows the producer to include |
| // other things (such as other types) as direct children of the |
| // struct type DIE, so we have to allow for that possibility here. |
| std::vector<DWARFDie> members; |
| DWARFDie child = die.getFirstChild(); |
| while (child && !child.isNULL()) { |
| if (child.getTag() == dwarf::DW_TAG_member) { |
| members.push_back(child); |
| } |
| child = child.getSibling(); |
| } |
| |
| // Walk the members. |
| uint64_t accumSize = 0; |
| uint64_t maxAlign = 0; |
| bool rval = true; |
| bool prevBitField = false; |
| for (unsigned idx = 0; idx < members.size(); ++idx) { |
| auto &member = members[idx]; |
| |
| // Replace bitfields with padding. |
| if (isBitField(member)) { |
| prevBitField = true; |
| continue; |
| } |
| |
| // Padding if needed |
| if (idx != 0) { |
| auto dml = member.find(dwarf::DW_AT_data_member_location); |
| assert(dml); |
| auto dmlval = dml->getAsUnsignedConstant(); |
| assert(dmlval); |
| if (accumSize < dmlval) { |
| unsigned padAmt = *dmlval - accumSize; |
| if (prevBitField) |
| buf() << "Godump_" << padcount_++ << "_pad [" << padAmt << "]byte; "; |
| accumSize += padAmt; |
| } |
| prevBitField = false; |
| } |
| |
| rval &= generateMember(member); |
| DWARFDie mtyp = member.getAttributeValueAsReferencedDie(dwarf::DW_AT_type); |
| maxAlign = std::max(maxAlign, typeAlign(mtyp)); |
| auto memberBitSize = member.find(dwarf::DW_AT_bit_size); |
| if (memberBitSize) { |
| auto bsval = memberBitSize->getAsUnsignedConstant(); |
| assert(bsval); |
| accumSize += *bsval / 8; |
| } else { |
| accumSize += typeSize(mtyp); |
| } |
| } |
| setTypeAlign(die, maxAlign); |
| |
| // Handle the "external" struct case, e.g. something like |
| // |
| // typedef struct definedSomewhereElse btyp; |
| // typedef btyp *pbtyp; |
| // extern pbytp *p; |
| // |
| // There isn't a direct Go equivalent here, so emit a dummy |
| // in such cases and make a record of what's happened. |
| auto isdecl = die.find(dwarf::DW_AT_declaration); |
| if (isdecl) { |
| auto ival = isdecl->getAsUnsignedConstant(); |
| assert(ival); |
| if (*ival) { |
| externalStructs_.insert(die.getOffset()); |
| setTypeSize(die, 0); |
| setTypeAlign(die, 0); |
| } |
| } |
| |
| // Padding if needed |
| auto byteSize = typeSize(die); |
| if (accumSize < byteSize) { |
| unsigned padAmt = byteSize - accumSize; |
| buf() << "Godump_" << padcount_++ << "_pad [" << padAmt << "]byte; "; |
| } |
| |
| if (anonSubstructure_.find(die.getOffset()) == anonSubstructure_.end()) |
| buf() << "}"; |
| |
| return rval; |
| } |
| |
| bool GoDumpHelper::generateBaseType(const DWARFDie &die) |
| { |
| auto byteSize = dwarf::toUnsigned(die.find(dwarf::DW_AT_byte_size)); |
| assert(byteSize); |
| unsigned bytes = *byteSize; |
| const char *bits = bitsTag(bytes); |
| if (!bits) |
| return false; |
| |
| auto encoding = dwarf::toUnsigned(die.find(dwarf::DW_AT_encoding)); |
| assert(encoding); |
| switch(*encoding) { |
| case dwarf::DW_ATE_boolean: |
| setTypeAlign(die, 1); |
| buf() << "bool"; |
| return true; |
| case dwarf::DW_ATE_unsigned_char: { |
| setTypeAlign(die, 1); |
| assert(bytes == 1); |
| buf() << "uint8"; |
| return true; |
| } |
| case dwarf::DW_ATE_signed_char: { |
| setTypeAlign(die, 1); |
| assert(bytes == 1); |
| buf() << "int8"; |
| return true; |
| } |
| case dwarf::DW_ATE_unsigned: { |
| setTypeAlign(die, bytes); |
| // Go does not support uint128 |
| if (bytes > 8) |
| return false; |
| buf() << "uint" << bits; |
| return true; |
| } |
| case dwarf::DW_ATE_signed: { |
| setTypeAlign(die, bytes); |
| // Go does not support int128 |
| if (bytes > 8) |
| return false; |
| buf() << "int" << bits; |
| return true; |
| } |
| case dwarf::DW_ATE_float: { |
| setTypeAlign(die, bytes); |
| // Go does not support float128 / long double |
| if (bytes > 8) |
| return false; |
| buf() << "float" << bits; |
| return true; |
| } |
| case dwarf::DW_ATE_complex_float: { |
| setTypeAlign(die, bytes/2); |
| buf() << "complex" << bits; |
| return true; |
| } |
| default: { |
| return false; |
| } |
| } |
| return false; |
| } |
| |
| DWARFDie GoDumpHelper::forwardedType(DWARFDie die) |
| { |
| while (die.getTag() == dwarf::DW_TAG_typedef || |
| die.getTag() == dwarf::DW_TAG_restrict_type || |
| die.getTag() == dwarf::DW_TAG_volatile_type || |
| die.getTag() == dwarf::DW_TAG_const_type) { |
| die = die.getAttributeValueAsReferencedDie(dwarf::DW_AT_type); |
| assert(die.isValid()); |
| } |
| return die; |
| } |
| |
| bool GoDumpHelper::isAggregate(const DWARFDie &die) |
| { |
| return (die.getTag() == dwarf::DW_TAG_structure_type || |
| die.getTag() == dwarf::DW_TAG_union_type || |
| die.getTag() == dwarf::DW_TAG_array_type); |
| } |
| |
| // When generating a Go representation for a given DWARF type T |
| // that refers to a set of other types { T1, T2, ... TN }, at |
| // various points we have to decide whether to refer to a given child type |
| // TK via TK's names (if it has a name) or whether to emit concrete |
| // definition for TK. This routine helps with making that decision. |
| |
| bool GoDumpHelper::useTypeName(const DWARFDie &die, TypeNameDisp disp) |
| { |
| // Type has to have a name for us to use it. |
| const char *name = dieName(die); |
| if (!name) |
| return false; |
| |
| // Top-level die that we're in the process of emitting? |
| if (die.getOffset() == curDieOffset_) |
| return false; |
| |
| // If we're in the process of visiting this type, we have to |
| // use the emitted name (to avoid infinite recursion). |
| if (visited_.find(die.getOffset()) != visited_.end()) { |
| assert(name); |
| return true; |
| } |
| |
| // Don't try to use the name stored within a base type |
| // (among other things, they are allowed to have spaces) |
| if (isBaseType(die)) |
| return false; |
| |
| // On the first pass (prior to emit) walk as many types as possible. |
| if (!emit_) |
| return false; |
| |
| // Take into account preferences here. |
| if (disp == TN_AvoidName) |
| return false; |
| if (disp == TN_PreferName) |
| return true; |
| |
| // Here we try to mimic the GCC -fgo-dump-spec implementation, which |
| // has specific preferences about whether/where to use a previously |
| // emitted name. |
| DWARFDie fwd(forwardedType(die)); |
| if (!isAggregate(fwd) && !isPtrToFunctionType(fwd)) |
| return false; |
| |
| return true; |
| } |
| |
| bool GoDumpHelper::generateType(const DWARFDie &die, TypeNameDisp disp) |
| { |
| // Invalid? |
| if (isInvalidType(die)) |
| return false; |
| |
| // Record size for posterity. |
| auto byteSize = dwarf::toUnsigned(die.find(dwarf::DW_AT_byte_size)); |
| if (byteSize) |
| setTypeSize(die, *byteSize); |
| |
| // Use a reference to a previously emitted type name if appropriate. |
| if (useTypeName(die, disp)) { |
| const char *name = dieName(die); |
| assert(name); |
| buf() << "_" << name; |
| return true; |
| } |
| |
| // Reset top-level DIE offset. |
| curDieOffset_ = invalidOffset; |
| |
| // Look to see what we're dealing with. |
| bool rval = true; |
| dwarf::Tag tag = die.getTag(); |
| switch(tag) { |
| case dwarf::DW_TAG_base_type: { |
| rval = generateBaseType(die); |
| break; |
| } |
| case dwarf::DW_TAG_pointer_type: { |
| // NB: for "void *" we may see no target type. |
| DWARFDie toDie = die.getAttributeValueAsReferencedDie(dwarf::DW_AT_type); |
| if (! toDie.isValid()) { |
| // Treat this case as "*byte" |
| buf() << "*byte"; |
| } else { |
| if (toDie.getTag() != dwarf::DW_TAG_subroutine_type) |
| buf() << "*"; |
| bool toDieValid = generateType(toDie); |
| if (!toDieValid) { |
| buf() << "byte"; |
| } |
| } |
| setTypeSize(die, ptrSize_); |
| setTypeAlign(die, ptrSize_); |
| break; |
| } |
| case dwarf::DW_TAG_typedef: { |
| DWARFDie tgtDie = die.getAttributeValueAsReferencedDie(dwarf::DW_AT_type); |
| // Interestingly, for a construct like: |
| // |
| // typedef void MyOpaque; |
| // typedef MyOpaque *MyOpaquePointer; |
| // |
| // the DIE corresponding to "MyOpaque" will be a typedef with no |
| // type reference attribute; handle this case accordingly. |
| if (!tgtDie.isValid()) { |
| rval = false; |
| } else { |
| rval = generateType(tgtDie, TN_AvoidName); |
| setTypeAlign(die, typeAlign(tgtDie)); |
| setTypeSize(die, typeSize(tgtDie)); |
| } |
| break; |
| } |
| case dwarf::DW_TAG_structure_type: { |
| visited_.insert(die.getOffset()); |
| rval = generateStructType(die); |
| break; |
| } |
| case dwarf::DW_TAG_union_type: { |
| rval = generateUnionType(die); |
| break; |
| } |
| case dwarf::DW_TAG_enumeration_type: { |
| rval = generateEnumType(die); |
| break; |
| } |
| case dwarf::DW_TAG_subroutine_type: { |
| rval = generateFcnType(die); |
| break; |
| } |
| case dwarf::DW_TAG_array_type: { |
| rval = generateArrayType(die); |
| break; |
| } |
| case dwarf::DW_TAG_const_type: |
| case dwarf::DW_TAG_restrict_type: |
| case dwarf::DW_TAG_volatile_type: { |
| // Throw away these qualifiers. |
| DWARFDie qtyp = die.getAttributeValueAsReferencedDie(dwarf::DW_AT_type); |
| if (!qtyp.isValid()) { |
| rval = false; |
| } else { |
| rval = generateType(qtyp); |
| setTypeAlign(die, typeAlign(qtyp)); |
| setTypeSize(die, typeSize(qtyp)); |
| } |
| break; |
| } |
| default: |
| assert(false); |
| } |
| |
| if (!rval) |
| invalidTypes_.insert(die.getOffset()); |
| |
| return rval; |
| } |
| |
| std::pair<bool, std::string> |
| GoDumpHelper::generateTypeToString(const DWARFDie &die) |
| { |
| auto pauseState = pauseBuf(); |
| bool ok = generateType(die); |
| std::string str(buf().str()); |
| restoreBuf(pauseState.first, pauseState.second); |
| return std::make_pair(ok, str); |
| } |
| |
| void GoDumpHelper::emitVariable(const DWARFDie &die) |
| { |
| initBuf(); |
| |
| DWARFDie typ = die.getAttributeValueAsReferencedDie(dwarf::DW_AT_type); |
| assert(typ.isValid()); |
| bool ok = generateType(typ, TN_PreferName); |
| |
| // In cases where there is a clash between a named type and a variable, |
| // we choose the type and skip the variable. |
| const char *name = dieName(die); |
| if (emittedTypeNames_.find(name) != emittedTypeNames_.end()) |
| ok = false; |
| |
| if (! ok) |
| os() << "// "; |
| assert(name); |
| os() << "var _" << name << " " << buf().str() << "\n"; |
| } |
| |
| void GoDumpHelper::emit() |
| { |
| // Tell the visit routines below to emit Go code. |
| emit_ = true; |
| |
| for (auto idx : queue_) { |
| DWARFDie die = cu_->getDIEAtIndex(idx); |
| if (isType(die.getTag())) |
| visitType(die); |
| else if (die.getTag() == dwarf::DW_TAG_variable) |
| emitVariable(die); |
| } |
| |
| // Emit macros once we've finished with types. |
| emitMacros(os(), emittedTypeNames_); |
| } |
| |
| static void error(StringRef Prefix, std::error_code EC) { |
| if (!EC) |
| return; |
| errs() << Prefix << ": " << EC.message() << "\n"; |
| exit(1); |
| } |
| |
| static int visitMacrosFile(const std::string &infile, |
| GoDumpHelper &state, |
| raw_ostream &os) |
| { |
| std::string line; |
| std::ifstream macfile(infile); |
| unsigned lno = 0; |
| if (macfile.is_open()) { |
| while (std::getline(macfile, line)) |
| { |
| lno += 1; |
| state.visitMacroLine(line, lno); |
| } |
| macfile.close(); |
| state.postProcessMacros(); |
| } else { |
| errs() << "error: unable to open macro file " << infile << "\n"; |
| return 1; |
| } |
| |
| return 0; |
| } |
| |
// Owns the objects created while reading the input object file, so
// that they stay alive for as long as the caller needs the DWARF info.
// Members are destroyed in reverse declaration order, i.e. the DWARF
// context first and the file buffer last.
struct ObjectState {
  // Contents of the input object file.
  std::unique_ptr<MemoryBuffer> mbuf_;
  // Binary created from *mbuf_ (presumably refers into the buffer
  // rather than copying it -- confirm against object::createBinary).
  std::unique_ptr<Binary> binary_;
  // DWARF context created from the object file held by binary_.
  std::unique_ptr<DWARFContext> dwctxt_;
};
| |
| static int visitObjectFile(const std::string &infile, |
| GoDumpHelper &state, |
| ObjectState &ostate, |
| raw_ostream &os) |
| { |
| ErrorOr<std::unique_ptr<MemoryBuffer>> buffOrErr = |
| MemoryBuffer::getFile(infile); |
| error(infile, buffOrErr.getError()); |
| std::unique_ptr<MemoryBuffer> buffer = std::move(buffOrErr.get()); |
| ostate.mbuf_.reset(buffer.release()); |
| |
| Expected<std::unique_ptr<Binary>> binOrErr = |
| object::createBinary(*ostate.mbuf_); |
| error(infile, errorToErrorCode(binOrErr.takeError())); |
| std::unique_ptr<Binary> binary = std::move(binOrErr.get()); |
| ostate.binary_.reset(binary.release()); |
| |
| // NB: no MachO support at the moment |
| auto *obj = dyn_cast<ObjectFile>(ostate.binary_.get()); |
| if (obj == nullptr) { |
| errs() << "error: problems opening object file " << infile << "\n"; |
| return 1; |
| } |
| ostate.dwctxt_.reset(DWARFContext::create(*obj).release()); |
| |
| // Expect to see exactly one DWARF CU. |
| if (ostate.dwctxt_->getNumCompileUnits() < 1) { |
| errs() << "error: no DWARF compilation units found in " << infile << "\n"; |
| return 1; |
| } else if (ostate.dwctxt_->getNumCompileUnits() > 1) { |
| errs() << "error: unexpected multiple DWARF compilation " |
| << "units found in " << infile << "\n"; |
| return 1; |
| } |
| |
| DWARFCompileUnit *cu = |
| cast<DWARFCompileUnit>(ostate.dwctxt_->getUnitAtIndex(0)); |
| state.readDwarf(cu); |
| |
| return 0; |
| } |
| |
| int main(int argc, char **argv) { |
| |
| // Print a stack trace if we signal out. |
| sys::PrintStackTraceOnErrorSignal(argv[0]); |
| PrettyStackTraceProgram X(argc, argv); |
| llvm_shutdown_obj Y; // Call llvm_shutdown() on exit. |
| |
| llvm::InitializeAllTargetInfos(); |
| llvm::InitializeAllTargetMCs(); |
| |
| cl::ParseCommandLineOptions( |
| argc, argv, |
| "Emit Go translation for type/const/macro information derived " |
| "from compilation of a C file.\n"); |
| |
| if (InputObjectFile.empty()) { |
| errs() << "error: supply input object file using -object option.\n"; |
| return 1; |
| } |
| |
| std::unique_ptr<ToolOutputFile> OutputFile; |
| if (!OutputFilename.empty()) { |
| std::error_code EC; |
| OutputFile = std::make_unique<ToolOutputFile>(OutputFilename, EC, |
| sys::fs::OF_None); |
| // Don't remove output file if we exit with an error. |
| OutputFile->keep(); |
| error("Unable to open output file" + OutputFilename, EC); |
| } |
| |
| raw_ostream &OS = OutputFile ? OutputFile->os() : outs(); |
| GoDumpHelper state(OS); |
| ObjectState ostate; |
| |
| int rc = 0; |
| if (! InputObjectFile.empty()) { |
| rc |= visitObjectFile(InputObjectFile, state, ostate, OS); |
| } |
| if (! InputMacrosFile.empty()) { |
| rc |= visitMacrosFile(InputMacrosFile, state, OS); |
| } |
| state.emit(); |
| |
| return rc; |
| } |