gollvm: support target CPU/features (-march=XXX, etc)
This patch adds real support for the "-march=" command line option, to
allow users to have the back end target a specific CPU (ex: 'nehalem')
within a given architecture (ex: x86-64). Each specific cpu/arch
selection gets translated into a set of "target feature" attributes
that are then attached to each LLVM function; specifying the correct
attributes is key to ensuring that vectorization works properly.
Notes:
- creates a new Go helper program, "capture-fcn-attributes.go", that
developers can build and run to generate a header containing the
correct set of fcn attributes to use for specific CPUs/archs. The
expectation is that building and running this Go program is
something done by gollvm developers offline (as opposed to having it
be part of the cmake/ninja build process each time gollvm is built).
The helper program works by capturing the behavior of clang:
running clang on an input file for specific -march=... values
and then looking at the generated IR.
- unlike clang, gollvm won't support (at least initially) flags for
turning on/off individual features/instructions, since this would
add a great deal of complexity.
Change-Id: I99abf703608e0d1f5d00f6becc3f43cbdb60b394
Reviewed-on: https://go-review.googlesource.com/118322
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
diff --git a/bridge/go-llvm.cpp b/bridge/go-llvm.cpp
index 9d8db39..a6ffb3e 100644
--- a/bridge/go-llvm.cpp
+++ b/bridge/go-llvm.cpp
@@ -141,6 +141,16 @@
dibuildhelper_->addDebugPrefix(prefix);
}
+void Llvm_backend::setTargetCpuAttr(const std::string &cpu)
+{
+ targetCpuAttr_ = cpu; // value later attached to functions as "target-cpu"
+}
+
+void Llvm_backend::setTargetFeaturesAttr(const std::string &attrs)
+{
+ targetFeaturesAttr_ = attrs; // value later attached to functions as "target-features"
+}
+
void
Llvm_backend::verifyModule()
{
@@ -2456,6 +2466,10 @@
if (no_return)
fcn->addFnAttr(llvm::Attribute::NoReturn);
+ // attributes for target CPU and features
+ fcn->addFnAttr("target-cpu", targetCpuAttr_);
+ fcn->addFnAttr("target-features", targetFeaturesAttr_);
+
fcnValue = fcn;
// Fix up references to declaration of old type.
diff --git a/bridge/go-llvm.h b/bridge/go-llvm.h
index 06bf950..ab02d80 100644
--- a/bridge/go-llvm.h
+++ b/bridge/go-llvm.h
@@ -397,6 +397,10 @@
// Disable frame pointer elimination if set to true.
void setNoFpElim(bool b) { noFpElim_ = b; };
+ // Target CPU and features
+ void setTargetCpuAttr(const std::string &cpu);
+ void setTargetFeaturesAttr(const std::string &attrs);
+
// Personality function
llvm::Function *personalityFunction();
@@ -818,6 +822,10 @@
// Personality function
llvm::Function *personalityFunction_;
+
+ // Target cpu and attributes to be attached to any generated fcns.
+ std::string targetCpuAttr_;
+ std::string targetFeaturesAttr_;
};
#endif
diff --git a/driver/ArchCpusAttrs.h b/driver/ArchCpusAttrs.h
new file mode 100644
index 0000000..7064d9e
--- /dev/null
+++ b/driver/ArchCpusAttrs.h
@@ -0,0 +1,74 @@
+// DO NOT EDIT: this file auto-generated by the following command:
+//
+// ./capture-fcn-attributes -o ArchCpusAttrs.h -triples x86_64-unknown-linux-gnu
+//
+// in combination with clang:
+//
+// clang version 7.0.0 (trunk 333637) (llvm/trunk 333650)
+//
+
+typedef struct {
+ const char *cpu;
+ const char *attrs;
+} CpuAttrs;
+
+typedef struct {
+ const char *triple;
+ const CpuAttrs *cpuattrs;
+} TripleCpus;
+
+// triple: x86_64-unknown-linux-gnu
+static const CpuAttrs attrs0[] = {
+ // first entry is default cpu
+ { "x86-64", "+fxsr,+mmx,+sse,+sse2,+x87" },
+ { "amdfam10", "+3dnow,+3dnowa,+fxsr,+lzcnt,+mmx,+popcnt,+prfchw,+sahf,+sse,+sse2,+sse3,+sse4a,+x87" },
+ { "athlon-fx", "+3dnow,+3dnowa,+fxsr,+mmx,+prfchw,+sse,+sse2,+x87" },
+ { "athlon64", "+3dnow,+3dnowa,+fxsr,+mmx,+prfchw,+sse,+sse2,+x87" },
+ { "athlon64-sse3", "+3dnow,+3dnowa,+fxsr,+mmx,+prfchw,+sse,+sse2,+sse3,+x87" },
+ { "atom", "+cx16,+fxsr,+mmx,+movbe,+sahf,+sse,+sse2,+sse3,+ssse3,+x87" },
+ { "barcelona", "+3dnow,+3dnowa,+fxsr,+lzcnt,+mmx,+popcnt,+prfchw,+sahf,+sse,+sse2,+sse3,+sse4a,+x87" },
+ { "bdver1", "+aes,+avx,+cx16,+fma4,+fxsr,+lwp,+lzcnt,+mmx,+pclmul,+popcnt,+prfchw,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+sse4a,+ssse3,+x87,+xop,+xsave" },
+ { "bdver2", "+aes,+avx,+bmi,+cx16,+f16c,+fma,+fma4,+fxsr,+lwp,+lzcnt,+mmx,+pclmul,+popcnt,+prfchw,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+sse4a,+ssse3,+tbm,+x87,+xop,+xsave" },
+ { "bdver3", "+aes,+avx,+bmi,+cx16,+f16c,+fma,+fma4,+fsgsbase,+fxsr,+lwp,+lzcnt,+mmx,+pclmul,+popcnt,+prfchw,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+sse4a,+ssse3,+tbm,+x87,+xop,+xsave,+xsaveopt" },
+ { "bdver4", "+aes,+avx,+avx2,+bmi,+bmi2,+cx16,+f16c,+fma,+fma4,+fsgsbase,+fxsr,+lwp,+lzcnt,+mmx,+mwaitx,+pclmul,+popcnt,+prfchw,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+sse4a,+ssse3,+tbm,+x87,+xop,+xsave,+xsaveopt" },
+ { "bonnell", "+cx16,+fxsr,+mmx,+movbe,+sahf,+sse,+sse2,+sse3,+ssse3,+x87" },
+ { "broadwell", "+adx,+aes,+avx,+avx2,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" },
+ { "btver1", "+cx16,+fxsr,+lzcnt,+mmx,+popcnt,+prfchw,+sahf,+sse,+sse2,+sse3,+sse4a,+ssse3,+x87" },
+ { "btver2", "+aes,+avx,+bmi,+cx16,+f16c,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+sse4a,+ssse3,+x87,+xsave,+xsaveopt" },
+ { "cannonlake", "+adx,+aes,+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512ifma,+avx512vbmi,+avx512vl,+bmi,+bmi2,+clflushopt,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+mpx,+pclmul,+pku,+popcnt,+prfchw,+rdrnd,+rdseed,+rtm,+sahf,+sgx,+sha,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" },
+ { "core-avx-i", "+aes,+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" },
+ { "core-avx2", "+aes,+avx,+avx2,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" },
+ { "core2", "+cx16,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+ssse3,+x87" },
+ { "corei7", "+cx16,+fxsr,+mmx,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" },
+ { "corei7-avx", "+aes,+avx,+cx16,+fxsr,+mmx,+pclmul,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" },
+ { "goldmont", "+aes,+clflushopt,+cx16,+fsgsbase,+fxsr,+mmx,+movbe,+mpx,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sha,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" },
+ { "goldmont-plus", "+aes,+clflushopt,+cx16,+fsgsbase,+fxsr,+mmx,+movbe,+mpx,+pclmul,+popcnt,+prfchw,+ptwrite,+rdpid,+rdrnd,+rdseed,+sahf,+sgx,+sha,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" },
+ { "haswell", "+aes,+avx,+avx2,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" },
+ { "icelake-client", "+adx,+aes,+avx,+avx2,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vpopcntdq,+bmi,+bmi2,+clflushopt,+clwb,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+gfni,+invpcid,+lzcnt,+mmx,+movbe,+mpx,+pclmul,+pku,+popcnt,+prfchw,+rdpid,+rdrnd,+rdseed,+rtm,+sahf,+sgx,+sha,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+vaes,+vpclmulqdq,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" },
+ { "icelake-server", "+adx,+aes,+avx,+avx2,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vpopcntdq,+bmi,+bmi2,+clflushopt,+clwb,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+gfni,+invpcid,+lzcnt,+mmx,+movbe,+mpx,+pclmul,+pconfig,+pku,+popcnt,+prfchw,+rdpid,+rdrnd,+rdseed,+rtm,+sahf,+sgx,+sha,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+vaes,+vpclmulqdq,+wbnoinvd,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" },
+ { "ivybridge", "+aes,+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" },
+ { "k8", "+3dnow,+3dnowa,+fxsr,+mmx,+prfchw,+sse,+sse2,+x87" },
+ { "k8-sse3", "+3dnow,+3dnowa,+fxsr,+mmx,+prfchw,+sse,+sse2,+sse3,+x87" },
+ { "knl", "+adx,+aes,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prefetchwt1,+prfchw,+rdrnd,+rdseed,+rtm,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" },
+ { "knm", "+adx,+aes,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+avx512vpopcntdq,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prefetchwt1,+prfchw,+rdrnd,+rdseed,+rtm,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" },
+ { "nehalem", "+cx16,+fxsr,+mmx,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" },
+ { "nocona", "+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+x87" },
+ { "opteron", "+3dnow,+3dnowa,+fxsr,+mmx,+prfchw,+sse,+sse2,+x87" },
+ { "opteron-sse3", "+3dnow,+3dnowa,+fxsr,+mmx,+prfchw,+sse,+sse2,+sse3,+x87" },
+ { "penryn", "+cx16,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" },
+ { "sandybridge", "+aes,+avx,+cx16,+fxsr,+mmx,+pclmul,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" },
+ { "silvermont", "+aes,+cx16,+fxsr,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" },
+ { "skx", "+adx,+aes,+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+bmi,+bmi2,+clflushopt,+clwb,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+mpx,+pclmul,+pku,+popcnt,+prfchw,+rdrnd,+rdseed,+rtm,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" },
+ { "skylake", "+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+mpx,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+rtm,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" },
+ { "skylake-avx512", "+adx,+aes,+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+bmi,+bmi2,+clflushopt,+clwb,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+mpx,+pclmul,+pku,+popcnt,+prfchw,+rdrnd,+rdseed,+rtm,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" },
+ { "slm", "+aes,+cx16,+fxsr,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" },
+ { "tremont", "+aes,+cldemote,+clflushopt,+cx16,+fsgsbase,+fxsr,+gfni,+mmx,+movbe,+movdir64b,+movdiri,+mpx,+pclmul,+popcnt,+prfchw,+ptwrite,+rdpid,+rdrnd,+rdseed,+sahf,+sgx,+sha,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+waitpkg,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" },
+ { "westmere", "+aes,+cx16,+fxsr,+mmx,+pclmul,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" },
+ { "znver1", "+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+clzero,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+mwaitx,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sha,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+sse4a,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" },
+ { "", "" } // sentinel
+};
+
+const TripleCpus triples[] = {
+ { "x86_64-unknown-linux-gnu", &attrs0[0] },
+ { "", nullptr } // sentinel
+};
diff --git a/driver/CompileGo.cpp b/driver/CompileGo.cpp
index 95d1696..e86a726 100644
--- a/driver/CompileGo.cpp
+++ b/driver/CompileGo.cpp
@@ -26,6 +26,10 @@
#include "Driver.h"
#include "ToolChain.h"
+namespace gollvm { namespace arch {
+#include "ArchCpusAttrs.h"
+} }
+
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -99,6 +103,8 @@
std::string asmOutFileName_;
std::unique_ptr<ToolOutputFile> asmout_;
std::unique_ptr<TargetLibraryInfoImpl> tlii_;
+ std::string targetCpuAttr_;
+ std::string targetFeaturesAttr_;
void createPasses(legacy::PassManager &MPM,
legacy::FunctionPassManager &FPM);
@@ -374,9 +380,9 @@
return false;
Options.AllowFPOpFusion = *dofuse;
- // Support -mcpu
+ // Support -march
std::string cpuStr;
- opt::Arg *cpuarg = args_.getLastArg(gollvm::options::OPT_mcpu_EQ);
+ opt::Arg *cpuarg = args_.getLastArg(gollvm::options::OPT_march_EQ);
if (cpuarg != nullptr) {
std::string val(cpuarg->getValue());
if (val == "native")
@@ -385,16 +391,45 @@
cpuStr = cpuarg->getValue();
}
- // Features.
- // FIXME: incorporate command line flags.
- SubtargetFeatures features;
- features.getDefaultSubtargetFeatures(triple_);
- std::string featStr = features.getString();
+ // Locate correct entry in architectures table for this triple
+ const gollvm::arch::CpuAttrs *cpuAttrs = nullptr;
+ for (unsigned i = 0; gollvm::arch::triples[i].cpuattrs != nullptr; i += 1) {
+ if (!strcmp(triple_.str().c_str(), gollvm::arch::triples[i].triple)) {
+ cpuAttrs = gollvm::arch::triples[i].cpuattrs;
+ break;
+ }
+ }
+ if (cpuAttrs == nullptr) {
+ errs() << progname_ << ": unable to determine target CPU features for "
+ << "target " << triple_.str() << "\n";
+ return false;
+ }
+
+ // If no CPU specified, use first entry. Otherwise look for CPU name.
+ if (!cpuStr.empty()) {
+ bool found = false;
+ while (strlen(cpuAttrs->cpu) != 0) {
+ if (!strcmp(cpuAttrs->cpu, cpuStr.c_str())) {
+ // found
+ found = true;
+ break;
+ }
+ cpuAttrs++;
+ }
+ if (!found) {
+ errs() << progname_ << ": invalid setting for -march:"
+ << " -- unable to identify CPU '" << cpuStr << "'\n";
+ return false;
+ }
+ }
+ targetCpuAttr_ = cpuAttrs->cpu;
+ targetFeaturesAttr_ = cpuAttrs->attrs;
// Create target machine
Optional<llvm::CodeModel::Model> CM = None;
target_.reset(
- TheTarget->createTargetMachine(triple_.getTriple(), cpuStr, featStr,
+ TheTarget->createTargetMachine(triple_.getTriple(),
+ targetCpuAttr_, targetFeaturesAttr_,
Options, driver_.reconcileRelocModel(),
CM, cgolvl_));
assert(target_.get() && "Could not allocate target machine!");
@@ -435,6 +470,8 @@
return false;
bridge_->setTraceLevel(*tl);
bridge_->setNoInline(args_.hasArg(gollvm::options::OPT_fno_inline));
+ bridge_->setTargetCpuAttr(targetCpuAttr_);
+ bridge_->setTargetFeaturesAttr(targetFeaturesAttr_);
// -f[no-]omit-frame-pointer
bool omitFp =
diff --git a/driver/Driver.cpp b/driver/Driver.cpp
index d7893f2..529ca1b 100644
--- a/driver/Driver.cpp
+++ b/driver/Driver.cpp
@@ -289,18 +289,6 @@
else
triple_ = Triple(sys::getDefaultTargetTriple());
- // Support -march
- std::string archStr;
- opt::Arg *archarg = args_.getLastArg(gollvm::options::OPT_march_EQ);
- if (archarg != nullptr) {
- std::string val(archarg->getValue());
- if (val == "native")
- archStr = sys::getHostCPUName();
- else
- archStr = archarg->getValue();
- triple_.setArchName(archStr);
- }
-
// Honor -dumpmachine
if (args_.hasArg(gollvm::options::OPT_dumpmachine)) {
llvm::outs() << triple_.str() << "\n";
diff --git a/driver/capture-fcn-attributes.go b/driver/capture-fcn-attributes.go
new file mode 100644
index 0000000..c3b64f4
--- /dev/null
+++ b/driver/capture-fcn-attributes.go
@@ -0,0 +1,551 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//
+// This bootstrap helper program emits C++ code that encapsulates
+// information about available CPUs for a given architecture. It works
+// by invoking clang and/or llc and inspecting trace output and
+// generated IR. Note: the expectation is that this program will be
+// built and run "off line" to generate a header file that is then
+// checked in (as opposed to having it build and run as part of the
+// actual gollvm ninja/cmake build).
+//
+// The intent is to allow gollvm to support the "-march=XXX" flag in a
+// basic way without having to recreate/replicate all of the
+// architecture-specific machinery in the clang driver that deals
+// with feature flags and feature attributes for the available targets
+// (since this code is very complex).
+//
+// The general idea is that for a given target triple we want to
+// determine the set of legal values that can be supplied to
+// the -march=XXX command line option, along with the correct set
+// of feature attributes that apply for that cpu/arch (settings for
+// -mattr=YYY,ZZZ,...).
+//
+// The strategy is to first run clang and/or LLC using command line
+// options whose output (or error messages) list out available CPU
+// settings (either "llc -mcpu=help" or "clang -march=Illegal"
+// depending). Once the set of available CPUs is populated, we then
+// run clang with -emit-llvm and inspect the generated IR to collect
+// the set of attributes for each arch/cpu.
+//
+// Notes:
+// - not all versions of clang will produce a list of legal arch/cpu
+// values when presented with an illegal -march value (this seems to be
+// a recent development); this trick also doesn't seem to work
+// when cross compiling (supplying --target=XXX to clang). For the
+// cross-compile case, we fall back on running "llc".
+// - confusingly, llc's set of available CPUs is different
+// from clang's available set of CPUs, so there has to be some
+// weeding out of extra CPU names in some cases.
+//
+// Representative usage:
+//
+// % go build capture-fcn-attributes
+// % export PATH=<llvm bin dir>:$PATH
+// % ./capture-fcn-attributes -o HeaderFile.h -triples x86_64-unknown-linux-gnu
+// %
+
+package main
+
+import (
+ "bufio"
+ "flag"
+ "fmt"
+ "io"
+ "io/ioutil"
+ "log"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "regexp"
+ "runtime"
+ "sort"
+ "strconv"
+ "strings"
+)
+
+const prog = `
+#include <inttypes.h>
+typedef struct {
+ uint64_t a[512];
+ uint64_t b[512];
+ uint64_t c[512];
+} vstuff;
+void Add512(vstuff *v) {
+ for (unsigned i = 0; i < 512; ++i) {
+ v->c[i] = v->a[i] + v->b[i];
+ }
+}
+`
+
+var (
+ noclflag = flag.Bool("noclean", false, "Don't clean temp dir")
+ verbflag = flag.Int("v", 0, "Verbose trace output level")
+ cpuflag = flag.String("cpu", "", "Generate for specified cpu or cpus")
+ triplesflag = flag.String("triples", "", "Select target triple(s)")
+ outfileflag = flag.String("o", "", "Output file")
+ exitst int
+ defaultTriple string
+)
+
+func verb(vlevel int, s string, a ...interface{}) {
+	if *verbflag < vlevel {
+		return // below the requested -v level: stay quiet
+	}
+	fmt.Println(fmt.Sprintf(s, a...))
+}
+
+func warn(s string, a ...interface{}) {
+	// Any warning makes the program exit with status 1 (see main).
+	exitst = 1
+	fmt.Fprintln(os.Stderr, fmt.Sprintf(s, a...))
+}
+
+func fatal(s string, a ...interface{}) {
+	log.Fatal(fmt.Sprintf(s, a...)) // logs the message, then exits with status 1
+}
+
+func usage(msg string) {
+	if msg != "" { // only report an error when one was supplied
+		fmt.Fprintf(os.Stderr, "error: %s\n", msg)
+	}
+	fmt.Fprintf(os.Stderr, "usage: capture-fcn-attributes [flags]\n")
+	flag.PrintDefaults()
+	os.Exit(2) // always terminates the program
+}
+
+type result struct {
+ cpu string // CPU name: parsed from the IR on success, the requested name on failure
+ attrs string // target-features string on success; the clang arguments on failure
+ supported bool // false when the clang run for this CPU failed
+ def bool // true for the run made without an explicit CPU (the default)
+}
+
+func tb(x bool) int {
+	if !x {
+		return 0
+	}
+	return 1 // true maps to 1 so that bools can be compared numerically
+}
+
+// ByCpu sorts results by CPU name, with the default entry placed first.
+type ByCpu []result
+func (a ByCpu) Len() int { return len(a) }
+func (a ByCpu) Swap(i, j int) { a[j], a[i] = a[i], a[j] }
+func (a ByCpu) Less(i, j int) bool {
+	if a[i].def == a[j].def {
+		return a[i].cpu < a[j].cpu
+	}
+	return a[i].def // flags differ: i sorts first exactly when it is the default
+}
+
+var qurx = regexp.MustCompile(`^"(.+)"$`)
+
+func qutrim(s string) string {
+	// qurx captures the text between one pair of enclosing double quotes.
+	if m := qurx.FindStringSubmatch(s); len(m) == 2 {
+		return m[1]
+	}
+	return s // not quoted (or nothing between the quotes): hand back unchanged
+}
+
+// Attributes strings are of the form { X Y Z=W A=B "Q"="R" ... }
+
+func parseAttrs(raw string) (string, string) {
+	// Returns (target-cpu, target-features); either is "" when absent.
+	var cpu, features string
+	for _, field := range strings.Fields(raw) {
+		kv := strings.Split(field, "=")
+		if len(kv) != 2 {
+			continue // not a single K=V pair (e.g. a bare attribute word)
+		}
+		key, val := qutrim(kv[0]), qutrim(kv[1])
+		switch key {
+		case "target-features":
+			features = val
+		case "target-cpu":
+			cpu = val
+		}
+	}
+	return cpu, features
+}
+
+// parseClangOut scans an LLVM IR dump for the definition of Add512 and
+// returns the (target-cpu, target-features) pair attached to it. Function
+// definitions carry an attribute tag (#<num>); the matching "attributes
+// #<num> = { ... }" declaration appears later in the dump.
+func parseClangOut(r io.Reader) (string, string) {
+	// function def (the group number after '#' may have several digits):
+	//   define dso_local void @Add512(%struct.vstuff*) #0 {
+	fcnr := regexp.MustCompile(`^define.*@Add512\(.*\)\s+#(\d+)\s+{\s*$`)
+
+	// function attrs:
+	//   attributes #0 = { nounwind uwtable "x"="y" .... }
+	attrr := regexp.MustCompile(`^attributes\s+#(\d+)\s+\=\s+{(.+)\}\s*$`)
+
+	rawattrs := ""
+	attrnum := int64(-1)
+	scanner := bufio.NewScanner(r)
+	for scanner.Scan() {
+		verb(3, "clangline is %s", scanner.Text())
+		sl := fcnr.FindSubmatch(scanner.Bytes())
+		if len(sl) == 2 {
+			at, serr := strconv.ParseInt(string(sl[1]), 10, 64)
+			if serr != nil {
+				fatal("problems matching %s in %s",
+					string(sl[1]), scanner.Text())
+			}
+			attrnum = at
+			verb(3, "=> attrnum is %d", attrnum)
+		}
+		// Only look for attribute groups once the function has been seen.
+		if attrnum != int64(-1) {
+			sl := attrr.FindSubmatch(scanner.Bytes())
+			if len(sl) == 3 {
+				at, serr := strconv.ParseInt(string(sl[1]), 10, 64)
+				if serr != nil {
+					fatal("problems matching %s in %s",
+						string(sl[1]), scanner.Text())
+				}
+				verb(3, "=-= at = %v", at)
+				if at == attrnum {
+					rawattrs = string(sl[2])
+					verb(3, "=> found rawattrs %s", rawattrs)
+					break
+				}
+			}
+		}
+	}
+	if scanner.Err() != nil {
+		fatal("error scanning clang output: %v", scanner.Err())
+	}
+
+	if rawattrs == "" {
+		fatal("unable to locate fcn attrs in clang output")
+	}
+	return parseAttrs(rawattrs)
+}
+
+func parseClangOutFile(cloutfile string) (string, string) {
+	infile, err := os.Open(cloutfile) // IR file produced by clang -emit-llvm
+	if err != nil {
+		fatal("problems opening clang output file %s: %s", cloutfile, err)
+	}
+	defer infile.Close() // called once per CPU: don't leak descriptors
+	return parseClangOut(infile)
+}
+// For debugging (not needed for final output)
+
+func emitClangCmdLine(tdir string, cpu string, clargs []string) {
+	// One trace file per CPU: <tdir>/<cpu>.clangcmd.txt
+	path := filepath.Join(tdir, fmt.Sprintf("%s.clangcmd.txt", cpu))
+	trace, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0666)
+	if err != nil {
+		fatal("unable to open trace output file %s", path)
+	}
+	// The file holds a single line: "clang" followed by each argument.
+	words := append([]string{"clang"}, clargs...)
+	w := bufio.NewWriter(trace)
+	w.WriteString(strings.Join(words, " "))
+	w.WriteString("\n")
+	// Buffered write errors surface at Flush; Close is checked separately.
+	if err := w.Flush(); err != nil {
+		fatal("error writing file %s: %v", path, err)
+	}
+	if err := trace.Close(); err != nil {
+		fatal("error closing output file %s: %v", path, err)
+	}
+}
+
+func enumerateAttributes(triple string, tdir string, cpus []string, bw *bufio.Writer, tf string, idx int) {
+	// Runs clang once per CPU (plus once with no CPU flag to discover the
+	// default), collects target-cpu/target-features from the generated IR,
+	// and writes the table "attrs<idx>" for this triple to bw.
+	verb(1, "enumerating attributes for %d cpus", len(cpus))
+	// First entry in the list needs to be the default CPU (empty name).
+	// Empty names in cpus are dropped so the default run is started only
+	// once; a second run would write the same temp files concurrently.
+	ecpus := []string{""}
+	for _, cpu := range cpus {
+		if cpu != "" {
+			ecpus = append(ecpus, cpu)
+		}
+	}
+
+	// Process the various CPUs in parallel
+	sema := make(chan struct{}, runtime.NumCPU()) // limit concurrency
+	rchan := make(chan result, runtime.NumCPU())
+	for _, cpu := range ecpus {
+		verb(1, "enumerate for cpu %s", cpu)
+		go func(cpu string) {
+			sema <- struct{}{}
+			defer func() { <-sema }()
+			// Invoke clang with proper arguments
+			lloutfile := filepath.Join(tdir, fmt.Sprintf("%s.ll", cpu))
+			clargs := []string{"-emit-llvm", "-S", "-o", lloutfile,
+				"-O3", "-Xclang", "-disable-llvm-passes", tf}
+			cpuarch := "arch"
+			if triple != "" {
+				clargs = append(clargs, fmt.Sprintf("--target=%s", triple))
+			}
+			if triple != defaultTriple {
+				cpuarch = "cpu"
+			}
+			if cpu != "" {
+				clargs = append(clargs, fmt.Sprintf("-m%s=%s", cpuarch, cpu))
+			}
+			emitClangCmdLine(tdir, cpu, clargs)
+			cmd := exec.Command("clang", clargs...)
+			output, cerr := cmd.CombinedOutput()
+			if cerr != nil {
+				if triple == "" {
+					warn("clang run failed: %s", output)
+					fatal("err = %v", cerr)
+				}
+				// Note the 'supported:false' (indicating that this CPU
+				// value is not viable).
+				rchan <- result{cpu: cpu, attrs: strings.Join(clargs, " "), supported: false, def: false}
+				return
+			}
+			// Sift through the output for attr set, then send the result
+			// on to the consumer; the run with no CPU flag is the default.
+			acpu, attrs := parseClangOutFile(lloutfile)
+			rchan <- result{cpu: acpu, attrs: attrs, supported: true, def: cpu == ""}
+		}(cpu)
+	}
+
+	// Read raw results. The default run and an explicit run can report the
+	// same CPU name; keep one entry and preserve its default flag.
+	visited := make(map[string]int)
+	results := []result{}
+	for range ecpus {
+		r := <-rchan
+		verb(1, "result: %v", r)
+		if !r.supported {
+			continue
+		}
+		if at, ok := visited[r.cpu]; ok {
+			results[at].def = results[at].def || r.def
+			continue
+		}
+		visited[r.cpu] = len(results)
+		results = append(results, r)
+	}
+
+	// Sort, then write to output
+	fmt.Fprintf(bw, "// triple: %s\n", triple)
+	fmt.Fprintf(bw, "static const CpuAttrs attrs%d[] = {\n", idx)
+	bw.WriteString(" // first entry is default cpu\n")
+	sort.Sort(ByCpu(results))
+	for _, r := range results {
+		fmt.Fprintf(bw, " { \"%s\", \"%s\" },\n", r.cpu, r.attrs)
+	}
+	bw.WriteString(" { \"\", \"\" } // sentinel\n")
+	bw.WriteString("};\n\n")
+}
+
+// Runs llc to determine default triple value.
+
+func collectDefaultTriple() string {
+	// Ask llc for its version banner, which names the default target.
+	output, err := exec.Command("llc", "--version").CombinedOutput()
+	verb(3, "llc output is: %s\n", string(output))
+	if err != nil {
+		fatal("llc --version failed")
+	}
+
+	// The line of interest has the form:
+	//   Default target: <triple>
+	targetRx := regexp.MustCompile(`^\s*Default target:\s+(\S+)\s*$`)
+	lines := bufio.NewScanner(strings.NewReader(string(output)))
+	for lines.Scan() {
+		text := lines.Text()
+		verb(3, "llc line is: %s", text)
+		if m := targetRx.FindStringSubmatch(text); len(m) == 2 {
+			return m[1]
+		}
+	}
+
+	// No line matched; fatal exits, so the return below is never reached.
+	fatal("parsing of llc --version output failed")
+	return ""
+}
+
+func genCPUs(triple string) []string {
+	// Returns the list of CPU names to probe for the given triple.
+	// If -cpu XXX then just use that.
+	if *cpuflag != "" {
+		return strings.Split(*cpuflag, ",")
+	}
+
+	// Alternatively, look at the output of "llc -mcpu=help" (running
+	// clang with "-march=IllegalBadVal" also has similar effects, but
+	// doesn't allow you to set the triple currently).
+
+	tgtopt := []string{"-mcpu=help", fmt.Sprintf("-mtriple=%s", triple)}
+	cmd := exec.Command("llc", tgtopt...)
+	output, cerr := cmd.CombinedOutput()
+	if cerr != nil {
+		warn("llc run failed: %s", output)
+		fatal("err = %v", cerr)
+	}
+	verb(3, "llc output is: %s\n", string(output))
+
+	// Parse the output (enumerateAttributes adds the default CPU itself).
+	resultcpus := []string{}
+	rw := regexp.MustCompile(`^\s*$`)
+	r1 := regexp.MustCompile(`^Available (\S+) for this target:\s*$`)
+	r2 := regexp.MustCompile(`^\s*(\S+)\s+\-\s\S.*$`)
+	rtx := regexp.MustCompile(`^Use \+feature.*$`)
+	rty := regexp.MustCompile(`^For example.*$`)
+	scanner := bufio.NewScanner(strings.NewReader(string(output)))
+	scanner.Split(bufio.ScanLines)
+	which := ""
+	for scanner.Scan() {
+		verb(3, "llc line is: %s", scanner.Text())
+		lineb := scanner.Bytes()
+		if rw.Find(lineb) != nil || rtx.Find(lineb) != nil ||
+			rty.Find(lineb) != nil {
+			continue
+		}
+		asl := r1.FindSubmatch(lineb)
+		if len(asl) == 2 {
+			which = string(asl[1])
+			continue
+		}
+		bsl := r2.FindSubmatch(lineb)
+		if len(bsl) == 2 {
+			if which == "CPUs" {
+				cpu := string(bsl[1])
+				resultcpus = append(resultcpus, cpu)
+			}
+			continue
+		}
+		warn("unmatched line in llc output: %s", string(lineb))
+	}
+	if scanner.Err() != nil {
+		fatal("error scanning llc output: %v", scanner.Err())
+	}
+
+	return resultcpus
+}
+
+const pream1 = `// DO NOT EDIT: this file auto-generated by the following command:
+//
+`
+
+const pream2 = `
+typedef struct {
+ const char *cpu;
+ const char *attrs;
+} CpuAttrs;
+
+typedef struct {
+ const char *triple;
+ const CpuAttrs *cpuattrs;
+} TripleCpus;
+
+`
+
+func prolog(bw *bufio.Writer) {
+	// Header part 1: the fixed banner plus this program's command line.
+	bw.WriteString(pream1)
+	bw.WriteString("// ")
+	for i := range os.Args {
+		bw.WriteString(" ")
+		bw.WriteString(os.Args[i])
+	}
+	bw.WriteString("\n//\n")
+	bw.WriteString("// in combination with clang:\n//\n")
+	// Header part 2: first line of "clang --version", then the typedefs.
+	output, cerr := exec.Command("clang", "--version").CombinedOutput()
+	if cerr != nil {
+		warn("clang run failed: %s", output)
+		fatal("err = %v", cerr)
+	}
+	firstLine := strings.Split(string(output), "\n")[0]
+	bw.WriteString("// " + firstLine)
+	bw.WriteString("\n//\n")
+	bw.WriteString(pream2)
+}
+
+func epilog(bw *bufio.Writer, triples []string) {
+	io.WriteString(bw, "const TripleCpus triples[] = {\n")
+	for idx := range triples { // row idx points at table attrs<idx>
+		fmt.Fprintf(bw, " { \"%s\", &attrs%d[0] },\n", triples[idx], idx)
+	}
+	io.WriteString(bw, " { \"\", nullptr } // sentinel\n")
+	io.WriteString(bw, "};\n")
+}
+
+func perform() {
+	// Generates the header: temp dir + C input, then one table per triple.
+	// Create tempdir
+	dir, err := ioutil.TempDir("", "CaptureFcnAttrsTempDir")
+	if err != nil {
+		fatal("ioutil.TempDir failed, err=%v", err)
+	}
+	if *noclflag {
+		defer func() { fmt.Printf("preserving temp dir %s\n", dir) }()
+	} else {
+		defer os.RemoveAll(dir)
+	}
+
+	// Emit tempfile
+	tf := filepath.Join(dir, "file.c")
+	verb(1, "temp file is %s", tf)
+	if err := ioutil.WriteFile(tf, []byte(prog), 0666); err != nil {
+		fatal("ioutil.WriteFile failed, err=%v", err)
+	}
+
+	// Open output file
+	var outfile = os.Stdout
+	if len(*outfileflag) > 0 {
+		verb(1, "opening %s", *outfileflag)
+		outfile, err = os.OpenFile(*outfileflag,
+			os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0666)
+		if err != nil {
+			log.Fatal(err)
+		}
+	}
+
+	// strings.Split never returns an empty slice, so test the flag itself.
+	defaultTriple = collectDefaultTriple()
+	triples := []string{defaultTriple}
+	if *triplesflag != "" {
+		triples = strings.Split(*triplesflag, ",")
+	}
+	bw := bufio.NewWriter(outfile)
+	prolog(bw)
+	for k, trip := range triples {
+		// CPU selection (either from option or via clang/llc)
+		cpus := genCPUs(trip)
+
+		// Enumerate attributes for the specified CPUs
+		enumerateAttributes(trip, dir, cpus, bw, tf, k)
+	}
+	epilog(bw, triples)
+	if err := bw.Flush(); err != nil {
+		fatal("error writing output: %v", err)
+	}
+	if len(*outfileflag) > 0 {
+		if err := outfile.Close(); err != nil {
+			fatal("error closing output file %s: %v", *outfileflag, err)
+		}
+	}
+}
+
+func main() {
+	log.SetPrefix("capture-fcn-attributes: ")
+	log.SetFlags(0) // no timestamp, just the prefix
+	flag.Parse()
+	verb(1, "in main")
+	if len(flag.Args()) > 0 {
+		usage("please run without arguments")
+	}
+	perform()
+	verb(1, "leaving main")
+	os.Exit(exitst) // nonzero if warn was called
+}