gollvm: add a Go-specific "sret" attribute

With -O3, the argument promotion pass can rewrite a StructRet
argument as nonnull noalias, dropping the StructRet attribute.
However, the statepoint pass relies on this attribute to detect
whether a stack slot is initialized at a call.

Introduce a Go-specific "go_sret" string attribute, added alongside
StructRet on calls, and let the statepoint pass check for either one.
The "go_sret" attribute is preserved across optimization.

With this change, precise stack scanning works with -O3.

Change-Id: I3cbd8bd8ca7421fd2d4900e73d50edd7c963d56c
Reviewed-on: https://go-review.googlesource.com/c/gollvm/+/165239
Reviewed-by: Than McIntosh <thanm@google.com>
diff --git a/bridge/go-llvm-materialize.cpp b/bridge/go-llvm-materialize.cpp
index dce76d1..e8814c2 100644
--- a/bridge/go-llvm-materialize.cpp
+++ b/bridge/go-llvm-materialize.cpp
@@ -1293,8 +1293,10 @@
 {
   // Sret attribute if needed
   const CABIParamInfo &returnInfo = state.oracle.returnInfo();
-  if (returnInfo.disp() == ParmIndirect)
+  if (returnInfo.disp() == ParmIndirect) {
     call->addAttribute(1, llvm::Attribute::StructRet);
+    call->addAttribute(1, llvm::Attribute::get(call->getContext(), "go_sret"));
+  }
 
   // Nest attribute if needed
   const CABIParamInfo &chainInfo = state.oracle.chainInfo();
diff --git a/passes/GoStatepoints.cpp b/passes/GoStatepoints.cpp
index c9e0ab5..1a3a0e4 100644
--- a/passes/GoStatepoints.cpp
+++ b/passes/GoStatepoints.cpp
@@ -2252,6 +2252,13 @@
   return nullptr;
 }
 
+/// True if Call has sret or the Go-specific "go_sret" attribute on arg 0.
+static bool hasStructRetAttr(CallBase *Call) {
+  if (Call->hasStructRetAttr()) return true;
+  return Call->getNumArgOperands() > 0 &&
+         Call->getParamAttr(0, "go_sret") != Attribute();
+}
+
 /// Compute the live-in set for the location rbegin starting from
 /// the live-out set of the basic block
 static void computeLiveInValues(BasicBlock::reverse_iterator Begin,
@@ -2360,7 +2367,7 @@
     }
 
     if (CallInst *CI = dyn_cast<CallInst>(&I)){
-      if (CI->hasStructRetAttr()) {
+      if (hasStructRetAttr(CI)) {
         Value *V = CI->getOperand(0);
         if (Value *Base = isTrackedAlloca(V, DVCache)) {
           AllocaDefs.insert(Base);
@@ -2394,7 +2401,7 @@
     }
 
     if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
-      if (II->hasStructRetAttr()) {
+      if (hasStructRetAttr(II)) {
         Value *V = II->getOperand(0);
         if (Value *Base = isTrackedAlloca(V, DVCache)) {
           AllocaDefs.insert(Base);
@@ -2563,7 +2570,7 @@
       if (isTrackedAlloca(Ptr, DVCache) == V)
         StoreSize += DL.getTypeStoreSize(SI->getValueOperand()->getType());
     } else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
-      if (CI->hasStructRetAttr()) {
+      if (hasStructRetAttr(CI)) {
         Value *Ptr = CI->getOperand(0);
         if (isTrackedAlloca(Ptr, DVCache) == V)
           StoreSize += DL.getTypeStoreSize(Ptr->getType()->getPointerElementType());
@@ -2587,7 +2594,7 @@
           break;
         }
     } else if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
-      if (II->hasStructRetAttr()) {
+      if (hasStructRetAttr(II)) {
         Value *Ptr = II->getOperand(0);
         if (isTrackedAlloca(Ptr, DVCache) == V)
           if (DL.getTypeStoreSize(Ptr->getType()->getPointerElementType()) + PtrSize - 1 >= Size)
@@ -2883,7 +2890,7 @@
   // result (passed directly, or indirectly as outgoing arg).
   LiveOut.remove(Inst);
   if (InvokeInst *II = dyn_cast<InvokeInst>(Inst))
-    if (II->hasStructRetAttr()) {
+    if (hasStructRetAttr(II)) {
       Value *Ptr = II->getOperand(0);
       Value *V = Ptr->stripPointerCasts();
       const DataLayout &DL = Inst->getModule()->getDataLayout();
diff --git a/unittests/BackendCore/BackendCABIOracleTests.cpp b/unittests/BackendCore/BackendCABIOracleTests.cpp
index 99f8108..5d7e1b6 100644
--- a/unittests/BackendCore/BackendCABIOracleTests.cpp
+++ b/unittests/BackendCore/BackendCABIOracleTests.cpp
@@ -395,7 +395,7 @@
   const char *exp = R"RAW_RESULT(
     %cast.0 = bitcast [2 x float]* %p0.addr to <2 x float>*
     %ld.0 = load <2 x float>, <2 x float>* %cast.0
-    call addrspace(0) void @foo([3 x double]* sret %sret.actual.0, i8* nest undef, <2 x float> %ld.0)
+    call addrspace(0) void @foo([3 x double]* sret "go_sret" %sret.actual.0, i8* nest undef, <2 x float> %ld.0)
     %cast.1 = bitcast [3 x double]* %sret.formal.0 to i8*
     %cast.2 = bitcast [3 x double]* %sret.actual.0 to i8*
     call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.1, i8* align 8 %cast.2, i64 24, i1 false)