gollvm: sync with LLVM trunk

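This patch brings the gollvm source into sync with LLVM trunk
circa revision af57b139a0808be41383e8b3838bb8277423c2ab:

- The bridge IRBuilder wrappers now construct llvm::IRBuilder from only
  the context and constant folder, dropping the extra getInserter(),
  nullptr and llvm::None arguments.
- Uses of llvm::sys::fs::F_None are replaced with OF_None, and
  driver/ReadStdin.cpp now includes llvm/Support/FileSystem.h.
- Expected IR in the BackendCore unit tests is updated: bitcasts of
  constant operands (e.g. @const.N, null) now appear inline in the
  memcpy call instead of as separate %cast.N instructions, which
  renumbers the remaining %cast.N values.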

Fixes golang/go#37509

Change-Id: Ifdca6a2feba0ef0066fa826e9b14be7fbed47d99
Reviewed-on: https://go-review.googlesource.com/c/gollvm/+/221997
Reviewed-by: Than McIntosh <thanm@google.com>
diff --git a/bridge/go-llvm-irbuilders.h b/bridge/go-llvm-irbuilders.h
index 39363ab..c855d97 100644
--- a/bridge/go-llvm-irbuilders.h
+++ b/bridge/go-llvm-irbuilders.h
@@ -49,7 +49,7 @@
   typedef llvm::IRBuilder<llvm::ConstantFolder, BexprInserter> IRBuilderB;
  public:
   BexprLIRBuilder(llvm::LLVMContext &context, Bexpression *expr) :
-      IRBuilderB(context, llvm::ConstantFolder(), getInserter(), nullptr, llvm::None) {
+      IRBuilderB(context, llvm::ConstantFolder()) {
     getInserter().setDest(expr);
   }
 };
@@ -81,7 +81,7 @@
                           BinstructionsInserter> IRBuilderB;
  public:
   BinstructionsLIRBuilder(llvm::LLVMContext &context, Binstructions *insns) :
-      IRBuilderB(context, llvm::ConstantFolder(), getInserter(), nullptr, llvm::None) {
+      IRBuilderB(context, llvm::ConstantFolder()) {
     getInserter().setDest(insns);
   }
 };
diff --git a/driver/CompileGo.cpp b/driver/CompileGo.cpp
index 32cf7dc..0393812 100644
--- a/driver/CompileGo.cpp
+++ b/driver/CompileGo.cpp
@@ -439,7 +439,7 @@
       StringRef fname = fnamearg->getValue();
       std::error_code EC;
       optRecordFile_ = std::make_unique<llvm::ToolOutputFile>(
-          fname, EC, llvm::sys::fs::F_None);
+          fname, EC, llvm::sys::fs::OF_None);
       if (EC) {
         errs() << "error: unable to open file '"
                << fname << "' to emit optimization remarks\n";
diff --git a/driver/ReadStdin.cpp b/driver/ReadStdin.cpp
index 399bb4d..a3f47fe 100644
--- a/driver/ReadStdin.cpp
+++ b/driver/ReadStdin.cpp
@@ -17,6 +17,7 @@
 
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/FileSystem.h"
 
 namespace gollvm {
 namespace driver {
@@ -61,7 +62,7 @@
   // Emit to the output artifact.
   std::error_code errc;
   llvm::raw_fd_ostream ostr(output.file(), errc,
-                            llvm::sys::fs::OpenFlags::F_None);
+                            llvm::sys::fs::OpenFlags::OF_None);
   if (errc) {
     llvm::errs() << compilation.driver().progname()
                  << ": cannot open " << output.file() << " for writing: "
diff --git a/libgo/godumpspec/godumpspec.cpp b/libgo/godumpspec/godumpspec.cpp
index edf7705..8a88fba 100644
--- a/libgo/godumpspec/godumpspec.cpp
+++ b/libgo/godumpspec/godumpspec.cpp
@@ -1266,7 +1266,7 @@
   if (!OutputFilename.empty()) {
     std::error_code EC;
     OutputFile = std::make_unique<ToolOutputFile>(OutputFilename, EC,
-                                                  sys::fs::F_None);
+                                                  sys::fs::OF_None);
     // Don't remove output file if we exit with an error.
     OutputFile->keep();
     error("Unable to open output file" + OutputFilename, EC);
diff --git a/unittests/BackendCore/BackendArrayStruct.cpp b/unittests/BackendCore/BackendArrayStruct.cpp
index f38882b..249e8cb 100644
--- a/unittests/BackendCore/BackendArrayStruct.cpp
+++ b/unittests/BackendCore/BackendArrayStruct.cpp
@@ -80,8 +80,7 @@
 
   const char *exp = R"RAW_RESULT(
     %cast.0 = bitcast { i8*, i32 }* %loc1 to i8*
-    %cast.1 = bitcast { i8*, i32 }* @const.0 to i8*
-    call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.0, i8* align 8 %cast.1, i64 16, i1 false)
+    call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.0, i8* align 8 bitcast ({ i8*, i32 }* @const.0 to i8*), i64 16, i1 false)
     store { i8*, i32 }* %loc1, { i8*, i32 }** %loc2
     store i32 0, i32* %x
     %field.0 = getelementptr inbounds { i8*, i32 }, { i8*, i32 }* %loc1, i32 0, i32 1
@@ -308,11 +307,9 @@
 
   const char *exp = R"RAW_RESULT(
     %cast.0 = bitcast [4 x i64]* %aa to i8*
-    %cast.1 = bitcast [4 x i64]* @const.0 to i8*
-    call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.0, i8* align 8 %cast.1, i64 32, i1 false)
-    %cast.2 = bitcast [4 x i64]* %ab to i8*
-    %cast.3 = bitcast [4 x i64]* @const.1 to i8*
-    call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.2, i8* align 8 %cast.3, i64 32, i1 false)
+    call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.0, i8* align 8 bitcast ([4 x i64]* @const.0 to i8*), i64 32, i1 false)
+    %cast.1 = bitcast [4 x i64]* %ab to i8*
+    call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.1, i8* align 8 bitcast ([4 x i64]* @const.1 to i8*), i64 32, i1 false)
     store i64 0, i64* %z
     %z.ld.0 = load i64, i64* %z
     %index.0 = getelementptr [4 x i64], [4 x i64]* %ac, i32 0, i32 0
@@ -371,8 +368,7 @@
 
   const char *exp = R"RAW_RESULT(
     %cast.0 = bitcast { i32*, i32 }* %loc1 to i8*
-    %cast.1 = bitcast { i32*, i32 }* @const.0 to i8*
-    call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.0, i8* align 8 %cast.1, i64 16, i1 false)
+    call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.0, i8* align 8 bitcast ({ i32*, i32 }* @const.0 to i8*), i64 16, i1 false)
     %field.0 = getelementptr inbounds { i32*, i32 }, { i32*, i32 }* %loc1, i32 0, i32 1
     %loc1.field.ld.0 = load i32, i32* %field.0
     %field.1 = getelementptr inbounds { i32*, i32 }, { i32*, i32 }* %loc2, i32 0, i32 0
@@ -428,16 +424,15 @@
 
   const char *exp = R"RAW_RESULT(
     %cast.0 = bitcast { { i32*, i32 }, float }* %loc1 to i8*
-    %cast.1 = bitcast { { i32*, i32 }, float }* @const.0 to i8*
-    call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.0, i8* align 8 %cast.1, i64 24, i1 false)
+    call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.0, i8* align 8 bitcast ({ { i32*, i32 }, float }* @const.0 to i8*), i64 24, i1 false)
     %field.0 = getelementptr inbounds { i32*, i32 }, { i32*, i32 }* %tmp.0, i32 0, i32 0
     store i32* %param1.addr, i32** %field.0
     %field.1 = getelementptr inbounds { i32*, i32 }, { i32*, i32 }* %tmp.0, i32 0, i32 1
     store i32 3, i32* %field.1
     %field.2 = getelementptr inbounds { { i32*, i32 }, float }, { { i32*, i32 }, float }* %loc1, i32 0, i32 0
-    %cast.2 = bitcast { i32*, i32 }* %field.2 to i8*
-    %cast.3 = bitcast { i32*, i32 }* %tmp.0 to i8*
-    call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.2, i8* align 8 %cast.3, i64 16, i1 false)
+    %cast.1 = bitcast { i32*, i32 }* %field.2 to i8*
+    %cast.2 = bitcast { i32*, i32 }* %tmp.0 to i8*
+    call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.1, i8* align 8 %cast.2, i64 16, i1 false)
     %field.3 = getelementptr inbounds { { i32*, i32 }, float }, { { i32*, i32 }, float }* %loc1, i32 0, i32 1
     store float 3.000000e+00, float* %field.3
   )RAW_RESULT";
@@ -586,8 +581,7 @@
 
   const char *exp = R"RAW_RESULT(
     %cast.0 = bitcast [4 x i64]* %aa to i8*
-    %cast.1 = bitcast [4 x i64]* @const.0 to i8*
-    call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.0, i8* align 8 %cast.1, i64 32, i1 false)
+    call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.0, i8* align 8 bitcast ([4 x i64]* @const.0 to i8*), i64 32, i1 false)
     %index.0 = getelementptr [4 x i64], [4 x i64]* %aa, i32 0, i32 1
     %aa.index.ld.0 = load i64, i64* %index.0
     %index.1 = getelementptr [4 x i64], [4 x i64]* %aa, i32 0, i64 %aa.index.ld.0
@@ -656,8 +650,7 @@
 
     const char *exp = R"RAW_RESULT(
       %cast.0 = bitcast [10 x { i8, [4 x { i64, i64 }*], i8 }*]* %t1 to i8*
-      %cast.1 = bitcast [10 x { i8, [4 x { i64, i64 }*], i8 }*]* @const.0 to i8*
-      call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.0, i8* align 8 %cast.1, i64 80, i1 false)
+      call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.0, i8* align 8 bitcast ([10 x { i8, [4 x { i64, i64 }*], i8 }*]* @const.0 to i8*), i64 80, i1 false)
       %index.0 = getelementptr [10 x { i8, [4 x { i64, i64 }*], i8 }*], [10 x { i8, [4 x { i64, i64 }*], i8 }*]* %t1, i32 0, i32 7
       %t1.index.ld.0 = load { i8, [4 x { i64, i64 }*], i8 }*, { i8, [4 x { i64, i64 }*], i8 }** %index.0
       %field.0 = getelementptr inbounds { i8, [4 x { i64, i64 }*], i8 }, { i8, [4 x { i64, i64 }*], i8 }* %t1.index.ld.0, i32 0, i32 1
@@ -738,23 +731,19 @@
 
   const char *exp = R"RAW_RESULT(
     %cast.0 = bitcast { i8* }* %x1 to i8*
-    %cast.1 = bitcast { i8* }* @const.0 to i8*
-    call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.0, i8* align 8 %cast.1, i64 8, i1 false)
-    %cast.2 = bitcast { i8* }* %y1 to i8*
-    %cast.3 = bitcast { i8* }* @const.0 to i8*
-    call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.2, i8* align 8 %cast.3, i64 8, i1 false)
-    %cast.4 = bitcast { i64, i64, i64, i64, i64, i64 }* %x2 to i8*
-    %cast.5 = bitcast { i64, i64, i64, i64, i64, i64 }* @const.1 to i8*
-    call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.4, i8* align 8 %cast.5, i64 48, i1 false)
-    %cast.6 = bitcast { i64, i64, i64, i64, i64, i64 }* %y2 to i8*
-    %cast.7 = bitcast { i64, i64, i64, i64, i64, i64 }* @const.1 to i8*
+    call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.0, i8* align 8 bitcast ({ i8* }* @const.0 to i8*), i64 8, i1 false)
+    %cast.1 = bitcast { i8* }* %y1 to i8*
+    call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.1, i8* align 8 bitcast ({ i8* }* @const.0 to i8*), i64 8, i1 false)
+    %cast.2 = bitcast { i64, i64, i64, i64, i64, i64 }* %x2 to i8*
+    call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.2, i8* align 8 bitcast ({ i64, i64, i64, i64, i64, i64 }* @const.1 to i8*), i64 48, i1 false)
+    %cast.3 = bitcast { i64, i64, i64, i64, i64, i64 }* %y2 to i8*
+    call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.3, i8* align 8 bitcast ({ i64, i64, i64, i64, i64, i64 }* @const.1 to i8*), i64 48, i1 false)
+    %cast.4 = bitcast { i8* }* %x1 to i8*
+    %cast.5 = bitcast { i8* }* %y1 to i8*
+    call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.4, i8* align 8 %cast.5, i64 8, i1 false)
+    %cast.6 = bitcast { i64, i64, i64, i64, i64, i64 }* %x2 to i8*
+    %cast.7 = bitcast { i64, i64, i64, i64, i64, i64 }* %y2 to i8*
     call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.6, i8* align 8 %cast.7, i64 48, i1 false)
-    %cast.8 = bitcast { i8* }* %x1 to i8*
-    %cast.9 = bitcast { i8* }* %y1 to i8*
-    call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.8, i8* align 8 %cast.9, i64 8, i1 false)
-    %cast.10 = bitcast { i64, i64, i64, i64, i64, i64 }* %x2 to i8*
-    %cast.11 = bitcast { i64, i64, i64, i64, i64, i64 }* %y2 to i8*
-    call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.10, i8* align 8 %cast.11, i64 48, i1 false)
   )RAW_RESULT";
 
   bool isOK = h.expectBlock(exp);
@@ -799,8 +788,7 @@
 
   const char *exp = R"RAW_RESULT(
     %cast.0 = bitcast { i32 }* %t1 to i8*
-    %cast.1 = bitcast { i32 }* @const.0 to i8*
-    call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.0, i8* align 4 %cast.1, i64 4, i1 false)
+    call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.0, i8* align 4 bitcast ({ i32 }* @const.0 to i8*), i64 4, i1 false)
     %field.0 = getelementptr inbounds { i32 }, { i32 }* %t1, i32 0, i32 0
     store i32* %field.0, i32** %a1
     store i32* getelementptr inbounds ({ i32 }, { i32 }* @t2, i32 0, i32 0), i32** %a2
diff --git a/unittests/BackendCore/BackendCallTests.cpp b/unittests/BackendCore/BackendCallTests.cpp
index 61fdb73..5e172dc 100644
--- a/unittests/BackendCore/BackendCallTests.cpp
+++ b/unittests/BackendCore/BackendCallTests.cpp
@@ -120,8 +120,7 @@
   {
     const char *exp = R"RAW_RESULT(
       %cast.0 = bitcast { i8*, i32*, i64*, i64 }* %sret.formal.0 to i8*
-      %cast.1 = bitcast { i8*, i32*, i64*, i64 }* @const.0 to i8*
-      call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.0, i8* align 8 %cast.1, i64 32, i1 false)
+      call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.0, i8* align 8 bitcast ({ i8*, i32*, i64*, i64 }* @const.0 to i8*), i64 32, i1 false)
       ret void
     )RAW_RESULT";
 
@@ -153,9 +152,9 @@
       store i64* null, i64** %field.2
       %field.3 = getelementptr inbounds { i8*, i32*, i64*, i64 }, { i8*, i32*, i64*, i64 }* %tmp.0, i32 0, i32 3
       store i64 101, i64* %field.3
-      %cast.3 = bitcast { i8*, i32*, i64*, i64 }* %sret.formal.0 to i8*
-      %cast.4 = bitcast { i8*, i32*, i64*, i64 }* %tmp.0 to i8*
-      call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.3, i8* align 8 %cast.4, i64 32, i1 false)
+      %cast.2 = bitcast { i8*, i32*, i64*, i64 }* %sret.formal.0 to i8*
+      %cast.3 = bitcast { i8*, i32*, i64*, i64 }* %tmp.0 to i8*
+      call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.2, i8* align 8 %cast.3, i64 32, i1 false)
       ret void
     )RAW_RESULT";
 
diff --git a/unittests/BackendCore/BackendExprTests.cpp b/unittests/BackendCore/BackendExprTests.cpp
index 3343734..d3ffd3c 100644
--- a/unittests/BackendCore/BackendExprTests.cpp
+++ b/unittests/BackendCore/BackendExprTests.cpp
@@ -505,20 +505,16 @@
       %x = alloca { double, double }
       %y = alloca { double, double }
       %cast.0 = bitcast { float, float }* %a to i8*
-      %cast.1 = bitcast { float, float }* @const.0 to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.0, i8* align 4 %cast.1, i64 8, i1 false)
-      %cast.2 = bitcast { float, float }* %b to i8*
-      %cast.3 = bitcast { float, float }* @const.0 to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.2, i8* align 4 %cast.3, i64 8, i1 false)
-      %cast.4 = bitcast { double, double }* %x to i8*
-      %cast.5 = bitcast { double, double }* @const.1 to i8*
+      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.0, i8* align 4 bitcast ({ float, float }* @const.0 to i8*), i64 8, i1 false)
+      %cast.1 = bitcast { float, float }* %b to i8*
+      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.1, i8* align 4 bitcast ({ float, float }* @const.0 to i8*), i64 8, i1 false)
+      %cast.2 = bitcast { double, double }* %x to i8*
+      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.2, i8* align 8 bitcast ({ double, double }* @const.1 to i8*), i64 16, i1 false)
+      %cast.3 = bitcast { double, double }* %y to i8*
+      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.3, i8* align 8 bitcast ({ double, double }* @const.1 to i8*), i64 16, i1 false)
+      %cast.4 = bitcast { double, double }* %tmp.0 to i8*
+      %cast.5 = bitcast { double, double }* %x to i8*
       call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.4, i8* align 8 %cast.5, i64 16, i1 false)
-      %cast.6 = bitcast { double, double }* %y to i8*
-      %cast.7 = bitcast { double, double }* @const.1 to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.6, i8* align 8 %cast.7, i64 16, i1 false)
-      %cast.8 = bitcast { double, double }* %tmp.0 to i8*
-      %cast.9 = bitcast { double, double }* %x to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.8, i8* align 8 %cast.9, i64 16, i1 false)
       %field.0 = getelementptr inbounds { double, double }, { double, double }* %tmp.0, i32 0, i32 0
       %.real.ld.0 = load double, double* %field.0
       %fptrunc.0 = fptrunc double %.real.ld.0 to float
@@ -529,12 +525,12 @@
       store float %fptrunc.0, float* %field.2
       %field.3 = getelementptr inbounds { float, float }, { float, float }* %tmp.1, i32 0, i32 1
       store float %fptrunc.1, float* %field.3
-      %cast.10 = bitcast { float, float }* %a to i8*
-      %cast.11 = bitcast { float, float }* %tmp.1 to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.10, i8* align 4 %cast.11, i64 8, i1 false)
-      %cast.12 = bitcast { float, float }* %tmp.2 to i8*
-      %cast.13 = bitcast { float, float }* %b to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.12, i8* align 4 %cast.13, i64 8, i1 false)
+      %cast.6 = bitcast { float, float }* %a to i8*
+      %cast.7 = bitcast { float, float }* %tmp.1 to i8*
+      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.6, i8* align 4 %cast.7, i64 8, i1 false)
+      %cast.8 = bitcast { float, float }* %tmp.2 to i8*
+      %cast.9 = bitcast { float, float }* %b to i8*
+      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.8, i8* align 4 %cast.9, i64 8, i1 false)
       %field.4 = getelementptr inbounds { float, float }, { float, float }* %tmp.2, i32 0, i32 0
       %.real.ld.1 = load float, float* %field.4
       %fpext.0 = fpext float %.real.ld.1 to double
@@ -545,15 +541,15 @@
       store double %fpext.0, double* %field.6
       %field.7 = getelementptr inbounds { double, double }, { double, double }* %tmp.3, i32 0, i32 1
       store double %fpext.1, double* %field.7
-      %cast.14 = bitcast { double, double }* %y to i8*
-      %cast.15 = bitcast { double, double }* %tmp.3 to i8*
+      %cast.10 = bitcast { double, double }* %y to i8*
+      %cast.11 = bitcast { double, double }* %tmp.3 to i8*
+      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.10, i8* align 8 %cast.11, i64 16, i1 false)
+      %cast.12 = bitcast { float, float }* %a to i8*
+      %cast.13 = bitcast { float, float }* %b to i8*
+      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.12, i8* align 4 %cast.13, i64 8, i1 false)
+      %cast.14 = bitcast { double, double }* %x to i8*
+      %cast.15 = bitcast { double, double }* %y to i8*
       call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.14, i8* align 8 %cast.15, i64 16, i1 false)
-      %cast.16 = bitcast { float, float }* %a to i8*
-      %cast.17 = bitcast { float, float }* %b to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.16, i8* align 4 %cast.17, i64 8, i1 false)
-      %cast.18 = bitcast { double, double }* %x to i8*
-      %cast.19 = bitcast { double, double }* %y to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.18, i8* align 8 %cast.19, i64 16, i1 false)
       ret void
     }
   )RAW_RESULT";
@@ -1099,21 +1095,18 @@
       %z = alloca { double, double }
       %b = alloca i8
       %cast.0 = bitcast { double, double }* %x to i8*
-      %cast.1 = bitcast { double, double }* @const.0 to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.0, i8* align 8 %cast.1, i64 16, i1 false)
-      %cast.2 = bitcast { double, double }* %y to i8*
-      %cast.3 = bitcast { double, double }* @const.0 to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.2, i8* align 8 %cast.3, i64 16, i1 false)
-      %cast.4 = bitcast { double, double }* %z to i8*
-      %cast.5 = bitcast { double, double }* @const.0 to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.4, i8* align 8 %cast.5, i64 16, i1 false)
+      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.0, i8* align 8 bitcast ({ double, double }* @const.0 to i8*), i64 16, i1 false)
+      %cast.1 = bitcast { double, double }* %y to i8*
+      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.1, i8* align 8 bitcast ({ double, double }* @const.0 to i8*), i64 16, i1 false)
+      %cast.2 = bitcast { double, double }* %z to i8*
+      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.2, i8* align 8 bitcast ({ double, double }* @const.0 to i8*), i64 16, i1 false)
       store i8 0, i8* %b
-      %cast.6 = bitcast { double, double }* %tmp.0 to i8*
-      %cast.7 = bitcast { double, double }* %x to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.6, i8* align 8 %cast.7, i64 16, i1 false)
-      %cast.8 = bitcast { double, double }* %tmp.1 to i8*
-      %cast.9 = bitcast { double, double }* %y to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.8, i8* align 8 %cast.9, i64 16, i1 false)
+      %cast.3 = bitcast { double, double }* %tmp.0 to i8*
+      %cast.4 = bitcast { double, double }* %x to i8*
+      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.3, i8* align 8 %cast.4, i64 16, i1 false)
+      %cast.5 = bitcast { double, double }* %tmp.1 to i8*
+      %cast.6 = bitcast { double, double }* %y to i8*
+      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.5, i8* align 8 %cast.6, i64 16, i1 false)
       %field.0 = getelementptr inbounds { double, double }, { double, double }* %tmp.0, i32 0, i32 0
       %.real.ld.0 = load double, double* %field.0
       %field.1 = getelementptr inbounds { double, double }, { double, double }* %tmp.1, i32 0, i32 0
@@ -1128,15 +1121,15 @@
       store double %fadd.0, double* %field.4
       %field.5 = getelementptr inbounds { double, double }, { double, double }* %tmp.2, i32 0, i32 1
       store double %fadd.1, double* %field.5
-      %cast.10 = bitcast { double, double }* %z to i8*
-      %cast.11 = bitcast { double, double }* %tmp.2 to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.10, i8* align 8 %cast.11, i64 16, i1 false)
-      %cast.12 = bitcast { double, double }* %tmp.3 to i8*
-      %cast.13 = bitcast { double, double }* %x to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.12, i8* align 8 %cast.13, i64 16, i1 false)
-      %cast.14 = bitcast { double, double }* %tmp.4 to i8*
-      %cast.15 = bitcast { double, double }* %y to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.14, i8* align 8 %cast.15, i64 16, i1 false)
+      %cast.7 = bitcast { double, double }* %z to i8*
+      %cast.8 = bitcast { double, double }* %tmp.2 to i8*
+      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.7, i8* align 8 %cast.8, i64 16, i1 false)
+      %cast.9 = bitcast { double, double }* %tmp.3 to i8*
+      %cast.10 = bitcast { double, double }* %x to i8*
+      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.9, i8* align 8 %cast.10, i64 16, i1 false)
+      %cast.11 = bitcast { double, double }* %tmp.4 to i8*
+      %cast.12 = bitcast { double, double }* %y to i8*
+      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.11, i8* align 8 %cast.12, i64 16, i1 false)
       %field.6 = getelementptr inbounds { double, double }, { double, double }* %tmp.3, i32 0, i32 0
       %.real.ld.2 = load double, double* %field.6
       %field.7 = getelementptr inbounds { double, double }, { double, double }* %tmp.4, i32 0, i32 0
@@ -1151,15 +1144,15 @@
       store double %fsub.0, double* %field.10
       %field.11 = getelementptr inbounds { double, double }, { double, double }* %tmp.5, i32 0, i32 1
       store double %fsub.1, double* %field.11
-      %cast.16 = bitcast { double, double }* %z to i8*
-      %cast.17 = bitcast { double, double }* %tmp.5 to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.16, i8* align 8 %cast.17, i64 16, i1 false)
-      %cast.18 = bitcast { double, double }* %tmp.6 to i8*
-      %cast.19 = bitcast { double, double }* %x to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.18, i8* align 8 %cast.19, i64 16, i1 false)
-      %cast.20 = bitcast { double, double }* %tmp.7 to i8*
-      %cast.21 = bitcast { double, double }* %y to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.20, i8* align 8 %cast.21, i64 16, i1 false)
+      %cast.13 = bitcast { double, double }* %z to i8*
+      %cast.14 = bitcast { double, double }* %tmp.5 to i8*
+      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.13, i8* align 8 %cast.14, i64 16, i1 false)
+      %cast.15 = bitcast { double, double }* %tmp.6 to i8*
+      %cast.16 = bitcast { double, double }* %x to i8*
+      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.15, i8* align 8 %cast.16, i64 16, i1 false)
+      %cast.17 = bitcast { double, double }* %tmp.7 to i8*
+      %cast.18 = bitcast { double, double }* %y to i8*
+      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.17, i8* align 8 %cast.18, i64 16, i1 false)
       %field.12 = getelementptr inbounds { double, double }, { double, double }* %tmp.6, i32 0, i32 0
       %.real.ld.4 = load double, double* %field.12
       %field.13 = getelementptr inbounds { double, double }, { double, double }* %tmp.7, i32 0, i32 0
@@ -1186,15 +1179,15 @@
       store double %fsub.2, double* %field.20
       %field.21 = getelementptr inbounds { double, double }, { double, double }* %tmp.8, i32 0, i32 1
       store double %fadd.2, double* %field.21
-      %cast.22 = bitcast { double, double }* %z to i8*
-      %cast.23 = bitcast { double, double }* %tmp.8 to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.22, i8* align 8 %cast.23, i64 16, i1 false)
-      %cast.24 = bitcast { double, double }* %tmp.9 to i8*
-      %cast.25 = bitcast { double, double }* %x to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.24, i8* align 8 %cast.25, i64 16, i1 false)
-      %cast.26 = bitcast { double, double }* %tmp.10 to i8*
-      %cast.27 = bitcast { double, double }* %y to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.26, i8* align 8 %cast.27, i64 16, i1 false)
+      %cast.19 = bitcast { double, double }* %z to i8*
+      %cast.20 = bitcast { double, double }* %tmp.8 to i8*
+      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.19, i8* align 8 %cast.20, i64 16, i1 false)
+      %cast.21 = bitcast { double, double }* %tmp.9 to i8*
+      %cast.22 = bitcast { double, double }* %x to i8*
+      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.21, i8* align 8 %cast.22, i64 16, i1 false)
+      %cast.23 = bitcast { double, double }* %tmp.10 to i8*
+      %cast.24 = bitcast { double, double }* %y to i8*
+      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.23, i8* align 8 %cast.24, i64 16, i1 false)
       %field.22 = getelementptr inbounds { double, double }, { double, double }* %tmp.9, i32 0, i32 0
       %.real.ld.6 = load double, double* %field.22
       %field.23 = getelementptr inbounds { double, double }, { double, double }* %tmp.10, i32 0, i32 0
@@ -1209,12 +1202,12 @@
       %zext.1 = zext i1 %fcmp.1 to i8
       %iand.0 = and i8 %zext.0, %zext.1
       store i8 %iand.0, i8* %b
-      %cast.28 = bitcast { double, double }* %tmp.11 to i8*
-      %cast.29 = bitcast { double, double }* %x to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.28, i8* align 8 %cast.29, i64 16, i1 false)
-      %cast.30 = bitcast { double, double }* %tmp.12 to i8*
-      %cast.31 = bitcast { double, double }* %y to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.30, i8* align 8 %cast.31, i64 16, i1 false)
+      %cast.25 = bitcast { double, double }* %tmp.11 to i8*
+      %cast.26 = bitcast { double, double }* %x to i8*
+      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.25, i8* align 8 %cast.26, i64 16, i1 false)
+      %cast.27 = bitcast { double, double }* %tmp.12 to i8*
+      %cast.28 = bitcast { double, double }* %y to i8*
+      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.27, i8* align 8 %cast.28, i64 16, i1 false)
       %field.26 = getelementptr inbounds { double, double }, { double, double }* %tmp.11, i32 0, i32 0
       %.real.ld.8 = load double, double* %field.26
       %field.27 = getelementptr inbounds { double, double }, { double, double }* %tmp.12, i32 0, i32 0
@@ -1277,8 +1270,7 @@
     store double 0.000000e+00, double* %a
     store double 0.000000e+00, double* %b
     %cast.0 = bitcast { double, double }* %x to i8*
-    %cast.1 = bitcast { double, double }* @const.0 to i8*
-    call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.0, i8* align 8 %cast.1, i64 16, i1 false)
+    call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.0, i8* align 8 bitcast ({ double, double }* @const.0 to i8*), i64 16, i1 false)
     %field.0 = getelementptr inbounds { double, double }, { double, double }* %x, i32 0, i32 0
     %x.real.ld.0 = load double, double* %field.0
     store double %x.real.ld.0, double* %a
@@ -1480,15 +1472,14 @@
       br label %fallthrough.0
 
     fallthrough.0:                                    ; preds = %else.0, %then.0
-      %cast.4 = bitcast { [16 x i32], i32 }* %a to i8*
-      %cast.5 = bitcast { [16 x i32], i32 }* %tmpv.0 to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.4, i8* align 4 %cast.5, i64 68, i1 false)
+      %cast.3 = bitcast { [16 x i32], i32 }* %a to i8*
+      %cast.4 = bitcast { [16 x i32], i32 }* %tmpv.0 to i8*
+      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.3, i8* align 4 %cast.4, i64 68, i1 false)
       ret void
 
     else.0:                                           ; preds = %entry
       %cast.2 = bitcast { [16 x i32], i32 }* %tmpv.0 to i8*
-      %cast.3 = bitcast { [16 x i32], i32 }* @const.0 to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.2, i8* align 4 %cast.3, i64 68, i1 false)
+      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.2, i8* align 4 bitcast ({ [16 x i32], i32 }* @const.0 to i8*), i64 68, i1 false)
       br label %fallthrough.0
     }
   )RAW_RESULT";
@@ -1556,15 +1547,14 @@
       br label %fallthrough.0
 
     fallthrough.0:                                    ; preds = %else.0, %then.0
-      %cast.4 = bitcast { [16 x i32], i32 }* %a to i8*
-      %cast.5 = bitcast { [16 x i32], i32 }* %tmpv.0 to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.4, i8* align 4 %cast.5, i64 68, i1 false)
+      %cast.3 = bitcast { [16 x i32], i32 }* %a to i8*
+      %cast.4 = bitcast { [16 x i32], i32 }* %tmpv.0 to i8*
+      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.3, i8* align 4 %cast.4, i64 68, i1 false)
       ret void
 
     else.0:                                           ; preds = %entry
       %cast.2 = bitcast { [16 x i32], i32 }* %tmpv.0 to i8*
-      %cast.3 = bitcast { [16 x i32], i32 }* @const.0 to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.2, i8* align 4 %cast.3, i64 68, i1 false)
+      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.2, i8* align 4 bitcast ({ [16 x i32], i32 }* @const.0 to i8*), i64 68, i1 false)
       br label %fallthrough.0
     }
   )RAW_RESULT";
@@ -1649,31 +1639,30 @@
   const char *exp = R"RAW_RESULT(
     define i64 @foo(i8* nest %nest.0, i32 %param1, i32 %param2, i64* %param3) #0 {
       entry:
-        %tmp.0 = alloca { i64, i64 }
-        %param1.addr = alloca i32
-        %param2.addr = alloca i32
-        %param3.addr = alloca i64*
-        %x = alloca i64
-        %y = alloca { i64, i64 }
-        store i32 %param1, i32* %param1.addr
-        store i32 %param2, i32* %param2.addr
-        store i64* %param3, i64** %param3.addr
-        store i64 0, i64* %x
-        %cast.0 = bitcast { i64, i64 }* %y to i8*
-        %cast.1 = bitcast { i64, i64 }* @const.0 to i8*
-        call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.0, i8* align 8 %cast.1, i64 16, i1 false)
-        store i64 5, i64* %x
-        %x.ld.0 = load i64, i64* %x
-        %x.ld.1 = load i64, i64* %x
-        %field.0 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %tmp.0, i32 0, i32 0
-        store i64 %x.ld.0, i64* %field.0
-        %field.1 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %tmp.0, i32 0, i32 1
-        store i64 %x.ld.1, i64* %field.1
-        %cast.2 = bitcast { i64, i64 }* %y to i8*
-        %cast.3 = bitcast { i64, i64 }* %tmp.0 to i8*
-        call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.2, i8* align 8 %cast.3, i64 16, i1 false)
-        ret i64 0
-      }
+      %tmp.0 = alloca { i64, i64 }
+      %param1.addr = alloca i32
+      %param2.addr = alloca i32
+      %param3.addr = alloca i64*
+      %x = alloca i64
+      %y = alloca { i64, i64 }
+      store i32 %param1, i32* %param1.addr
+      store i32 %param2, i32* %param2.addr
+      store i64* %param3, i64** %param3.addr
+      store i64 0, i64* %x
+      %cast.0 = bitcast { i64, i64 }* %y to i8*
+      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.0, i8* align 8 bitcast ({ i64, i64 }* @const.0 to i8*), i64 16, i1 false)
+      store i64 5, i64* %x
+      %x.ld.0 = load i64, i64* %x
+      %x.ld.1 = load i64, i64* %x
+      %field.0 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %tmp.0, i32 0, i32 0
+      store i64 %x.ld.0, i64* %field.0
+      %field.1 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %tmp.0, i32 0, i32 1
+      store i64 %x.ld.1, i64* %field.1
+      %cast.1 = bitcast { i64, i64 }* %y to i8*
+      %cast.2 = bitcast { i64, i64 }* %tmp.0 to i8*
+      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.1, i8* align 8 %cast.2, i64 16, i1 false)
+      ret i64 0
+    }
   )RAW_RESULT";
 
   bool broken = h.finish(StripDebugInfo);
diff --git a/unittests/BackendCore/BackendPointerExprTests.cpp b/unittests/BackendCore/BackendPointerExprTests.cpp
index 2cc63fd..75f4596 100644
--- a/unittests/BackendCore/BackendPointerExprTests.cpp
+++ b/unittests/BackendCore/BackendPointerExprTests.cpp
@@ -131,16 +131,15 @@
 
   const char *exp = R"RAW_RESULT(
     %cast.0 = bitcast { i64 }* %fdloc1 to i8*
-    %cast.1 = bitcast { i64 }* @const.0 to i8*
-    call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.0, i8* align 8 %cast.1, i64 8, i1 false)
+    call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.0, i8* align 8 bitcast ({ i64 }* @const.0 to i8*), i64 8, i1 false)
     store { i64 }* %fdloc1, { i64 }** %fploc1
     store { i64 (i8*, i32, i32, i64*)* }* null, { i64 (i8*, i32, i32, i64*)* }** %fploc2
     %fploc1.ld.0 = load { i64 }*, { i64 }** %fploc1
-    %cast.2 = bitcast { i64 }* %fploc1.ld.0 to { i64 (i8*, i32, i32, i64*)* }*
-    store { i64 (i8*, i32, i32, i64*)* }* %cast.2, { i64 (i8*, i32, i32, i64*)* }** %fploc2
+    %cast.1 = bitcast { i64 }* %fploc1.ld.0 to { i64 (i8*, i32, i32, i64*)* }*
+    store { i64 (i8*, i32, i32, i64*)* }* %cast.1, { i64 (i8*, i32, i32, i64*)* }** %fploc2
     %fploc2.ld.0 = load { i64 (i8*, i32, i32, i64*)* }*, { i64 (i8*, i32, i32, i64*)* }** %fploc2
-    %cast.3 = bitcast { i64 (i8*, i32, i32, i64*)* }* %fploc2.ld.0 to { i64 }*
-    store { i64 }* %cast.3, { i64 }** %fploc1
+    %cast.2 = bitcast { i64 (i8*, i32, i32, i64*)* }* %fploc2.ld.0 to { i64 }*
+    store { i64 }* %cast.2, { i64 }** %fploc1
   )RAW_RESULT";
 
   bool isOK = h.expectBlock(exp);
@@ -229,8 +228,7 @@
     %deref.ld.0 = load i32, i32* null
     store i32 %deref.ld.0, i32* %x
     %cast.2 = bitcast { i32, i32 }* %y to i8*
-    %cast.3 = bitcast { i32, i32 }* null to i8*
-    call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.2, i8* align 4 %cast.3, i64 8, i1 false)
+    call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.2, i8* align 4 null, i64 8, i1 false)
   )RAW_RESULT";
 
   bool isOK = h.expectBlock(exp);
diff --git a/unittests/BackendCore/BackendVarTests.cpp b/unittests/BackendCore/BackendVarTests.cpp
index ef2fbd4..6fac914 100644
--- a/unittests/BackendCore/BackendVarTests.cpp
+++ b/unittests/BackendCore/BackendVarTests.cpp
@@ -535,7 +535,7 @@
 
   const char *exp = R"RAW_RESULT(
     define void @foo(i8* nest %nest.0) #0 {
-    entry:
+      entry:
       %x = alloca i32
       %y = alloca { i32, i32 }
       %0 = bitcast i32* %x to i8*
@@ -544,8 +544,7 @@
       call void @llvm.lifetime.start.p0i8(i64 8, i8* %1)
       store i32 0, i32* %x
       %cast.0 = bitcast { i32, i32 }* %y to i8*
-      %cast.1 = bitcast { i32, i32 }* @const.0 to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.0, i8* align 4 %cast.1, i64 8, i1 false)
+      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.0, i8* align 4 bitcast ({ i32, i32 }* @const.0 to i8*), i64 8, i1 false)
       %field.0 = getelementptr inbounds { i32, i32 }, { i32, i32 }* %y, i32 0, i32 1
       %y.field.ld.0 = load i32, i32* %field.0
       store i32 %y.field.ld.0, i32* %x