gollvm: remaster unit test expected outputs following LLVM align change

Update the expected output dumps for the BackendCore unit tests
following a recent LLVM change that makes alignment explicit on more
loads and stores in IR dumps.
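
For example (taken from the remastered expectations below), a store
that previously dumped as

    store i32 0, i32* %x

now carries its natural alignment explicitly:

    store i32 0, i32* %x, align 4

The expected-output blocks in the affected tests are remastered to
match this new form; no test logic changes.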

Updates golang/go#38728.

Change-Id: I24223dbfece0292652ee679c80602b6dfbe3dea9
Reviewed-on: https://go-review.googlesource.com/c/gollvm/+/232740
Reviewed-by: eric fang <eric.fang@arm.com>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
diff --git a/unittests/BackendCore/BackendArrayStruct.cpp b/unittests/BackendCore/BackendArrayStruct.cpp
index 8a558aa..a1f9eb0 100644
--- a/unittests/BackendCore/BackendArrayStruct.cpp
+++ b/unittests/BackendCore/BackendArrayStruct.cpp
@@ -81,17 +81,17 @@
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     %cast.0 = bitcast { i8*, i32 }* %loc1 to i8*
     call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.0, i8* align 8 bitcast ({ i8*, i32 }* @const.0 to i8*), i64 16, i1 false)
-    store { i8*, i32 }* %loc1, { i8*, i32 }** %loc2
-    store i32 0, i32* %x
+    store { i8*, i32 }* %loc1, { i8*, i32 }** %loc2, align 8
+    store i32 0, i32* %x, align 4
     %field.0 = getelementptr inbounds { i8*, i32 }, { i8*, i32 }* %loc1, i32 0, i32 1
     %loc1.field.ld.0 = load i32, i32* %field.0
-    store i32 %loc1.field.ld.0, i32* %x
-    store i8 0, i8* %b2
+    store i32 %loc1.field.ld.0, i32* %x, align 4
+    store i8 0, i8* %b2, align 1
     %field.1 = getelementptr inbounds { i8*, i32 }, { i8*, i32 }* %loc1, i32 0, i32 0
-    store i8* %b2, i8** %field.1
+    store i8* %b2, i8** %field.1, align 8
     %loc2.ld.0 = load { i8*, i32 }*, { i8*, i32 }** %loc2
     %field.2 = getelementptr inbounds { i8*, i32 }, { i8*, i32 }* %loc2.ld.0, i32 0, i32 1
-    store i32 2, i32* %field.2
+    store i32 2, i32* %field.2, align 4
   )RAW_RESULT");
 
   bool isOK = h.expectBlock(exp);
@@ -146,25 +146,25 @@
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     define void @foo(i8* nest %nest.0) #0 {
-    entry:
-      %tmp.0 = alloca { i8*, i32 }
-      %x = alloca i32
-      %y = alloca i32
-      %z = alloca i32
-      store i32 0, i32* %x
-      store i32 0, i32* %y
-      %y.ld.0 = load i32, i32* %y
-      %field.0 = getelementptr inbounds { i8*, i32 }, { i8*, i32 }* %tmp.0, i32 0, i32 0
-      store i8* null, i8** %field.0
-      %field.1 = getelementptr inbounds { i8*, i32 }, { i8*, i32 }* %tmp.0, i32 0, i32 1
-      store i32 %y.ld.0, i32* %field.1
-      %field.2 = getelementptr inbounds { i8*, i32 }, { i8*, i32 }* %tmp.0, i32 0, i32 1
-      %.field.ld.0 = load i32, i32* %field.2
-      store i32 %.field.ld.0, i32* %x
-      store i32 0, i32* %z
-      store i32 42, i32* %z
-      ret void
-    }
+  entry:
+    %tmp.0 = alloca { i8*, i32 }
+    %x = alloca i32
+    %y = alloca i32
+    %z = alloca i32
+    store i32 0, i32* %x, align 4
+    store i32 0, i32* %y, align 4
+    %y.ld.0 = load i32, i32* %y
+    %field.0 = getelementptr inbounds { i8*, i32 }, { i8*, i32 }* %tmp.0, i32 0, i32 0
+    store i8* null, i8** %field.0, align 8
+    %field.1 = getelementptr inbounds { i8*, i32 }, { i8*, i32 }* %tmp.0, i32 0, i32 1
+    store i32 %y.ld.0, i32* %field.1, align 4
+    %field.2 = getelementptr inbounds { i8*, i32 }, { i8*, i32 }* %tmp.0, i32 0, i32 1
+    %.field.ld.0 = load i32, i32* %field.2
+    store i32 %.field.ld.0, i32* %x, align 4
+    store i32 0, i32* %z, align 4
+    store i32 42, i32* %z, align 4
+    ret void
+  }
   )RAW_RESULT");
 
   bool broken = h.finish(StripDebugInfo);
@@ -232,35 +232,35 @@
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     define void @foo(i8* nest %nest.0) #0 {
-    entry:
-      %tmp.0 = alloca [4 x i64]
-      %x = alloca i64
-      %y = alloca i64
-      %z = alloca i64
-      %w = alloca i64
-      store i64 0, i64* %x
-      store i64 0, i64* %y
-      %y.ld.0 = load i64, i64* %y
-      %index.0 = getelementptr [4 x i64], [4 x i64]* %tmp.0, i32 0, i32 0
-      store i64 %y.ld.0, i64* %index.0
-      %index.1 = getelementptr [4 x i64], [4 x i64]* %tmp.0, i32 0, i32 1
-      store i64 3, i64* %index.1
-      %index.2 = getelementptr [4 x i64], [4 x i64]* %tmp.0, i32 0, i32 2
-      store i64 2, i64* %index.2
-      %index.3 = getelementptr [4 x i64], [4 x i64]* %tmp.0, i32 0, i32 3
-      store i64 1, i64* %index.3
-      %index.4 = getelementptr [4 x i64], [4 x i64]* %tmp.0, i32 0, i32 1
-      %.index.ld.0 = load i64, i64* %index.4
-      store i64 %.index.ld.0, i64* %x
-      store i64 0, i64* %z
-      store i64 3, i64* %z
-      store i64 0, i64* %w
-      %x.ld.0 = load i64, i64* %x
-      %index.5 = getelementptr [4 x i64], [4 x i64]* @const.0, i32 0, i64 %x.ld.0
-      %.index.ld.1 = load i64, i64* %index.5
-      store i64 %.index.ld.1, i64* %w
-      ret void
-    }
+  entry:
+    %tmp.0 = alloca [4 x i64]
+    %x = alloca i64
+    %y = alloca i64
+    %z = alloca i64
+    %w = alloca i64
+    store i64 0, i64* %x, align 8
+    store i64 0, i64* %y, align 8
+    %y.ld.0 = load i64, i64* %y
+    %index.0 = getelementptr [4 x i64], [4 x i64]* %tmp.0, i32 0, i32 0
+    store i64 %y.ld.0, i64* %index.0, align 8
+    %index.1 = getelementptr [4 x i64], [4 x i64]* %tmp.0, i32 0, i32 1
+    store i64 3, i64* %index.1, align 8
+    %index.2 = getelementptr [4 x i64], [4 x i64]* %tmp.0, i32 0, i32 2
+    store i64 2, i64* %index.2, align 8
+    %index.3 = getelementptr [4 x i64], [4 x i64]* %tmp.0, i32 0, i32 3
+    store i64 1, i64* %index.3, align 8
+    %index.4 = getelementptr [4 x i64], [4 x i64]* %tmp.0, i32 0, i32 1
+    %.index.ld.0 = load i64, i64* %index.4
+    store i64 %.index.ld.0, i64* %x, align 8
+    store i64 0, i64* %z, align 8
+    store i64 3, i64* %z, align 8
+    store i64 0, i64* %w, align 8
+    %x.ld.0 = load i64, i64* %x
+    %index.5 = getelementptr [4 x i64], [4 x i64]* @const.0, i32 0, i64 %x.ld.0
+    %.index.ld.1 = load i64, i64* %index.5
+    store i64 %.index.ld.1, i64* %w, align 8
+    ret void
+  }
   )RAW_RESULT");
 
   bool broken = h.finish(StripDebugInfo);
@@ -310,16 +310,16 @@
     call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.0, i8* align 8 bitcast ([4 x i64]* @const.0 to i8*), i64 32, i1 false)
     %cast.1 = bitcast [4 x i64]* %ab to i8*
     call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.1, i8* align 8 bitcast ([4 x i64]* @const.1 to i8*), i64 32, i1 false)
-    store i64 0, i64* %z
+    store i64 0, i64* %z, align 8
     %z.ld.0 = load i64, i64* %z
     %index.0 = getelementptr [4 x i64], [4 x i64]* %ac, i32 0, i32 0
-    store i64 0, i64* %index.0
+    store i64 0, i64* %index.0, align 8
     %index.1 = getelementptr [4 x i64], [4 x i64]* %ac, i32 0, i32 1
-    store i64 %z.ld.0, i64* %index.1
+    store i64 %z.ld.0, i64* %index.1, align 8
     %index.2 = getelementptr [4 x i64], [4 x i64]* %ac, i32 0, i32 2
-    store i64 0, i64* %index.2
+    store i64 0, i64* %index.2, align 8
     %index.3 = getelementptr [4 x i64], [4 x i64]* %ac, i32 0, i32 3
-    store i64 0, i64* %index.3
+    store i64 0, i64* %index.3, align 8
   )RAW_RESULT");
 
   bool isOK = h.expectBlock(exp);
@@ -372,9 +372,9 @@
     %field.0 = getelementptr inbounds { i32*, i32 }, { i32*, i32 }* %loc1, i32 0, i32 1
     %loc1.field.ld.0 = load i32, i32* %field.0
     %field.1 = getelementptr inbounds { i32*, i32 }, { i32*, i32 }* %loc2, i32 0, i32 0
-    store i32* %param1.addr, i32** %field.1
+    store i32* %param1.addr, i32** %field.1, align 8
     %field.2 = getelementptr inbounds { i32*, i32 }, { i32*, i32 }* %loc2, i32 0, i32 1
-    store i32 %loc1.field.ld.0, i32* %field.2
+    store i32 %loc1.field.ld.0, i32* %field.2, align 4
   )RAW_RESULT");
 
   bool isOK = h.expectBlock(exp);
@@ -426,15 +426,15 @@
     %cast.0 = bitcast { { i32*, i32 }, float }* %loc1 to i8*
     call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.0, i8* align 8 bitcast ({ { i32*, i32 }, float }* @const.0 to i8*), i64 24, i1 false)
     %field.0 = getelementptr inbounds { i32*, i32 }, { i32*, i32 }* %tmp.0, i32 0, i32 0
-    store i32* %param1.addr, i32** %field.0
+    store i32* %param1.addr, i32** %field.0, align 8
     %field.1 = getelementptr inbounds { i32*, i32 }, { i32*, i32 }* %tmp.0, i32 0, i32 1
-    store i32 3, i32* %field.1
+    store i32 3, i32* %field.1, align 4
     %field.2 = getelementptr inbounds { { i32*, i32 }, float }, { { i32*, i32 }, float }* %loc1, i32 0, i32 0
     %cast.1 = bitcast { i32*, i32 }* %field.2 to i8*
     %cast.2 = bitcast { i32*, i32 }* %tmp.0 to i8*
     call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.1, i8* align 8 %cast.2, i64 16, i1 false)
     %field.3 = getelementptr inbounds { { i32*, i32 }, float }, { { i32*, i32 }, float }* %loc1, i32 0, i32 1
-    store float 3.000000e+00, float* %field.3
+    store float 3.000000e+00, float* %field.3, align 4
   )RAW_RESULT");
 
   bool isOK = h.expectBlock(exp);
@@ -475,9 +475,9 @@
     %p0.ld.0 = load { i32*, i32 }*, { i32*, i32 }** %p0.addr
     %p1.ld.0 = load i32*, i32** %p1.addr
     %field.0 = getelementptr inbounds { i32*, i32 }, { i32*, i32 }* %p0.ld.0, i32 0, i32 0
-    store i32* %p1.ld.0, i32** %field.0
+    store i32* %p1.ld.0, i32** %field.0, align 8
     %field.1 = getelementptr inbounds { i32*, i32 }, { i32*, i32 }* %p0.ld.0, i32 0, i32 1
-    store i32 101, i32* %field.1
+    store i32 101, i32* %field.1, align 4
   )RAW_RESULT");
 
   bool isOK = h.expectBlock(exp);
@@ -527,10 +527,10 @@
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     %x.ld.0 = load i32, i32* @x
-    store i32 %x.ld.0, i32* getelementptr inbounds ({ i32 }, { i32 }* @t, i32 0, i32 0)
+    store i32 %x.ld.0, i32* getelementptr inbounds ({ i32 }, { i32 }* @t, i32 0, i32 0), align 4
     %t.field.ld.0 = load i32, i32* getelementptr inbounds ({ i32 }, { i32 }* @t, i32 0, i32 0)
     %field.2 = getelementptr inbounds { i32 }, { i32 }* %t2, i32 0, i32 0
-    store i32 %t.field.ld.0, i32* %field.2
+    store i32 %t.field.ld.0, i32* %field.2, align 4
   )RAW_RESULT");
 
   bool isOK = h.expectBlock(exp);
@@ -589,7 +589,7 @@
     %aa.index.ld.1 = load i64, i64* %index.2
     %index.3 = getelementptr [4 x i64], [4 x i64]* %aa, i32 0, i64 %aa.index.ld.1
     %aa.index.ld.2 = load i64, i64* %index.3
-    store i64 %aa.index.ld.2, i64* %index.1
+    store i64 %aa.index.ld.2, i64* %index.1, align 8
   )RAW_RESULT");
 
   bool isOK = h.expectBlock(exp);
@@ -649,15 +649,15 @@
     h.mkAssign(fx, bi64five);
 
     DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
-      %cast.0 = bitcast [10 x { i8, [4 x { i64, i64 }*], i8 }*]* %t1 to i8*
-      call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.0, i8* align 8 bitcast ([10 x { i8, [4 x { i64, i64 }*], i8 }*]* @const.0 to i8*), i64 80, i1 false)
-      %index.0 = getelementptr [10 x { i8, [4 x { i64, i64 }*], i8 }*], [10 x { i8, [4 x { i64, i64 }*], i8 }*]* %t1, i32 0, i32 7
-      %t1.index.ld.0 = load { i8, [4 x { i64, i64 }*], i8 }*, { i8, [4 x { i64, i64 }*], i8 }** %index.0
-      %field.0 = getelementptr inbounds { i8, [4 x { i64, i64 }*], i8 }, { i8, [4 x { i64, i64 }*], i8 }* %t1.index.ld.0, i32 0, i32 1
-      %index.1 = getelementptr [4 x { i64, i64 }*], [4 x { i64, i64 }*]* %field.0, i32 0, i32 3
-      %.field.index.ld.0 = load { i64, i64 }*, { i64, i64 }** %index.1
-      %field.1 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %.field.index.ld.0, i32 0, i32 0
-      store i64 5, i64* %field.1
+    %cast.0 = bitcast [10 x { i8, [4 x { i64, i64 }*], i8 }*]* %t1 to i8*
+    call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.0, i8* align 8 bitcast ([10 x { i8, [4 x { i64, i64 }*], i8 }*]* @const.0 to i8*), i64 80, i1 false)
+    %index.0 = getelementptr [10 x { i8, [4 x { i64, i64 }*], i8 }*], [10 x { i8, [4 x { i64, i64 }*], i8 }*]* %t1, i32 0, i32 7
+    %t1.index.ld.0 = load { i8, [4 x { i64, i64 }*], i8 }*, { i8, [4 x { i64, i64 }*], i8 }** %index.0
+    %field.0 = getelementptr inbounds { i8, [4 x { i64, i64 }*], i8 }, { i8, [4 x { i64, i64 }*], i8 }* %t1.index.ld.0, i32 0, i32 1
+    %index.1 = getelementptr [4 x { i64, i64 }*], [4 x { i64, i64 }*]* %field.0, i32 0, i32 3
+    %.field.index.ld.0 = load { i64, i64 }*, { i64, i64 }** %index.1
+    %field.1 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %.field.index.ld.0, i32 0, i32 0
+    store i64 5, i64* %field.1, align 8
     )RAW_RESULT");
 
     bool isOK = h.expectBlock(exp);
@@ -680,14 +680,14 @@
     h.mkLocal("q", bi64t, fx);
 
     DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
-      %index.2 = getelementptr [10 x { i8, [4 x { i64, i64 }*], i8 }*], [10 x { i8, [4 x { i64, i64 }*], i8 }*]* %t1, i32 0, i32 0
-      %t1.index.ld.1 = load { i8, [4 x { i64, i64 }*], i8 }*, { i8, [4 x { i64, i64 }*], i8 }** %index.2
-      %field.2 = getelementptr inbounds { i8, [4 x { i64, i64 }*], i8 }, { i8, [4 x { i64, i64 }*], i8 }* %t1.index.ld.1, i32 0, i32 1
-      %index.3 = getelementptr [4 x { i64, i64 }*], [4 x { i64, i64 }*]* %field.2, i32 0, i32 0
-      %.field.index.ld.1 = load { i64, i64 }*, { i64, i64 }** %index.3
-      %field.3 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %.field.index.ld.1, i32 0, i32 1
-      %.field.ld.0 = load i64, i64* %field.3
-      store i64 %.field.ld.0, i64* %q
+    %index.2 = getelementptr [10 x { i8, [4 x { i64, i64 }*], i8 }*], [10 x { i8, [4 x { i64, i64 }*], i8 }*]* %t1, i32 0, i32 0
+    %t1.index.ld.1 = load { i8, [4 x { i64, i64 }*], i8 }*, { i8, [4 x { i64, i64 }*], i8 }** %index.2
+    %field.2 = getelementptr inbounds { i8, [4 x { i64, i64 }*], i8 }, { i8, [4 x { i64, i64 }*], i8 }* %t1.index.ld.1, i32 0, i32 1
+    %index.3 = getelementptr [4 x { i64, i64 }*], [4 x { i64, i64 }*]* %field.2, i32 0, i32 0
+    %.field.index.ld.1 = load { i64, i64 }*, { i64, i64 }** %index.3
+    %field.3 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %.field.index.ld.1, i32 0, i32 1
+    %.field.ld.0 = load i64, i64* %field.3
+    store i64 %.field.ld.0, i64* %q, align 8
     )RAW_RESULT");
 
     bool isOK = h.expectBlock(exp);
@@ -790,8 +790,8 @@
     %cast.0 = bitcast { i32 }* %t1 to i8*
     call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.0, i8* align 4 bitcast ({ i32 }* @const.0 to i8*), i64 4, i1 false)
     %field.0 = getelementptr inbounds { i32 }, { i32 }* %t1, i32 0, i32 0
-    store i32* %field.0, i32** %a1
-    store i32* getelementptr inbounds ({ i32 }, { i32 }* @t2, i32 0, i32 0), i32** %a2
+    store i32* %field.0, i32** %a1, align 8
+    store i32* getelementptr inbounds ({ i32 }, { i32 }* @t2, i32 0, i32 0), i32** %a2, align 8
   )RAW_RESULT");
 
   bool isOK = h.expectBlock(exp);
diff --git a/unittests/BackendCore/BackendCABIOracleTests.cpp b/unittests/BackendCore/BackendCABIOracleTests.cpp
index f01b819..7edf2e8 100644
--- a/unittests/BackendCore/BackendCABIOracleTests.cpp
+++ b/unittests/BackendCore/BackendCABIOracleTests.cpp
@@ -537,19 +537,19 @@
     %p4.ld.0 = load i8, i8* %p4.addr
     %cast.1 = bitcast { float, float, i16, i16, i16 }* %p0.addr to { <2 x float>, i48 }*
     %field0.0 = getelementptr inbounds { <2 x float>, i48 }, { <2 x float>, i48 }* %cast.1, i32 0, i32 0
-    %ld.1 = load <2 x float>, <2 x float>* %field0.0
+    %ld.1 = load <2 x float>, <2 x float>* %field0.0, align 8
     %field1.0 = getelementptr inbounds { <2 x float>, i48 }, { <2 x float>, i48 }* %cast.1, i32 0, i32 1
-    %ld.2 = load i48, i48* %field1.0
+    %ld.2 = load i48, i48* %field1.0, align 8
     %cast.2 = bitcast { double, float, float }* %p1.addr to { double, <2 x float> }*
     %field0.1 = getelementptr inbounds { double, <2 x float> }, { double, <2 x float> }* %cast.2, i32 0, i32 0
-    %ld.3 = load double, double* %field0.1
+    %ld.3 = load double, double* %field0.1, align 8
     %field1.1 = getelementptr inbounds { double, <2 x float> }, { double, <2 x float> }* %cast.2, i32 0, i32 1
-    %ld.4 = load <2 x float>, <2 x float>* %field1.1
+    %ld.4 = load <2 x float>, <2 x float>* %field1.1, align 8
     %call.0 = call addrspace(0) { double, <2 x float> } @foo(i8* nest undef, <2 x float> %ld.1, i48 %ld.2, double %ld.3, <2 x float> %ld.4, i8 zeroext %sub.0, i8 signext %p4.ld.0, { { float, float, i16, i16, i16 }, { double, float, float } }* byval %p5)
     %cast.3 = bitcast { double, float, float }* %sret.actual.0 to { double, <2 x float> }*
-    store { double, <2 x float> } %call.0, { double, <2 x float> }* %cast.3
+    store { double, <2 x float> } %call.0, { double, <2 x float> }* %cast.3, align 8
     %cast.4 = bitcast { double, float, float }* %sret.actual.0 to { double, <2 x float> }*
-    %ld.5 = load { double, <2 x float> }, { double, <2 x float> }* %cast.4
+    %ld.5 = load { double, <2 x float> }, { double, <2 x float> }* %cast.4, align 8
     ret { double, <2 x float> } %ld.5
   )RAW_RESULT");
 
@@ -656,22 +656,22 @@
     %p4.ld.0 = load i8, i8* %p4.addr
     %cast.1 = bitcast { float, float, i16, i16, i16 }* %p0.addr to { i64, i48 }*
     %field0.0 = getelementptr inbounds { i64, i48 }, { i64, i48 }* %cast.1, i32 0, i32 0
-    %ld.1 = load i64, i64* %field0.0
+    %ld.1 = load i64, i64* %field0.0, align 8
     %field1.0 = getelementptr inbounds { i64, i48 }, { i64, i48 }* %cast.1, i32 0, i32 1
-    %ld.2 = load i48, i48* %field1.0
+    %ld.2 = load i48, i48* %field1.0, align 8
     %cast.2 = bitcast { double, float, float }* %p1.addr to { i64, i64 }*
     %field0.1 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %cast.2, i32 0, i32 0
-    %ld.3 = load i64, i64* %field0.1
+    %ld.3 = load i64, i64* %field0.1, align 8
     %field1.1 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %cast.2, i32 0, i32 1
-    %ld.4 = load i64, i64* %field1.1
+    %ld.4 = load i64, i64* %field1.1, align 8
     %cast.3 = bitcast { { float, float, i16, i16, i16 }, { double, float, float } }* %doCopy.addr.0 to i8*
     %cast.4 = bitcast { { float, float, i16, i16, i16 }, { double, float, float } }* %p5 to i8*
     call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.3, i8* align 8 %cast.4, i64 32, i1 false)
     %call.0 = call addrspace(0) { i64, i64 } @foo(i8* nest undef, i64 %ld.1, i48 %ld.2, i64 %ld.3, i64 %ld.4, i8 zeroext %sub.0, i8 signext %p4.ld.0, { { float, float, i16, i16, i16 }, { double, float, float } }* %doCopy.addr.0)
     %cast.5 = bitcast { double, float, float }* %sret.actual.0 to { i64, i64 }*
-    store { i64, i64 } %call.0, { i64, i64 }* %cast.5
+    store { i64, i64 } %call.0, { i64, i64 }* %cast.5, align 8
     %cast.6 = bitcast { double, float, float }* %sret.actual.0 to { i64, i64 }*
-    %ld.5 = load { i64, i64 }, { i64, i64 }* %cast.6
+    %ld.5 = load { i64, i64 }, { i64, i64 }* %cast.6, align 8
     ret { i64, i64 } %ld.5
   )RAW_RESULT");
 
@@ -717,7 +717,7 @@
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     %cast.0 = bitcast [2 x float]* %p0.addr to <2 x float>*
-    %ld.0 = load <2 x float>, <2 x float>* %cast.0
+    %ld.0 = load <2 x float>, <2 x float>* %cast.0, align 8
     call addrspace(0) void @foo([3 x double]* sret "go_sret" %sret.actual.0, i8* nest undef, <2 x float> %ld.0)
     %cast.1 = bitcast [3 x double]* %sret.formal.0 to i8*
     %cast.2 = bitcast [3 x double]* %sret.actual.0 to i8*
@@ -760,12 +760,12 @@
   h.mkReturn(rvals);
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
-    %ld.0 = load [2 x float], [2 x float]* %p0.addr
+    %ld.0 = load [2 x float], [2 x float]* %p0.addr, align 4
     %call.0 = call addrspace(0) { double, double, double } @foo(i8* nest undef, [2 x float] %ld.0)
     %cast.1 = bitcast [3 x double]* %sret.actual.0 to { double, double, double }*
-    store { double, double, double } %call.0, { double, double, double }* %cast.1
+    store { double, double, double } %call.0, { double, double, double }* %cast.1, align 8
     %cast.2 = bitcast [3 x double]* %sret.actual.0 to { double, double, double }*
-    %ld.1 = load { double, double, double }, { double, double, double }* %cast.2
+    %ld.1 = load { double, double, double }, { double, double, double }* %cast.2, align 8
     ret { double, double, double } %ld.1
   )RAW_RESULT");
 
@@ -896,25 +896,25 @@
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     %cast.0 = bitcast { float, float }* %p0.addr to <2 x float>*
-    %ld.0 = load <2 x float>, <2 x float>* %cast.0
+    %ld.0 = load <2 x float>, <2 x float>* %cast.0, align 8
     %field0.0 = getelementptr inbounds { double, double }, { double, double }* %p1.addr, i32 0, i32 0
-    %ld.1 = load double, double* %field0.0
+    %ld.1 = load double, double* %field0.0, align 8
     %field1.0 = getelementptr inbounds { double, double }, { double, double }* %p1.addr, i32 0, i32 1
-    %ld.2 = load double, double* %field1.0
+    %ld.2 = load double, double* %field1.0, align 8
     %call.0 = call addrspace(0) <2 x float> @foo(i8* nest undef, <2 x float> %ld.0, double %ld.1, double %ld.2)
     %cast.2 = bitcast { float, float }* %sret.actual.0 to <2 x float>*
-    store <2 x float> %call.0, <2 x float>* %cast.2
+    store <2 x float> %call.0, <2 x float>* %cast.2, align 8
     %cast.3 = bitcast { float, float }* %z to i8*
     %cast.4 = bitcast { float, float }* %sret.actual.0 to i8*
     call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.3, i8* align 4 %cast.4, i64 8, i1 false)
-    %ld.3 = load <2 x float>, <2 x float>* bitcast ({ float, float }* @const.0 to <2 x float>*)
-    %ld.4 = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @const.1, i32 0, i32 0)
-    %ld.5 = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @const.1, i32 0, i32 1)
+    %ld.3 = load <2 x float>, <2 x float>* bitcast ({ float, float }* @const.0 to <2 x float>*), align 8
+    %ld.4 = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @const.1, i32 0, i32 0), align 8
+    %ld.5 = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @const.1, i32 0, i32 1), align 8
     %call.1 = call addrspace(0) <2 x float> @foo(i8* nest undef, <2 x float> %ld.3, double %ld.4, double %ld.5)
     %cast.7 = bitcast { float, float }* %sret.actual.1 to <2 x float>*
-    store <2 x float> %call.1, <2 x float>* %cast.7
+    store <2 x float> %call.1, <2 x float>* %cast.7, align 8
     %cast.8 = bitcast { float, float }* %sret.actual.1 to <2 x float>*
-    %ld.6 = load <2 x float>, <2 x float>* %cast.8
+    %ld.6 = load <2 x float>, <2 x float>* %cast.8, align 8
     ret <2 x float> %ld.6
   )RAW_RESULT");
 
@@ -969,19 +969,19 @@
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     %cast.0 = bitcast { float, float }* %p0.addr to [2 x float]*
-    %ld.0 = load [2 x float], [2 x float]* %cast.0
+    %ld.0 = load [2 x float], [2 x float]* %cast.0, align 4
     %cast.1 = bitcast { double, double }* %p1.addr to [2 x double]*
-    %ld.1 = load [2 x double], [2 x double]* %cast.1
+    %ld.1 = load [2 x double], [2 x double]* %cast.1, align 8
     %call.0 = call addrspace(0) { float, float } @foo(i8* nest undef, [2 x float] %ld.0, [2 x double] %ld.1)
-    store { float, float } %call.0, { float, float }* %sret.actual.0
+    store { float, float } %call.0, { float, float }* %sret.actual.0, align 4
     %cast.3 = bitcast { float, float }* %z to i8*
     %cast.4 = bitcast { float, float }* %sret.actual.0 to i8*
     call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.3, i8* align 4 %cast.4, i64 8, i1 false)
-    %ld.2 = load [2 x float], [2 x float]* bitcast ({ float, float }* @const.0 to [2 x float]*)
-    %ld.3 = load [2 x double], [2 x double]* bitcast ({ double, double }* @const.1 to [2 x double]*)
+    %ld.2 = load [2 x float], [2 x float]* bitcast ({ float, float }* @const.0 to [2 x float]*), align 4
+    %ld.3 = load [2 x double], [2 x double]* bitcast ({ double, double }* @const.1 to [2 x double]*), align 8
     %call.1 = call addrspace(0) { float, float } @foo(i8* nest undef, [2 x float] %ld.2, [2 x double] %ld.3)
-    store { float, float } %call.1, { float, float }* %sret.actual.1
-    %ld.4 = load { float, float }, { float, float }* %sret.actual.1
+    store { float, float } %call.1, { float, float }* %sret.actual.1, align 4
+    %ld.4 = load { float, float }, { float, float }* %sret.actual.1, align 4
     ret { float, float } %ld.4
   )RAW_RESULT");
 
diff --git a/unittests/BackendCore/BackendCallTests.cpp b/unittests/BackendCore/BackendCallTests.cpp
index 47816fd..a1df19a 100644
--- a/unittests/BackendCore/BackendCallTests.cpp
+++ b/unittests/BackendCore/BackendCallTests.cpp
@@ -48,7 +48,7 @@
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     %call.0 = call addrspace(0) i64 @foo(i8* nest undef, i32 3, i32 6, i64* null)
-    store i64 %call.0, i64* %x
+    store i64 %call.0, i64* %x, align 8
     %x.ld.0 = load i64, i64* %x
     ret i64 %x.ld.0
   )RAW_RESULT");
@@ -143,19 +143,19 @@
 
   {
     DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
-      %p0.ld.0 = load i8*, i8** %p0.addr
-      %field.0 = getelementptr inbounds { i8*, i32*, i64*, i64 }, { i8*, i32*, i64*, i64 }* %tmp.0, i32 0, i32 0
-      store i8* %p0.ld.0, i8** %field.0
-      %field.1 = getelementptr inbounds { i8*, i32*, i64*, i64 }, { i8*, i32*, i64*, i64 }* %tmp.0, i32 0, i32 1
-      store i32* null, i32** %field.1
-      %field.2 = getelementptr inbounds { i8*, i32*, i64*, i64 }, { i8*, i32*, i64*, i64 }* %tmp.0, i32 0, i32 2
-      store i64* null, i64** %field.2
-      %field.3 = getelementptr inbounds { i8*, i32*, i64*, i64 }, { i8*, i32*, i64*, i64 }* %tmp.0, i32 0, i32 3
-      store i64 101, i64* %field.3
-      %cast.2 = bitcast { i8*, i32*, i64*, i64 }* %sret.formal.0 to i8*
-      %cast.3 = bitcast { i8*, i32*, i64*, i64 }* %tmp.0 to i8*
-      call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.2, i8* align 8 %cast.3, i64 32, i1 false)
-      ret void
+    %p0.ld.0 = load i8*, i8** %p0.addr
+    %field.0 = getelementptr inbounds { i8*, i32*, i64*, i64 }, { i8*, i32*, i64*, i64 }* %tmp.0, i32 0, i32 0
+    store i8* %p0.ld.0, i8** %field.0, align 8
+    %field.1 = getelementptr inbounds { i8*, i32*, i64*, i64 }, { i8*, i32*, i64*, i64 }* %tmp.0, i32 0, i32 1
+    store i32* null, i32** %field.1, align 8
+    %field.2 = getelementptr inbounds { i8*, i32*, i64*, i64 }, { i8*, i32*, i64*, i64 }* %tmp.0, i32 0, i32 2
+    store i64* null, i64** %field.2, align 8
+    %field.3 = getelementptr inbounds { i8*, i32*, i64*, i64 }, { i8*, i32*, i64*, i64 }* %tmp.0, i32 0, i32 3
+    store i64 101, i64* %field.3, align 8
+    %cast.2 = bitcast { i8*, i32*, i64*, i64 }* %sret.formal.0 to i8*
+    %cast.3 = bitcast { i8*, i32*, i64*, i64 }* %tmp.0 to i8*
+    call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.2, i8* align 8 %cast.3, i64 32, i1 false)
+    ret void
     )RAW_RESULT");
 
     bool isOK = h.expectStmt(s2, exp);
diff --git a/unittests/BackendCore/BackendDebugEmit.cpp b/unittests/BackendCore/BackendDebugEmit.cpp
index 31fa907..352ab0d 100644
--- a/unittests/BackendCore/BackendDebugEmit.cpp
+++ b/unittests/BackendCore/BackendDebugEmit.cpp
@@ -43,13 +43,12 @@
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     define void @foo(i8* nest %nest.0) #0 {
-    entry:
-      %x = alloca i32
-      store i32 0, i32* %x
-      call void @llvm.dbg.declare(metadata i32* %x, metadata !5,
-                                  metadata !DIExpression()), !dbg !12
-      ret void
-    }
+  entry:
+    %x = alloca i32
+    store i32 0, i32* %x, align 4
+    call void @llvm.dbg.declare(metadata i32* %x, metadata !5, metadata !DIExpression()), !dbg !12
+    ret void
+  }
   )RAW_RESULT");
 
   bool broken = h.finish(PreserveDebugInfo);
diff --git a/unittests/BackendCore/BackendExprTests.cpp b/unittests/BackendCore/BackendExprTests.cpp
index c73e2c0..cc57995 100644
--- a/unittests/BackendCore/BackendExprTests.cpp
+++ b/unittests/BackendCore/BackendExprTests.cpp
@@ -244,11 +244,10 @@
   h.mkAssign(fex, mkInt32Const(be, 22));
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
-    store i64 0, i64* %x
+    store i64 0, i64* %x, align 8
     %cast.0 = bitcast i64* %x to { i32, i32 }*
-    %field.0 = getelementptr inbounds { i32, i32 },
-        { i32, i32 }* %cast.0, i32 0, i32 1
-    store i32 22, i32* %field.0
+    %field.0 = getelementptr inbounds { i32, i32 }, { i32, i32 }* %cast.0, i32 0, i32 1
+    store i32 22, i32* %field.0, align 4
   )RAW_RESULT");
 
   bool isOK = h.expectBlock(exp);
@@ -298,12 +297,12 @@
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     %param3.ld.0 = load i64*, i64** %param3.addr
     %cast.0 = bitcast i64* %param3.ld.0 to i32*
-    store i32 5, i32* %cast.0
-    store double 0.000000e+00, double* %p
+    store i32 5, i32* %cast.0, align 4
+    store double 0.000000e+00, double* %p, align 8
     %p.ld.0 = load double, double* %p
     %ftoui.0 = fptoui double %p.ld.0 to i64
     %itpcast.0 = inttoptr i64 %ftoui.0 to i32*
-    store i32 5, i32* %itpcast.0
+    store i32 5, i32* %itpcast.0, align 4
   )RAW_RESULT");
 
   bool isOK = h.expectBlock(exp);
@@ -357,90 +356,90 @@
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     %p1.ld.0 = load double, double* %p1.addr
     %fptrunc.0 = fptrunc double %p1.ld.0 to float
-    store float %fptrunc.0, float* %p0.addr
+    store float %fptrunc.0, float* %p0.addr, align 4
     %p0.ld.0 = load float, float* %p0.addr
     %fpext.0 = fpext float %p0.ld.0 to double
-    store double %fpext.0, double* %p1.addr
+    store double %fpext.0, double* %p1.addr, align 8
     %p2.ld.0 = load i32, i32* %p2.addr
     %sitof.0 = sitofp i32 %p2.ld.0 to float
-    store float %sitof.0, float* %p0.addr
+    store float %sitof.0, float* %p0.addr, align 4
     %p0.ld.1 = load float, float* %p0.addr
     %ftosi.0 = fptosi float %p0.ld.1 to i32
-    store i32 %ftosi.0, i32* %p2.addr
+    store i32 %ftosi.0, i32* %p2.addr, align 4
     %p2.ld.1 = load i32, i32* %p2.addr
     %sitof.1 = sitofp i32 %p2.ld.1 to double
-    store double %sitof.1, double* %p1.addr
+    store double %sitof.1, double* %p1.addr, align 8
     %p1.ld.1 = load double, double* %p1.addr
     %ftosi.1 = fptosi double %p1.ld.1 to i32
-    store i32 %ftosi.1, i32* %p2.addr
+    store i32 %ftosi.1, i32* %p2.addr, align 4
     %p3.ld.0 = load i64, i64* %p3.addr
     %sitof.2 = sitofp i64 %p3.ld.0 to float
-    store float %sitof.2, float* %p0.addr
+    store float %sitof.2, float* %p0.addr, align 4
     %p0.ld.2 = load float, float* %p0.addr
     %ftosi.2 = fptosi float %p0.ld.2 to i64
-    store i64 %ftosi.2, i64* %p3.addr
+    store i64 %ftosi.2, i64* %p3.addr, align 8
     %p3.ld.1 = load i64, i64* %p3.addr
     %sitof.3 = sitofp i64 %p3.ld.1 to double
-    store double %sitof.3, double* %p1.addr
+    store double %sitof.3, double* %p1.addr, align 8
     %p1.ld.2 = load double, double* %p1.addr
     %ftosi.3 = fptosi double %p1.ld.2 to i64
-    store i64 %ftosi.3, i64* %p3.addr
+    store i64 %ftosi.3, i64* %p3.addr, align 8
     %p3.ld.2 = load i64, i64* %p3.addr
     %trunc.0 = trunc i64 %p3.ld.2 to i32
-    store i32 %trunc.0, i32* %p2.addr
+    store i32 %trunc.0, i32* %p2.addr, align 4
     %p2.ld.2 = load i32, i32* %p2.addr
     %sext.0 = sext i32 %p2.ld.2 to i64
-    store i64 %sext.0, i64* %p3.addr
+    store i64 %sext.0, i64* %p3.addr, align 8
     %p4.ld.0 = load i32, i32* %p4.addr
     %uitof.0 = uitofp i32 %p4.ld.0 to float
-    store float %uitof.0, float* %p0.addr
+    store float %uitof.0, float* %p0.addr, align 4
     %p0.ld.3 = load float, float* %p0.addr
     %ftoui.0 = fptoui float %p0.ld.3 to i32
-    store i32 %ftoui.0, i32* %p4.addr
+    store i32 %ftoui.0, i32* %p4.addr, align 4
     %p4.ld.1 = load i32, i32* %p4.addr
     %uitof.1 = uitofp i32 %p4.ld.1 to double
-    store double %uitof.1, double* %p1.addr
+    store double %uitof.1, double* %p1.addr, align 8
     %p1.ld.3 = load double, double* %p1.addr
     %ftoui.1 = fptoui double %p1.ld.3 to i32
-    store i32 %ftoui.1, i32* %p4.addr
+    store i32 %ftoui.1, i32* %p4.addr, align 4
     %p4.ld.2 = load i32, i32* %p4.addr
-    store i32 %p4.ld.2, i32* %p2.addr
+    store i32 %p4.ld.2, i32* %p2.addr, align 4
     %p2.ld.3 = load i32, i32* %p2.addr
-    store i32 %p2.ld.3, i32* %p4.addr
+    store i32 %p2.ld.3, i32* %p4.addr, align 4
     %p4.ld.3 = load i32, i32* %p4.addr
     %zext.0 = zext i32 %p4.ld.3 to i64
-    store i64 %zext.0, i64* %p3.addr
+    store i64 %zext.0, i64* %p3.addr, align 8
     %p3.ld.3 = load i64, i64* %p3.addr
     %trunc.1 = trunc i64 %p3.ld.3 to i32
-    store i32 %trunc.1, i32* %p4.addr
+    store i32 %trunc.1, i32* %p4.addr, align 4
     %p5.ld.0 = load i64, i64* %p5.addr
     %uitof.2 = uitofp i64 %p5.ld.0 to float
-    store float %uitof.2, float* %p0.addr
+    store float %uitof.2, float* %p0.addr, align 4
     %p0.ld.4 = load float, float* %p0.addr
     %ftoui.2 = fptoui float %p0.ld.4 to i64
-    store i64 %ftoui.2, i64* %p5.addr
+    store i64 %ftoui.2, i64* %p5.addr, align 8
     %p5.ld.1 = load i64, i64* %p5.addr
     %uitof.3 = uitofp i64 %p5.ld.1 to double
-    store double %uitof.3, double* %p1.addr
+    store double %uitof.3, double* %p1.addr, align 8
     %p1.ld.4 = load double, double* %p1.addr
     %ftoui.3 = fptoui double %p1.ld.4 to i64
-    store i64 %ftoui.3, i64* %p5.addr
+    store i64 %ftoui.3, i64* %p5.addr, align 8
     %p5.ld.2 = load i64, i64* %p5.addr
     %trunc.2 = trunc i64 %p5.ld.2 to i32
-    store i32 %trunc.2, i32* %p2.addr
+    store i32 %trunc.2, i32* %p2.addr, align 4
     %p2.ld.4 = load i32, i32* %p2.addr
     %sext.1 = sext i32 %p2.ld.4 to i64
-    store i64 %sext.1, i64* %p5.addr
+    store i64 %sext.1, i64* %p5.addr, align 8
     %p5.ld.3 = load i64, i64* %p5.addr
-    store i64 %p5.ld.3, i64* %p3.addr
+    store i64 %p5.ld.3, i64* %p3.addr, align 8
     %p3.ld.4 = load i64, i64* %p3.addr
-    store i64 %p3.ld.4, i64* %p5.addr
+    store i64 %p3.ld.4, i64* %p5.addr, align 8
     %p5.ld.4 = load i64, i64* %p5.addr
     %trunc.3 = trunc i64 %p5.ld.4 to i32
-    store i32 %trunc.3, i32* %p4.addr
+    store i32 %trunc.3, i32* %p4.addr, align 4
     %p4.ld.4 = load i32, i32* %p4.addr
     %zext.1 = zext i32 %p4.ld.4 to i64
-    store i64 %zext.1, i64* %p5.addr
+    store i64 %zext.1, i64* %p5.addr, align 8
   )RAW_RESULT");
 
   bool isOK = h.expectBlock(exp);
@@ -495,63 +494,63 @@
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     define void @foo(i8* nest %nest.0) #0 {
-    entry:
-      %tmp.3 = alloca { double, double }
-      %tmp.2 = alloca { float, float }
-      %tmp.1 = alloca { float, float }
-      %tmp.0 = alloca { double, double }
-      %a = alloca { float, float }
-      %b = alloca { float, float }
-      %x = alloca { double, double }
-      %y = alloca { double, double }
-      %cast.0 = bitcast { float, float }* %a to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.0, i8* align 4 bitcast ({ float, float }* @const.0 to i8*), i64 8, i1 false)
-      %cast.1 = bitcast { float, float }* %b to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.1, i8* align 4 bitcast ({ float, float }* @const.0 to i8*), i64 8, i1 false)
-      %cast.2 = bitcast { double, double }* %x to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.2, i8* align 8 bitcast ({ double, double }* @const.1 to i8*), i64 16, i1 false)
-      %cast.3 = bitcast { double, double }* %y to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.3, i8* align 8 bitcast ({ double, double }* @const.1 to i8*), i64 16, i1 false)
-      %cast.4 = bitcast { double, double }* %tmp.0 to i8*
-      %cast.5 = bitcast { double, double }* %x to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.4, i8* align 8 %cast.5, i64 16, i1 false)
-      %field.0 = getelementptr inbounds { double, double }, { double, double }* %tmp.0, i32 0, i32 0
-      %.real.ld.0 = load double, double* %field.0
-      %fptrunc.0 = fptrunc double %.real.ld.0 to float
-      %field.1 = getelementptr inbounds { double, double }, { double, double }* %tmp.0, i32 0, i32 1
-      %.imag.ld.0 = load double, double* %field.1
-      %fptrunc.1 = fptrunc double %.imag.ld.0 to float
-      %field.2 = getelementptr inbounds { float, float }, { float, float }* %tmp.1, i32 0, i32 0
-      store float %fptrunc.0, float* %field.2
-      %field.3 = getelementptr inbounds { float, float }, { float, float }* %tmp.1, i32 0, i32 1
-      store float %fptrunc.1, float* %field.3
-      %cast.6 = bitcast { float, float }* %a to i8*
-      %cast.7 = bitcast { float, float }* %tmp.1 to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.6, i8* align 4 %cast.7, i64 8, i1 false)
-      %cast.8 = bitcast { float, float }* %tmp.2 to i8*
-      %cast.9 = bitcast { float, float }* %b to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.8, i8* align 4 %cast.9, i64 8, i1 false)
-      %field.4 = getelementptr inbounds { float, float }, { float, float }* %tmp.2, i32 0, i32 0
-      %.real.ld.1 = load float, float* %field.4
-      %fpext.0 = fpext float %.real.ld.1 to double
-      %field.5 = getelementptr inbounds { float, float }, { float, float }* %tmp.2, i32 0, i32 1
-      %.imag.ld.1 = load float, float* %field.5
-      %fpext.1 = fpext float %.imag.ld.1 to double
-      %field.6 = getelementptr inbounds { double, double }, { double, double }* %tmp.3, i32 0, i32 0
-      store double %fpext.0, double* %field.6
-      %field.7 = getelementptr inbounds { double, double }, { double, double }* %tmp.3, i32 0, i32 1
-      store double %fpext.1, double* %field.7
-      %cast.10 = bitcast { double, double }* %y to i8*
-      %cast.11 = bitcast { double, double }* %tmp.3 to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.10, i8* align 8 %cast.11, i64 16, i1 false)
-      %cast.12 = bitcast { float, float }* %a to i8*
-      %cast.13 = bitcast { float, float }* %b to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.12, i8* align 4 %cast.13, i64 8, i1 false)
-      %cast.14 = bitcast { double, double }* %x to i8*
-      %cast.15 = bitcast { double, double }* %y to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.14, i8* align 8 %cast.15, i64 16, i1 false)
-      ret void
-    }
+  entry:
+    %tmp.3 = alloca { double, double }
+    %tmp.2 = alloca { float, float }
+    %tmp.1 = alloca { float, float }
+    %tmp.0 = alloca { double, double }
+    %a = alloca { float, float }
+    %b = alloca { float, float }
+    %x = alloca { double, double }
+    %y = alloca { double, double }
+    %cast.0 = bitcast { float, float }* %a to i8*
+    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.0, i8* align 4 bitcast ({ float, float }* @const.0 to i8*), i64 8, i1 false)
+    %cast.1 = bitcast { float, float }* %b to i8*
+    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.1, i8* align 4 bitcast ({ float, float }* @const.0 to i8*), i64 8, i1 false)
+    %cast.2 = bitcast { double, double }* %x to i8*
+    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.2, i8* align 8 bitcast ({ double, double }* @const.1 to i8*), i64 16, i1 false)
+    %cast.3 = bitcast { double, double }* %y to i8*
+    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.3, i8* align 8 bitcast ({ double, double }* @const.1 to i8*), i64 16, i1 false)
+    %cast.4 = bitcast { double, double }* %tmp.0 to i8*
+    %cast.5 = bitcast { double, double }* %x to i8*
+    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.4, i8* align 8 %cast.5, i64 16, i1 false)
+    %field.0 = getelementptr inbounds { double, double }, { double, double }* %tmp.0, i32 0, i32 0
+    %.real.ld.0 = load double, double* %field.0
+    %fptrunc.0 = fptrunc double %.real.ld.0 to float
+    %field.1 = getelementptr inbounds { double, double }, { double, double }* %tmp.0, i32 0, i32 1
+    %.imag.ld.0 = load double, double* %field.1
+    %fptrunc.1 = fptrunc double %.imag.ld.0 to float
+    %field.2 = getelementptr inbounds { float, float }, { float, float }* %tmp.1, i32 0, i32 0
+    store float %fptrunc.0, float* %field.2, align 4
+    %field.3 = getelementptr inbounds { float, float }, { float, float }* %tmp.1, i32 0, i32 1
+    store float %fptrunc.1, float* %field.3, align 4
+    %cast.6 = bitcast { float, float }* %a to i8*
+    %cast.7 = bitcast { float, float }* %tmp.1 to i8*
+    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.6, i8* align 4 %cast.7, i64 8, i1 false)
+    %cast.8 = bitcast { float, float }* %tmp.2 to i8*
+    %cast.9 = bitcast { float, float }* %b to i8*
+    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.8, i8* align 4 %cast.9, i64 8, i1 false)
+    %field.4 = getelementptr inbounds { float, float }, { float, float }* %tmp.2, i32 0, i32 0
+    %.real.ld.1 = load float, float* %field.4
+    %fpext.0 = fpext float %.real.ld.1 to double
+    %field.5 = getelementptr inbounds { float, float }, { float, float }* %tmp.2, i32 0, i32 1
+    %.imag.ld.1 = load float, float* %field.5
+    %fpext.1 = fpext float %.imag.ld.1 to double
+    %field.6 = getelementptr inbounds { double, double }, { double, double }* %tmp.3, i32 0, i32 0
+    store double %fpext.0, double* %field.6, align 8
+    %field.7 = getelementptr inbounds { double, double }, { double, double }* %tmp.3, i32 0, i32 1
+    store double %fpext.1, double* %field.7, align 8
+    %cast.10 = bitcast { double, double }* %y to i8*
+    %cast.11 = bitcast { double, double }* %tmp.3 to i8*
+    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.10, i8* align 8 %cast.11, i64 16, i1 false)
+    %cast.12 = bitcast { float, float }* %a to i8*
+    %cast.13 = bitcast { float, float }* %b to i8*
+    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.12, i8* align 4 %cast.13, i64 8, i1 false)
+    %cast.14 = bitcast { double, double }* %x to i8*
+    %cast.15 = bitcast { double, double }* %y to i8*
+    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.14, i8* align 8 %cast.15, i64 16, i1 false)
+    ret void
+  }
   )RAW_RESULT");
 
   bool broken = h.finish(StripDebugInfo);
@@ -631,9 +630,9 @@
   }
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
-    store i64 0, i64* %x
-    store i64 0, i64* %y
-    store double 0.000000e+00, double* %z
+    store i64 0, i64* %x, align 8
+    store i64 0, i64* %y, align 8
+    store double 0.000000e+00, double* %z, align 8
     %x.ld.0 = load i64, i64* %x
     %icmp.0 = icmp eq i64 9, %x.ld.0
     %zext.0 = zext i1 %icmp.0 to i8
@@ -728,8 +727,8 @@
   }
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
-    store i64 0, i64* %x
-    store double 0.000000e+00, double* %y
+    store i64 0, i64* %x, align 8
+    store double 0.000000e+00, double* %y, align 8
     %x.ld.0 = load i64, i64* %x
     %add.0 = add i64 9, %x.ld.0
     %x.ld.1 = load i64, i64* %x
@@ -770,16 +769,16 @@
   h.mkAssign(vex, ypzpw);
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
-    store i64 0, i64* %x
-    store i64 9, i64* %y
-    store i64 10, i64* %z
-    store i64 11, i64* %w
+    store i64 0, i64* %x, align 8
+    store i64 9, i64* %y, align 8
+    store i64 10, i64* %z, align 8
+    store i64 11, i64* %w, align 8
     %y.ld.0 = load i64, i64* %y
     %z.ld.0 = load i64, i64* %z
     %add.0 = add i64 %y.ld.0, %z.ld.0
     %w.ld.0 = load i64, i64* %w
     %add.1 = add i64 %add.0, %w.ld.0
-    store i64 %add.1, i64* %x
+    store i64 %add.1, i64* %x, align 8
   )RAW_RESULT");
 
   bool isOK = h.expectBlock(exp);
@@ -826,12 +825,12 @@
   }
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
-    store i64 0, i64* %x
-    store i64 0, i64* %y
-    store i8 0, i8* %z
-    store i64 0, i64* %x2
-    store i64 0, i64* %y2
-    store i8 0, i8* %z2
+    store i64 0, i64* %x, align 8
+    store i64 0, i64* %y, align 8
+    store i8 0, i8* %z, align 1
+    store i64 0, i64* %x2, align 8
+    store i64 0, i64* %y2, align 8
+    store i8 0, i8* %z2, align 1
     %x.ld.0 = load i64, i64* %x
     %x2.ld.0 = load i64, i64* %x2
     %iand.0 = and i64 %x.ld.0, %x2.ld.0
@@ -932,9 +931,9 @@
   }
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
-    store i16 0, i16* %x
-    store i16 0, i16* %y
-    store double 0.000000e+00, double* %z
+    store i16 0, i16* %x, align 2
+    store i16 0, i16* %y, align 2
+    store double 0.000000e+00, double* %z, align 8
     %x.ld.0 = load i16, i16* %x
     %mul.0 = mul i16 -17, %x.ld.0
     %x.ld.1 = load i16, i16* %x
@@ -1012,10 +1011,10 @@
   }
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
-    store i64 0, i64* %x
-    store i64 0, i64* %y
-    store i64 0, i64* %s
-    store i32 0, i32* %z
+    store i64 0, i64* %x, align 8
+    store i64 0, i64* %y, align 8
+    store i64 0, i64* %s, align 8
+    store i32 0, i32* %z, align 4
     %x.ld.0 = load i64, i64* %x
     %s.ld.0 = load i64, i64* %s
     %shl.0 = shl i64 %x.ld.0, %s.ld.0
@@ -1076,154 +1075,154 @@
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     define void @foo(i8* nest %nest.0) #0 {
-    entry:
-      %tmp.12 = alloca { double, double }
-      %tmp.11 = alloca { double, double }
-      %tmp.10 = alloca { double, double }
-      %tmp.9 = alloca { double, double }
-      %tmp.8 = alloca { double, double }
-      %tmp.7 = alloca { double, double }
-      %tmp.6 = alloca { double, double }
-      %tmp.5 = alloca { double, double }
-      %tmp.4 = alloca { double, double }
-      %tmp.3 = alloca { double, double }
-      %tmp.2 = alloca { double, double }
-      %tmp.1 = alloca { double, double }
-      %tmp.0 = alloca { double, double }
-      %x = alloca { double, double }
-      %y = alloca { double, double }
-      %z = alloca { double, double }
-      %b = alloca i8
-      %cast.0 = bitcast { double, double }* %x to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.0, i8* align 8 bitcast ({ double, double }* @const.0 to i8*), i64 16, i1 false)
-      %cast.1 = bitcast { double, double }* %y to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.1, i8* align 8 bitcast ({ double, double }* @const.0 to i8*), i64 16, i1 false)
-      %cast.2 = bitcast { double, double }* %z to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.2, i8* align 8 bitcast ({ double, double }* @const.0 to i8*), i64 16, i1 false)
-      store i8 0, i8* %b
-      %cast.3 = bitcast { double, double }* %tmp.0 to i8*
-      %cast.4 = bitcast { double, double }* %x to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.3, i8* align 8 %cast.4, i64 16, i1 false)
-      %cast.5 = bitcast { double, double }* %tmp.1 to i8*
-      %cast.6 = bitcast { double, double }* %y to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.5, i8* align 8 %cast.6, i64 16, i1 false)
-      %field.0 = getelementptr inbounds { double, double }, { double, double }* %tmp.0, i32 0, i32 0
-      %.real.ld.0 = load double, double* %field.0
-      %field.1 = getelementptr inbounds { double, double }, { double, double }* %tmp.1, i32 0, i32 0
-      %.real.ld.1 = load double, double* %field.1
-      %fadd.0 = fadd double %.real.ld.0, %.real.ld.1
-      %field.2 = getelementptr inbounds { double, double }, { double, double }* %tmp.0, i32 0, i32 1
-      %.imag.ld.0 = load double, double* %field.2
-      %field.3 = getelementptr inbounds { double, double }, { double, double }* %tmp.1, i32 0, i32 1
-      %.imag.ld.1 = load double, double* %field.3
-      %fadd.1 = fadd double %.imag.ld.0, %.imag.ld.1
-      %field.4 = getelementptr inbounds { double, double }, { double, double }* %tmp.2, i32 0, i32 0
-      store double %fadd.0, double* %field.4
-      %field.5 = getelementptr inbounds { double, double }, { double, double }* %tmp.2, i32 0, i32 1
-      store double %fadd.1, double* %field.5
-      %cast.7 = bitcast { double, double }* %z to i8*
-      %cast.8 = bitcast { double, double }* %tmp.2 to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.7, i8* align 8 %cast.8, i64 16, i1 false)
-      %cast.9 = bitcast { double, double }* %tmp.3 to i8*
-      %cast.10 = bitcast { double, double }* %x to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.9, i8* align 8 %cast.10, i64 16, i1 false)
-      %cast.11 = bitcast { double, double }* %tmp.4 to i8*
-      %cast.12 = bitcast { double, double }* %y to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.11, i8* align 8 %cast.12, i64 16, i1 false)
-      %field.6 = getelementptr inbounds { double, double }, { double, double }* %tmp.3, i32 0, i32 0
-      %.real.ld.2 = load double, double* %field.6
-      %field.7 = getelementptr inbounds { double, double }, { double, double }* %tmp.4, i32 0, i32 0
-      %.real.ld.3 = load double, double* %field.7
-      %fsub.0 = fsub double %.real.ld.2, %.real.ld.3
-      %field.8 = getelementptr inbounds { double, double }, { double, double }* %tmp.3, i32 0, i32 1
-      %.imag.ld.2 = load double, double* %field.8
-      %field.9 = getelementptr inbounds { double, double }, { double, double }* %tmp.4, i32 0, i32 1
-      %.imag.ld.3 = load double, double* %field.9
-      %fsub.1 = fsub double %.imag.ld.2, %.imag.ld.3
-      %field.10 = getelementptr inbounds { double, double }, { double, double }* %tmp.5, i32 0, i32 0
-      store double %fsub.0, double* %field.10
-      %field.11 = getelementptr inbounds { double, double }, { double, double }* %tmp.5, i32 0, i32 1
-      store double %fsub.1, double* %field.11
-      %cast.13 = bitcast { double, double }* %z to i8*
-      %cast.14 = bitcast { double, double }* %tmp.5 to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.13, i8* align 8 %cast.14, i64 16, i1 false)
-      %cast.15 = bitcast { double, double }* %tmp.6 to i8*
-      %cast.16 = bitcast { double, double }* %x to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.15, i8* align 8 %cast.16, i64 16, i1 false)
-      %cast.17 = bitcast { double, double }* %tmp.7 to i8*
-      %cast.18 = bitcast { double, double }* %y to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.17, i8* align 8 %cast.18, i64 16, i1 false)
-      %field.12 = getelementptr inbounds { double, double }, { double, double }* %tmp.6, i32 0, i32 0
-      %.real.ld.4 = load double, double* %field.12
-      %field.13 = getelementptr inbounds { double, double }, { double, double }* %tmp.7, i32 0, i32 0
-      %.real.ld.5 = load double, double* %field.13
-      %fmul.0 = fmul double %.real.ld.4, %.real.ld.5
-      %field.14 = getelementptr inbounds { double, double }, { double, double }* %tmp.6, i32 0, i32 1
-      %.imag.ld.4 = load double, double* %field.14
-      %field.15 = getelementptr inbounds { double, double }, { double, double }* %tmp.7, i32 0, i32 1
-      %.imag.ld.5 = load double, double* %field.15
-      %fmul.1 = fmul double %.imag.ld.4, %.imag.ld.5
-      %fsub.2 = fsub double %fmul.0, %fmul.1
-      %field.16 = getelementptr inbounds { double, double }, { double, double }* %tmp.6, i32 0, i32 0
-      %.field.ld.0 = load double, double* %field.16
-      %field.17 = getelementptr inbounds { double, double }, { double, double }* %tmp.7, i32 0, i32 1
-      %.field.ld.1 = load double, double* %field.17
-      %fmul.2 = fmul double %.field.ld.0, %.field.ld.1
-      %field.18 = getelementptr inbounds { double, double }, { double, double }* %tmp.6, i32 0, i32 1
-      %.field.ld.2 = load double, double* %field.18
-      %field.19 = getelementptr inbounds { double, double }, { double, double }* %tmp.7, i32 0, i32 0
-      %.field.ld.3 = load double, double* %field.19
-      %fmul.3 = fmul double %.field.ld.2, %.field.ld.3
-      %fadd.2 = fadd double %fmul.2, %fmul.3
-      %field.20 = getelementptr inbounds { double, double }, { double, double }* %tmp.8, i32 0, i32 0
-      store double %fsub.2, double* %field.20
-      %field.21 = getelementptr inbounds { double, double }, { double, double }* %tmp.8, i32 0, i32 1
-      store double %fadd.2, double* %field.21
-      %cast.19 = bitcast { double, double }* %z to i8*
-      %cast.20 = bitcast { double, double }* %tmp.8 to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.19, i8* align 8 %cast.20, i64 16, i1 false)
-      %cast.21 = bitcast { double, double }* %tmp.9 to i8*
-      %cast.22 = bitcast { double, double }* %x to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.21, i8* align 8 %cast.22, i64 16, i1 false)
-      %cast.23 = bitcast { double, double }* %tmp.10 to i8*
-      %cast.24 = bitcast { double, double }* %y to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.23, i8* align 8 %cast.24, i64 16, i1 false)
-      %field.22 = getelementptr inbounds { double, double }, { double, double }* %tmp.9, i32 0, i32 0
-      %.real.ld.6 = load double, double* %field.22
-      %field.23 = getelementptr inbounds { double, double }, { double, double }* %tmp.10, i32 0, i32 0
-      %.real.ld.7 = load double, double* %field.23
-      %fcmp.0 = fcmp oeq double %.real.ld.6, %.real.ld.7
-      %zext.0 = zext i1 %fcmp.0 to i8
-      %field.24 = getelementptr inbounds { double, double }, { double, double }* %tmp.9, i32 0, i32 1
-      %.imag.ld.6 = load double, double* %field.24
-      %field.25 = getelementptr inbounds { double, double }, { double, double }* %tmp.10, i32 0, i32 1
-      %.imag.ld.7 = load double, double* %field.25
-      %fcmp.1 = fcmp oeq double %.imag.ld.6, %.imag.ld.7
-      %zext.1 = zext i1 %fcmp.1 to i8
-      %iand.0 = and i8 %zext.0, %zext.1
-      store i8 %iand.0, i8* %b
-      %cast.25 = bitcast { double, double }* %tmp.11 to i8*
-      %cast.26 = bitcast { double, double }* %x to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.25, i8* align 8 %cast.26, i64 16, i1 false)
-      %cast.27 = bitcast { double, double }* %tmp.12 to i8*
-      %cast.28 = bitcast { double, double }* %y to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.27, i8* align 8 %cast.28, i64 16, i1 false)
-      %field.26 = getelementptr inbounds { double, double }, { double, double }* %tmp.11, i32 0, i32 0
-      %.real.ld.8 = load double, double* %field.26
-      %field.27 = getelementptr inbounds { double, double }, { double, double }* %tmp.12, i32 0, i32 0
-      %.real.ld.9 = load double, double* %field.27
-      %fcmp.2 = fcmp une double %.real.ld.8, %.real.ld.9
-      %zext.2 = zext i1 %fcmp.2 to i8
-      %field.28 = getelementptr inbounds { double, double }, { double, double }* %tmp.11, i32 0, i32 1
-      %.imag.ld.8 = load double, double* %field.28
-      %field.29 = getelementptr inbounds { double, double }, { double, double }* %tmp.12, i32 0, i32 1
-      %.imag.ld.9 = load double, double* %field.29
-      %fcmp.3 = fcmp une double %.imag.ld.8, %.imag.ld.9
-      %zext.3 = zext i1 %fcmp.3 to i8
-      %ior.0 = or i8 %zext.2, %zext.3
-      store i8 %ior.0, i8* %b
-      ret void
-    }
+  entry:
+    %tmp.12 = alloca { double, double }
+    %tmp.11 = alloca { double, double }
+    %tmp.10 = alloca { double, double }
+    %tmp.9 = alloca { double, double }
+    %tmp.8 = alloca { double, double }
+    %tmp.7 = alloca { double, double }
+    %tmp.6 = alloca { double, double }
+    %tmp.5 = alloca { double, double }
+    %tmp.4 = alloca { double, double }
+    %tmp.3 = alloca { double, double }
+    %tmp.2 = alloca { double, double }
+    %tmp.1 = alloca { double, double }
+    %tmp.0 = alloca { double, double }
+    %x = alloca { double, double }
+    %y = alloca { double, double }
+    %z = alloca { double, double }
+    %b = alloca i8
+    %cast.0 = bitcast { double, double }* %x to i8*
+    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.0, i8* align 8 bitcast ({ double, double }* @const.0 to i8*), i64 16, i1 false)
+    %cast.1 = bitcast { double, double }* %y to i8*
+    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.1, i8* align 8 bitcast ({ double, double }* @const.0 to i8*), i64 16, i1 false)
+    %cast.2 = bitcast { double, double }* %z to i8*
+    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.2, i8* align 8 bitcast ({ double, double }* @const.0 to i8*), i64 16, i1 false)
+    store i8 0, i8* %b, align 1
+    %cast.3 = bitcast { double, double }* %tmp.0 to i8*
+    %cast.4 = bitcast { double, double }* %x to i8*
+    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.3, i8* align 8 %cast.4, i64 16, i1 false)
+    %cast.5 = bitcast { double, double }* %tmp.1 to i8*
+    %cast.6 = bitcast { double, double }* %y to i8*
+    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.5, i8* align 8 %cast.6, i64 16, i1 false)
+    %field.0 = getelementptr inbounds { double, double }, { double, double }* %tmp.0, i32 0, i32 0
+    %.real.ld.0 = load double, double* %field.0
+    %field.1 = getelementptr inbounds { double, double }, { double, double }* %tmp.1, i32 0, i32 0
+    %.real.ld.1 = load double, double* %field.1
+    %fadd.0 = fadd double %.real.ld.0, %.real.ld.1
+    %field.2 = getelementptr inbounds { double, double }, { double, double }* %tmp.0, i32 0, i32 1
+    %.imag.ld.0 = load double, double* %field.2
+    %field.3 = getelementptr inbounds { double, double }, { double, double }* %tmp.1, i32 0, i32 1
+    %.imag.ld.1 = load double, double* %field.3
+    %fadd.1 = fadd double %.imag.ld.0, %.imag.ld.1
+    %field.4 = getelementptr inbounds { double, double }, { double, double }* %tmp.2, i32 0, i32 0
+    store double %fadd.0, double* %field.4, align 8
+    %field.5 = getelementptr inbounds { double, double }, { double, double }* %tmp.2, i32 0, i32 1
+    store double %fadd.1, double* %field.5, align 8
+    %cast.7 = bitcast { double, double }* %z to i8*
+    %cast.8 = bitcast { double, double }* %tmp.2 to i8*
+    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.7, i8* align 8 %cast.8, i64 16, i1 false)
+    %cast.9 = bitcast { double, double }* %tmp.3 to i8*
+    %cast.10 = bitcast { double, double }* %x to i8*
+    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.9, i8* align 8 %cast.10, i64 16, i1 false)
+    %cast.11 = bitcast { double, double }* %tmp.4 to i8*
+    %cast.12 = bitcast { double, double }* %y to i8*
+    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.11, i8* align 8 %cast.12, i64 16, i1 false)
+    %field.6 = getelementptr inbounds { double, double }, { double, double }* %tmp.3, i32 0, i32 0
+    %.real.ld.2 = load double, double* %field.6
+    %field.7 = getelementptr inbounds { double, double }, { double, double }* %tmp.4, i32 0, i32 0
+    %.real.ld.3 = load double, double* %field.7
+    %fsub.0 = fsub double %.real.ld.2, %.real.ld.3
+    %field.8 = getelementptr inbounds { double, double }, { double, double }* %tmp.3, i32 0, i32 1
+    %.imag.ld.2 = load double, double* %field.8
+    %field.9 = getelementptr inbounds { double, double }, { double, double }* %tmp.4, i32 0, i32 1
+    %.imag.ld.3 = load double, double* %field.9
+    %fsub.1 = fsub double %.imag.ld.2, %.imag.ld.3
+    %field.10 = getelementptr inbounds { double, double }, { double, double }* %tmp.5, i32 0, i32 0
+    store double %fsub.0, double* %field.10, align 8
+    %field.11 = getelementptr inbounds { double, double }, { double, double }* %tmp.5, i32 0, i32 1
+    store double %fsub.1, double* %field.11, align 8
+    %cast.13 = bitcast { double, double }* %z to i8*
+    %cast.14 = bitcast { double, double }* %tmp.5 to i8*
+    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.13, i8* align 8 %cast.14, i64 16, i1 false)
+    %cast.15 = bitcast { double, double }* %tmp.6 to i8*
+    %cast.16 = bitcast { double, double }* %x to i8*
+    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.15, i8* align 8 %cast.16, i64 16, i1 false)
+    %cast.17 = bitcast { double, double }* %tmp.7 to i8*
+    %cast.18 = bitcast { double, double }* %y to i8*
+    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.17, i8* align 8 %cast.18, i64 16, i1 false)
+    %field.12 = getelementptr inbounds { double, double }, { double, double }* %tmp.6, i32 0, i32 0
+    %.real.ld.4 = load double, double* %field.12
+    %field.13 = getelementptr inbounds { double, double }, { double, double }* %tmp.7, i32 0, i32 0
+    %.real.ld.5 = load double, double* %field.13
+    %fmul.0 = fmul double %.real.ld.4, %.real.ld.5
+    %field.14 = getelementptr inbounds { double, double }, { double, double }* %tmp.6, i32 0, i32 1
+    %.imag.ld.4 = load double, double* %field.14
+    %field.15 = getelementptr inbounds { double, double }, { double, double }* %tmp.7, i32 0, i32 1
+    %.imag.ld.5 = load double, double* %field.15
+    %fmul.1 = fmul double %.imag.ld.4, %.imag.ld.5
+    %fsub.2 = fsub double %fmul.0, %fmul.1
+    %field.16 = getelementptr inbounds { double, double }, { double, double }* %tmp.6, i32 0, i32 0
+    %.field.ld.0 = load double, double* %field.16
+    %field.17 = getelementptr inbounds { double, double }, { double, double }* %tmp.7, i32 0, i32 1
+    %.field.ld.1 = load double, double* %field.17
+    %fmul.2 = fmul double %.field.ld.0, %.field.ld.1
+    %field.18 = getelementptr inbounds { double, double }, { double, double }* %tmp.6, i32 0, i32 1
+    %.field.ld.2 = load double, double* %field.18
+    %field.19 = getelementptr inbounds { double, double }, { double, double }* %tmp.7, i32 0, i32 0
+    %.field.ld.3 = load double, double* %field.19
+    %fmul.3 = fmul double %.field.ld.2, %.field.ld.3
+    %fadd.2 = fadd double %fmul.2, %fmul.3
+    %field.20 = getelementptr inbounds { double, double }, { double, double }* %tmp.8, i32 0, i32 0
+    store double %fsub.2, double* %field.20, align 8
+    %field.21 = getelementptr inbounds { double, double }, { double, double }* %tmp.8, i32 0, i32 1
+    store double %fadd.2, double* %field.21, align 8
+    %cast.19 = bitcast { double, double }* %z to i8*
+    %cast.20 = bitcast { double, double }* %tmp.8 to i8*
+    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.19, i8* align 8 %cast.20, i64 16, i1 false)
+    %cast.21 = bitcast { double, double }* %tmp.9 to i8*
+    %cast.22 = bitcast { double, double }* %x to i8*
+    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.21, i8* align 8 %cast.22, i64 16, i1 false)
+    %cast.23 = bitcast { double, double }* %tmp.10 to i8*
+    %cast.24 = bitcast { double, double }* %y to i8*
+    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.23, i8* align 8 %cast.24, i64 16, i1 false)
+    %field.22 = getelementptr inbounds { double, double }, { double, double }* %tmp.9, i32 0, i32 0
+    %.real.ld.6 = load double, double* %field.22
+    %field.23 = getelementptr inbounds { double, double }, { double, double }* %tmp.10, i32 0, i32 0
+    %.real.ld.7 = load double, double* %field.23
+    %fcmp.0 = fcmp oeq double %.real.ld.6, %.real.ld.7
+    %zext.0 = zext i1 %fcmp.0 to i8
+    %field.24 = getelementptr inbounds { double, double }, { double, double }* %tmp.9, i32 0, i32 1
+    %.imag.ld.6 = load double, double* %field.24
+    %field.25 = getelementptr inbounds { double, double }, { double, double }* %tmp.10, i32 0, i32 1
+    %.imag.ld.7 = load double, double* %field.25
+    %fcmp.1 = fcmp oeq double %.imag.ld.6, %.imag.ld.7
+    %zext.1 = zext i1 %fcmp.1 to i8
+    %iand.0 = and i8 %zext.0, %zext.1
+    store i8 %iand.0, i8* %b, align 1
+    %cast.25 = bitcast { double, double }* %tmp.11 to i8*
+    %cast.26 = bitcast { double, double }* %x to i8*
+    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.25, i8* align 8 %cast.26, i64 16, i1 false)
+    %cast.27 = bitcast { double, double }* %tmp.12 to i8*
+    %cast.28 = bitcast { double, double }* %y to i8*
+    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.27, i8* align 8 %cast.28, i64 16, i1 false)
+    %field.26 = getelementptr inbounds { double, double }, { double, double }* %tmp.11, i32 0, i32 0
+    %.real.ld.8 = load double, double* %field.26
+    %field.27 = getelementptr inbounds { double, double }, { double, double }* %tmp.12, i32 0, i32 0
+    %.real.ld.9 = load double, double* %field.27
+    %fcmp.2 = fcmp une double %.real.ld.8, %.real.ld.9
+    %zext.2 = zext i1 %fcmp.2 to i8
+    %field.28 = getelementptr inbounds { double, double }, { double, double }* %tmp.11, i32 0, i32 1
+    %.imag.ld.8 = load double, double* %field.28
+    %field.29 = getelementptr inbounds { double, double }, { double, double }* %tmp.12, i32 0, i32 1
+    %.imag.ld.9 = load double, double* %field.29
+    %fcmp.3 = fcmp une double %.imag.ld.8, %.imag.ld.9
+    %zext.3 = zext i1 %fcmp.3 to i8
+    %ior.0 = or i8 %zext.2, %zext.3
+    store i8 %ior.0, i8* %b, align 1
+    ret void
+  }
   )RAW_RESULT");
 
   bool broken = h.finish(StripDebugInfo);
@@ -1267,22 +1266,22 @@
   h.mkAssign(xvex3, compex);
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
-    store double 0.000000e+00, double* %a
-    store double 0.000000e+00, double* %b
+    store double 0.000000e+00, double* %a, align 8
+    store double 0.000000e+00, double* %b, align 8
     %cast.0 = bitcast { double, double }* %x to i8*
     call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.0, i8* align 8 bitcast ({ double, double }* @const.0 to i8*), i64 16, i1 false)
     %field.0 = getelementptr inbounds { double, double }, { double, double }* %x, i32 0, i32 0
     %x.real.ld.0 = load double, double* %field.0
-    store double %x.real.ld.0, double* %a
+    store double %x.real.ld.0, double* %a, align 8
     %field.1 = getelementptr inbounds { double, double }, { double, double }* %x, i32 0, i32 1
     %x.imag.ld.0 = load double, double* %field.1
-    store double %x.imag.ld.0, double* %b
+    store double %x.imag.ld.0, double* %b, align 8
     %b.ld.0 = load double, double* %b
     %a.ld.0 = load double, double* %a
     %field.2 = getelementptr inbounds { double, double }, { double, double }* %x, i32 0, i32 0
-    store double %b.ld.0, double* %field.2
+    store double %b.ld.0, double* %field.2, align 8
     %field.3 = getelementptr inbounds { double, double }, { double, double }* %x, i32 0, i32 1
-    store double %a.ld.0, double* %field.3
+    store double %a.ld.0, double* %field.3, align 8
   )RAW_RESULT");
 
   bool isOK = h.expectBlock(exp);
@@ -1344,29 +1343,29 @@
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     define void @foo(i8* nest %nest.0) #0 {
-    entry:
-      %a = alloca i64
-      %b = alloca i64
-      store i64 0, i64* %a
-      store i64 0, i64* %b
-      %a.ld.0 = load i64, i64* %a
-      %b.ld.0 = load i64, i64* %b
-      %icmp.0 = icmp slt i64 %a.ld.0, %b.ld.0
-      %zext.0 = zext i1 %icmp.0 to i8
-      %trunc.0 = trunc i8 %zext.0 to i1
-      br i1 %trunc.0, label %then.0, label %else.0
-
-    then.0:                                           ; preds = %entry
-      call void @foo(i8* nest undef)
-      br label %fallthrough.0
-
-    fallthrough.0:                                    ; preds = %else.0, %then.0
-      ret void
-
-    else.0:                                           ; preds = %entry
-      call void @foo(i8* nest undef)
-      br label %fallthrough.0
-    }
+  entry:
+    %a = alloca i64
+    %b = alloca i64
+    store i64 0, i64* %a, align 8
+    store i64 0, i64* %b, align 8
+    %a.ld.0 = load i64, i64* %a
+    %b.ld.0 = load i64, i64* %b
+    %icmp.0 = icmp slt i64 %a.ld.0, %b.ld.0
+    %zext.0 = zext i1 %icmp.0 to i8
+    %trunc.0 = trunc i8 %zext.0 to i1
+    br i1 %trunc.0, label %then.0, label %else.0
+  
+  then.0:                                           ; preds = %entry
+    call void @foo(i8* nest undef)
+    br label %fallthrough.0
+  
+  fallthrough.0:                                    ; preds = %else.0, %then.0
+    ret void
+  
+  else.0:                                           ; preds = %entry
+    call void @foo(i8* nest undef)
+    br label %fallthrough.0
+  }
   )RAW_RESULT");
 
   bool broken = h.finish(StripDebugInfo);
@@ -1401,22 +1400,25 @@
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     define void @foo(i8* nest %nest.0) #0 {
-    entry:
-      %a = alloca i64
-      %tmpv.0 = alloca i64
-      store i64 0, i64* %a
-      br i1 true, label %then.0, label %else.0
-    then.0:                                           ; preds = %entry
-      call void @foo(i8* nest undef)
-      br label %fallthrough.0
-    fallthrough.0:                                    ; preds = %else.0, %then.0
-      %tmpv.0.ld.0 = load i64, i64* %tmpv.0
-      ret void
-    else.0:                                           ; preds = %entry
-      %a.ld.0 = load i64, i64* %a
-      store i64 %a.ld.0, i64* %tmpv.0
-      br label %fallthrough.0
-    }
+  entry:
+    %a = alloca i64
+    %tmpv.0 = alloca i64
+    store i64 0, i64* %a, align 8
+    br i1 true, label %then.0, label %else.0
+  
+  then.0:                                           ; preds = %entry
+    call void @foo(i8* nest undef)
+    br label %fallthrough.0
+  
+  fallthrough.0:                                    ; preds = %else.0, %then.0
+    %tmpv.0.ld.0 = load i64, i64* %tmpv.0
+    ret void
+  
+  else.0:                                           ; preds = %entry
+    %a.ld.0 = load i64, i64* %a
+    store i64 %a.ld.0, i64* %tmpv.0, align 8
+    br label %fallthrough.0
+  }
   )RAW_RESULT");
 
   bool broken = h.finish(StripDebugInfo);
@@ -1454,34 +1456,34 @@
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     define void @foo({ [16 x i32], i32 }* sret %sret.formal.0, i8* nest %nest.0, { [16 x i32], i32 }* byval %p0, i32 %p1) #0 {
-    entry:
-      %p1.addr = alloca i32
-      %a = alloca { [16 x i32], i32 }
-      %tmpv.0 = alloca { [16 x i32], i32 }
-      store i32 %p1, i32* %p1.addr
-      %p1.ld.0 = load i32, i32* %p1.addr
-      %icmp.0 = icmp slt i32 %p1.ld.0, 7
-      %zext.0 = zext i1 %icmp.0 to i8
-      %trunc.0 = trunc i8 %zext.0 to i1
-      br i1 %trunc.0, label %then.0, label %else.0
-
-    then.0:                                           ; preds = %entry
-      %cast.0 = bitcast { [16 x i32], i32 }* %tmpv.0 to i8*
-      %cast.1 = bitcast { [16 x i32], i32 }* %p0 to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.0, i8* align 4 %cast.1, i64 68, i1 false)
-      br label %fallthrough.0
-
-    fallthrough.0:                                    ; preds = %else.0, %then.0
-      %cast.3 = bitcast { [16 x i32], i32 }* %a to i8*
-      %cast.4 = bitcast { [16 x i32], i32 }* %tmpv.0 to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.3, i8* align 4 %cast.4, i64 68, i1 false)
-      ret void
-
-    else.0:                                           ; preds = %entry
-      %cast.2 = bitcast { [16 x i32], i32 }* %tmpv.0 to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.2, i8* align 4 bitcast ({ [16 x i32], i32 }* @const.0 to i8*), i64 68, i1 false)
-      br label %fallthrough.0
-    }
+  entry:
+    %p1.addr = alloca i32
+    %a = alloca { [16 x i32], i32 }
+    %tmpv.0 = alloca { [16 x i32], i32 }
+    store i32 %p1, i32* %p1.addr, align 4
+    %p1.ld.0 = load i32, i32* %p1.addr
+    %icmp.0 = icmp slt i32 %p1.ld.0, 7
+    %zext.0 = zext i1 %icmp.0 to i8
+    %trunc.0 = trunc i8 %zext.0 to i1
+    br i1 %trunc.0, label %then.0, label %else.0
+  
+  then.0:                                           ; preds = %entry
+    %cast.0 = bitcast { [16 x i32], i32 }* %tmpv.0 to i8*
+    %cast.1 = bitcast { [16 x i32], i32 }* %p0 to i8*
+    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.0, i8* align 4 %cast.1, i64 68, i1 false)
+    br label %fallthrough.0
+  
+  fallthrough.0:                                    ; preds = %else.0, %then.0
+    %cast.3 = bitcast { [16 x i32], i32 }* %a to i8*
+    %cast.4 = bitcast { [16 x i32], i32 }* %tmpv.0 to i8*
+    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.3, i8* align 4 %cast.4, i64 68, i1 false)
+    ret void
+  
+  else.0:                                           ; preds = %entry
+    %cast.2 = bitcast { [16 x i32], i32 }* %tmpv.0 to i8*
+    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.2, i8* align 4 bitcast ({ [16 x i32], i32 }* @const.0 to i8*), i64 68, i1 false)
+    br label %fallthrough.0
+  }
   )RAW_RESULT");
 
   bool broken = h.finish(StripDebugInfo);
@@ -1529,34 +1531,34 @@
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     define void @foo({ [16 x i32], i32 }* sret %sret.formal.0, i8* nest %nest.0, { [16 x i32], i32 }* %p0, i32 %p1) #0 {
-    entry:
-      %p1.addr = alloca i32
-      %a = alloca { [16 x i32], i32 }
-      %tmpv.0 = alloca { [16 x i32], i32 }
-      store i32 %p1, i32* %p1.addr
-      %p1.ld.0 = load i32, i32* %p1.addr
-      %icmp.0 = icmp slt i32 %p1.ld.0, 7
-      %zext.0 = zext i1 %icmp.0 to i8
-      %trunc.0 = trunc i8 %zext.0 to i1
-      br i1 %trunc.0, label %then.0, label %else.0
-
-    then.0:                                           ; preds = %entry
-      %cast.0 = bitcast { [16 x i32], i32 }* %tmpv.0 to i8*
-      %cast.1 = bitcast { [16 x i32], i32 }* %p0 to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.0, i8* align 4 %cast.1, i64 68, i1 false)
-      br label %fallthrough.0
-
-    fallthrough.0:                                    ; preds = %else.0, %then.0
-      %cast.3 = bitcast { [16 x i32], i32 }* %a to i8*
-      %cast.4 = bitcast { [16 x i32], i32 }* %tmpv.0 to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.3, i8* align 4 %cast.4, i64 68, i1 false)
-      ret void
-
-    else.0:                                           ; preds = %entry
-      %cast.2 = bitcast { [16 x i32], i32 }* %tmpv.0 to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.2, i8* align 4 bitcast ({ [16 x i32], i32 }* @const.0 to i8*), i64 68, i1 false)
-      br label %fallthrough.0
-    }
+  entry:
+    %p1.addr = alloca i32
+    %a = alloca { [16 x i32], i32 }
+    %tmpv.0 = alloca { [16 x i32], i32 }
+    store i32 %p1, i32* %p1.addr, align 4
+    %p1.ld.0 = load i32, i32* %p1.addr
+    %icmp.0 = icmp slt i32 %p1.ld.0, 7
+    %zext.0 = zext i1 %icmp.0 to i8
+    %trunc.0 = trunc i8 %zext.0 to i1
+    br i1 %trunc.0, label %then.0, label %else.0
+  
+  then.0:                                           ; preds = %entry
+    %cast.0 = bitcast { [16 x i32], i32 }* %tmpv.0 to i8*
+    %cast.1 = bitcast { [16 x i32], i32 }* %p0 to i8*
+    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.0, i8* align 4 %cast.1, i64 68, i1 false)
+    br label %fallthrough.0
+  
+  fallthrough.0:                                    ; preds = %else.0, %then.0
+    %cast.3 = bitcast { [16 x i32], i32 }* %a to i8*
+    %cast.4 = bitcast { [16 x i32], i32 }* %tmpv.0 to i8*
+    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.3, i8* align 4 %cast.4, i64 68, i1 false)
+    ret void
+  
+  else.0:                                           ; preds = %entry
+    %cast.2 = bitcast { [16 x i32], i32 }* %tmpv.0 to i8*
+    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.2, i8* align 4 bitcast ({ [16 x i32], i32 }* @const.0 to i8*), i64 68, i1 false)
+    br label %fallthrough.0
+  }
   )RAW_RESULT");
 
   bool broken = h.finish(StripDebugInfo);
@@ -1588,19 +1590,19 @@
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     define i64 @foo(i8* nest %nest.0, i32 %param1, i32 %param2, i64* %param3) #0 {
-      entry:
-        %param1.addr = alloca i32
-        %param2.addr = alloca i32
-        %param3.addr = alloca i64*
-        %x = alloca i64
-        store i32 %param1, i32* %param1.addr
-        store i32 %param2, i32* %param2.addr
-        store i64* %param3, i64** %param3.addr
-        store i64 0, i64* %x
-        store i64 5, i64* %x
-        %x.ld.0 = load i64, i64* %x
-        ret i64 0
-      }
+  entry:
+    %param1.addr = alloca i32
+    %param2.addr = alloca i32
+    %param3.addr = alloca i64*
+    %x = alloca i64
+    store i32 %param1, i32* %param1.addr, align 4
+    store i32 %param2, i32* %param2.addr, align 4
+    store i64* %param3, i64** %param3.addr, align 8
+    store i64 0, i64* %x, align 8
+    store i64 5, i64* %x, align 8
+    %x.ld.0 = load i64, i64* %x
+    ret i64 0
+  }
   )RAW_RESULT");
 
   bool broken = h.finish(StripDebugInfo);
@@ -1638,31 +1640,31 @@
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     define i64 @foo(i8* nest %nest.0, i32 %param1, i32 %param2, i64* %param3) #0 {
-      entry:
-      %tmp.0 = alloca { i64, i64 }
-      %param1.addr = alloca i32
-      %param2.addr = alloca i32
-      %param3.addr = alloca i64*
-      %x = alloca i64
-      %y = alloca { i64, i64 }
-      store i32 %param1, i32* %param1.addr
-      store i32 %param2, i32* %param2.addr
-      store i64* %param3, i64** %param3.addr
-      store i64 0, i64* %x
-      %cast.0 = bitcast { i64, i64 }* %y to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.0, i8* align 8 bitcast ({ i64, i64 }* @const.0 to i8*), i64 16, i1 false)
-      store i64 5, i64* %x
-      %x.ld.0 = load i64, i64* %x
-      %x.ld.1 = load i64, i64* %x
-      %field.0 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %tmp.0, i32 0, i32 0
-      store i64 %x.ld.0, i64* %field.0
-      %field.1 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %tmp.0, i32 0, i32 1
-      store i64 %x.ld.1, i64* %field.1
-      %cast.1 = bitcast { i64, i64 }* %y to i8*
-      %cast.2 = bitcast { i64, i64 }* %tmp.0 to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.1, i8* align 8 %cast.2, i64 16, i1 false)
-      ret i64 0
-    }
+  entry:
+    %tmp.0 = alloca { i64, i64 }
+    %param1.addr = alloca i32
+    %param2.addr = alloca i32
+    %param3.addr = alloca i64*
+    %x = alloca i64
+    %y = alloca { i64, i64 }
+    store i32 %param1, i32* %param1.addr, align 4
+    store i32 %param2, i32* %param2.addr, align 4
+    store i64* %param3, i64** %param3.addr, align 8
+    store i64 0, i64* %x, align 8
+    %cast.0 = bitcast { i64, i64 }* %y to i8*
+    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.0, i8* align 8 bitcast ({ i64, i64 }* @const.0 to i8*), i64 16, i1 false)
+    store i64 5, i64* %x, align 8
+    %x.ld.0 = load i64, i64* %x
+    %x.ld.1 = load i64, i64* %x
+    %field.0 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %tmp.0, i32 0, i32 0
+    store i64 %x.ld.0, i64* %field.0, align 8
+    %field.1 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %tmp.0, i32 0, i32 1
+    store i64 %x.ld.1, i64* %field.1, align 8
+    %cast.1 = bitcast { i64, i64 }* %y to i8*
+    %cast.2 = bitcast { i64, i64 }* %tmp.0 to i8*
+    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.1, i8* align 8 %cast.2, i64 16, i1 false)
+    ret i64 0
+  }
   )RAW_RESULT");
 
   bool broken = h.finish(StripDebugInfo);
@@ -1700,33 +1702,33 @@
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     define void @foo(i8* nest %nest.0, i32* %p0, i32* %p1) #0 {
-    entry:
-      %p0.addr = alloca i32*
-      %p1.addr = alloca i32*
-      %tmpv.0 = alloca i32*
-      store i32* %p0, i32** %p0.addr
-      store i32* %p1, i32** %p1.addr
-      %p0.ld.0 = load i32*, i32** %p0.addr
-      %icmp.0 = icmp eq i32* %p0.ld.0, null
-      %zext.0 = zext i1 %icmp.0 to i8
-      %trunc.0 = trunc i8 %zext.0 to i1
-      br i1 %trunc.0, label %then.0, label %else.0
-
-    then.0:                                           ; preds = %entry
-      %p1.ld.0 = load i32*, i32** %p1.addr
-      store i32* %p1.ld.0, i32** %tmpv.0
-      br label %fallthrough.0
-
-    fallthrough.0:                                ; preds = %else.0, %then.0
-      %tmpv.0.ld.0 = load i32*, i32** %tmpv.0
-      store i32 7, i32* %tmpv.0.ld.0
-      ret void
-
-    else.0:                                           ; preds = %entry
-      %p0.ld.1 = load i32*, i32** %p0.addr
-      store i32* %p0.ld.1, i32** %tmpv.0
-      br label %fallthrough.0
-    }
+  entry:
+    %p0.addr = alloca i32*
+    %p1.addr = alloca i32*
+    %tmpv.0 = alloca i32*
+    store i32* %p0, i32** %p0.addr, align 8
+    store i32* %p1, i32** %p1.addr, align 8
+    %p0.ld.0 = load i32*, i32** %p0.addr
+    %icmp.0 = icmp eq i32* %p0.ld.0, null
+    %zext.0 = zext i1 %icmp.0 to i8
+    %trunc.0 = trunc i8 %zext.0 to i1
+    br i1 %trunc.0, label %then.0, label %else.0
+  
+  then.0:                                           ; preds = %entry
+    %p1.ld.0 = load i32*, i32** %p1.addr
+    store i32* %p1.ld.0, i32** %tmpv.0, align 8
+    br label %fallthrough.0
+  
+  fallthrough.0:                                    ; preds = %else.0, %then.0
+    %tmpv.0.ld.0 = load i32*, i32** %tmpv.0
+    store i32 7, i32* %tmpv.0.ld.0, align 4
+    ret void
+  
+  else.0:                                           ; preds = %entry
+    %p0.ld.1 = load i32*, i32** %p0.addr
+    store i32* %p0.ld.1, i32** %tmpv.0, align 8
+    br label %fallthrough.0
+  }
   )RAW_RESULT");
 
   bool broken = h.finish(StripDebugInfo);
@@ -1772,24 +1774,24 @@
   h.mkLocal("r", bf64t, be->unary_expression(OPERATOR_MINUS, veq, loc));
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
-    store i8 0, i8* %x
+    store i8 0, i8* %x, align 1
     %x.ld.0 = load i8, i8* %x
     %icmp.0 = icmp ne i8 %x.ld.0, 0
     %xor.0 = xor i1 %icmp.0, true
     %zext.0 = zext i1 %xor.0 to i8
-    store i8 %zext.0, i8* %y
-    store i32 0, i32* %a
+    store i8 %zext.0, i8* %y, align 1
+    store i32 0, i32* %a, align 4
     %a.ld.0 = load i32, i32* %a
     %sub.0 = sub i32 0, %a.ld.0
-    store i32 %sub.0, i32* %b
-    store i64 0, i64* %z
+    store i32 %sub.0, i32* %b, align 4
+    store i64 0, i64* %z, align 8
     %z.ld.0 = load i64, i64* %z
     %xor.1 = xor i64 %z.ld.0, -1
-    store i64 %xor.1, i64* %w
-    store double 0.000000e+00, double* %q
+    store i64 %xor.1, i64* %w, align 8
+    store double 0.000000e+00, double* %q, align 8
     %q.ld.0 = load double, double* %q
     %fsub.0 = fsub double -0.000000e+00, %q.ld.0
-    store double %fsub.0, double* %r
+    store double %fsub.0, double* %r, align 8
   )RAW_RESULT");
 
   bool isOK = h.expectBlock(exp);
diff --git a/unittests/BackendCore/BackendFcnTests.cpp b/unittests/BackendCore/BackendFcnTests.cpp
index d87ef1b..a8f1756 100644
--- a/unittests/BackendCore/BackendFcnTests.cpp
+++ b/unittests/BackendCore/BackendFcnTests.cpp
@@ -298,7 +298,7 @@
   h.mkExprStmt(call);
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
-    store i64 0, i64* %x
+    store i64 0, i64* %x, align 8
     %x.ld.0 = load i64, i64* %x
     %call.0 = call addrspace(0) i64 @llvm.cttz.i64(i64 %x.ld.0, i1 true)
   )RAW_RESULT");
@@ -364,8 +364,8 @@
   }
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
-    store i64 0, i64* %x
-    store i64 10101, i64* %y
+    store i64 0, i64* %x, align 8
+    store i64 10101, i64* %y, align 8
     %cast.0 = bitcast i64* %x to i8*
     %cast.1 = bitcast i64* %y to i8*
     %call.0 = call addrspace(0) i32 @memcmp(i8* %cast.0, i8* %cast.1, i64 8)
@@ -426,10 +426,9 @@
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     %call.0 = call addrspace(0) i64 @syscall(i8* nest undef, i64 64)
-    store i64 %call.0, i64* %x
-    %call.1 = call addrspace(0) i32 bitcast (i64 (i8*, i64)*
-          @syscall to i32 (i8*, i32)*)(i8* nest undef, i32 32)
-    store i32 %call.1, i32* %y
+    store i64 %call.0, i64* %x, align 8
+    %call.1 = call addrspace(0) i32 bitcast (i64 (i8*, i64)* @syscall to i32 (i8*, i32)*)(i8* nest undef, i32 32)
+    store i32 %call.1, i32* %y, align 4
   )RAW_RESULT");
 
   bool isOK = h.expectBlock(exp);
@@ -500,13 +499,13 @@
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     %call.0 = call addrspace(0) i32 @bar(i8* nest undef)
-    store i32 %call.0, i32* %a
+    store i32 %call.0, i32* %a, align 4
     %call.1 = call addrspace(0) i32 @bar(i8* nest undef)
-    store i32 %call.1, i32* %b
+    store i32 %call.1, i32* %b, align 4
     %call.2 = call addrspace(0) {}* bitcast ({ i32 }* (i8*)* @baz to {}* (i8*)*)(i8* nest undef)
-    store {}* %call.2, {}** %x
+    store {}* %call.2, {}** %x, align 8
     %call.3 = call addrspace(0) { i32 }* @baz(i8* nest undef)
-    store { i32 }* %call.3, { i32 }** %y
+    store { i32 }* %call.3, { i32 }** %y, align 8
   )RAW_RESULT");
 
   bool isOK = h.expectBlock(exp);
diff --git a/unittests/BackendCore/BackendPointerExprTests.cpp b/unittests/BackendCore/BackendPointerExprTests.cpp
index 12a7f2b..af4f04c 100644
--- a/unittests/BackendCore/BackendPointerExprTests.cpp
+++ b/unittests/BackendCore/BackendPointerExprTests.cpp
@@ -72,14 +72,14 @@
   }
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
-    store i64 10, i64* %y
-    store i64* null, i64** %x
-    store i64* %y, i64** %x
+    store i64 10, i64* %y, align 8
+    store i64* null, i64** %x, align 8
+    store i64* %y, i64** %x, align 8
     %x.ld.0 = load i64*, i64** %x
     %.ld.0 = load i64, i64* %x.ld.0
-    store i64 %.ld.0, i64* %y
+    store i64 %.ld.0, i64* %y, align 8
     %x.ld.1 = load i64*, i64** %x
-    store i64 3, i64* %x.ld.1
+    store i64 3, i64* %x.ld.1, align 8
   )RAW_RESULT");
 
   bool isOK = h.expectBlock(exp);
@@ -132,14 +132,14 @@
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     %cast.0 = bitcast { i64 }* %fdloc1 to i8*
     call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.0, i8* align 8 bitcast ({ i64 }* @const.0 to i8*), i64 8, i1 false)
-    store { i64 }* %fdloc1, { i64 }** %fploc1
-    store { i64 (i8*, i32, i32, i64*)* }* null, { i64 (i8*, i32, i32, i64*)* }** %fploc2
+    store { i64 }* %fdloc1, { i64 }** %fploc1, align 8
+    store { i64 (i8*, i32, i32, i64*)* }* null, { i64 (i8*, i32, i32, i64*)* }** %fploc2, align 8
     %fploc1.ld.0 = load { i64 }*, { i64 }** %fploc1
     %cast.1 = bitcast { i64 }* %fploc1.ld.0 to { i64 (i8*, i32, i32, i64*)* }*
-    store { i64 (i8*, i32, i32, i64*)* }* %cast.1, { i64 (i8*, i32, i32, i64*)* }** %fploc2
+    store { i64 (i8*, i32, i32, i64*)* }* %cast.1, { i64 (i8*, i32, i32, i64*)* }** %fploc2, align 8
     %fploc2.ld.0 = load { i64 (i8*, i32, i32, i64*)* }*, { i64 (i8*, i32, i32, i64*)* }** %fploc2
     %cast.2 = bitcast { i64 (i8*, i32, i32, i64*)* }* %fploc2.ld.0 to { i64 }*
-    store { i64 }* %cast.2, { i64 }** %fploc1
+    store { i64 }* %cast.2, { i64 }** %fploc1, align 8
   )RAW_RESULT");
 
   bool isOK = h.expectBlock(exp);
@@ -188,16 +188,16 @@
   }
 
   DECLARE_EXPECTED_OUTPUT(exp2, R"RAW_RESULT(
-    store i8 0, i8* %b1
-    store i8* null, i8** %pb1
+    store i8 0, i8* %b1, align 1
+    store i8* null, i8** %pb1, align 8
     %pb1.ld.0 = load i8*, i8** %pb1
     %icmp.0 = icmp eq i8* %pb1.ld.0, null
     %zext.0 = zext i1 %icmp.0 to i8
-    store i8 %zext.0, i8* %b1
+    store i8 %zext.0, i8* %b1, align 1
     %pb1.ld.1 = load i8*, i8** %pb1
     %icmp.1 = icmp eq i8* null, %pb1.ld.1
     %zext.1 = zext i1 %icmp.1 to i8
-    store i8 %zext.1, i8* %b1
+    store i8 %zext.1, i8* %b1, align 1
   )RAW_RESULT");
 
   bool isOK2 = h.expectBlock(exp2);
@@ -226,7 +226,7 @@
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     %deref.ld.0 = load i32, i32* null
-    store i32 %deref.ld.0, i32* %x
+    store i32 %deref.ld.0, i32* %x, align 4
     %cast.2 = bitcast { i32, i32 }* %y to i8*
     call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.2, i8* align 4 null, i64 8, i1 false)
   )RAW_RESULT");
@@ -321,27 +321,27 @@
   }
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
-    store %CPT.0* null, %CPT.0** %cpv1
-    store %CPT.0* null, %CPT.0** %cpv2
+    store %CPT.0* null, %CPT.0** %cpv1, align 8
+    store %CPT.0* null, %CPT.0** %cpv2, align 8
     %cast.0 = bitcast %CPT.0** %cpv2 to %CPT.0*
-    store %CPT.0* %cast.0, %CPT.0** %cpv1
+    store %CPT.0* %cast.0, %CPT.0** %cpv1, align 8
     %cast.1 = bitcast %CPT.0** %cpv1 to %CPT.0*
-    store %CPT.0* %cast.1, %CPT.0** %cpv2
-    store i8 0, i8* %b1
-    store i8 0, i8* %b2
-    store i8 0, i8* %b3
+    store %CPT.0* %cast.1, %CPT.0** %cpv2, align 8
+    store i8 0, i8* %b1, align 1
+    store i8 0, i8* %b2, align 1
+    store i8 0, i8* %b3, align 1
     %cpv1.ld.0 = load %CPT.0*, %CPT.0** %cpv1
     %cast.2 = bitcast %CPT.0** %cpv2 to %CPT.0***
     %cpv2.ld.0 = load %CPT.0**, %CPT.0*** %cast.2
     %.ld.0 = load %CPT.0*, %CPT.0** %cpv2.ld.0
     %icmp.0 = icmp eq %CPT.0* %cpv1.ld.0, %.ld.0
     %zext.0 = zext i1 %icmp.0 to i8
-    store i8 %zext.0, i8* %b1
+    store i8 %zext.0, i8* %b1, align 1
     %cpv2.ld.1 = load %CPT.0*, %CPT.0** %cpv2
     %cast.3 = bitcast %CPT.0* %cpv2.ld.1 to %CPT.0**
     %icmp.1 = icmp eq %CPT.0** %cpv1, %cast.3
     %zext.1 = zext i1 %icmp.1 to i8
-    store i8 %zext.1, i8* %b2
+    store i8 %zext.1, i8* %b2, align 1
     %cpv1.ld.1 = load %CPT.0*, %CPT.0** %cpv1
     %cast.4 = bitcast %CPT.0** %cpv2 to %CPT.0***
     %cpv2.ld.2 = load %CPT.0**, %CPT.0*** %cast.4
@@ -352,7 +352,7 @@
     %.ld.1 = load %CPT.0*, %CPT.0** %deref.ld.1
     %icmp.2 = icmp eq %CPT.0* %cpv1.ld.1, %.ld.1
     %zext.2 = zext i1 %icmp.2 to i8
-    store i8 %zext.2, i8* %b3
+    store i8 %zext.2, i8* %b3, align 1
   )RAW_RESULT");
 
   bool isOK = h.expectBlock(exp);
@@ -422,18 +422,18 @@
   }
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
-    store %CPT.0* null, %CPT.0** %x
-    store %CPT.0** null, %CPT.0*** %y
+    store %CPT.0* null, %CPT.0** %x, align 8
+    store %CPT.0** null, %CPT.0*** %y, align 8
     %cast.0 = bitcast %CPT.0*** %y to %CPT.0*
-    store %CPT.0* %cast.0, %CPT.0** %x
-    store %CPT.0** %x, %CPT.0*** %y
-    store i8 0, i8* %b1
+    store %CPT.0* %cast.0, %CPT.0** %x, align 8
+    store %CPT.0** %x, %CPT.0*** %y, align 8
+    store i8 0, i8* %b1, align 1
     %x.ld.0 = load %CPT.0*, %CPT.0** %x
     %y.ld.0 = load %CPT.0**, %CPT.0*** %y
     %.ld.0 = load %CPT.0*, %CPT.0** %y.ld.0
     %icmp.0 = icmp eq %CPT.0* %x.ld.0, %.ld.0
     %zext.0 = zext i1 %icmp.0 to i8
-    store i8 %zext.0, i8* %b1
+    store i8 %zext.0, i8* %b1, align 1
   )RAW_RESULT");
 
   bool isOK = h.expectBlock(exp);
@@ -479,12 +479,12 @@
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     %param3.ld.0 = load i64*, i64** %param3.addr
     %ptroff.0 = getelementptr i64, i64* %param3.ld.0, i32 5
-    store i64 9, i64* %ptroff.0
+    store i64 9, i64* %ptroff.0, align 8
     %param3.ld.1 = load i64*, i64** %param3.addr
     %ptroff.1 = getelementptr i64, i64* %param3.ld.1, i32 7
     %.ptroff.ld.0 = load i64, i64* %ptroff.1
     %trunc.0 = trunc i64 %.ptroff.ld.0 to i32
-    store i32 %trunc.0, i32* %param1.addr
+    store i32 %trunc.0, i32* %param1.addr, align 4
   )RAW_RESULT");
 
   bool isOK = h.expectBlock(exp);
@@ -516,8 +516,8 @@
   h.mkAssign(vexl, ad3);
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
-    store i64 0, i64* %x
-    store i64* %x, i64** %param3.addr
+    store i64 0, i64* %x, align 8
+    store i64* %x, i64** %param3.addr, align 8
   )RAW_RESULT");
 
   bool isOK = h.expectBlock(exp);
@@ -569,9 +569,8 @@
   }
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
-    store i32 0, i32* inttoptr (i64 65793 to i32*)
-    store i64 2, i64* getelementptr inbounds ({ i64, i64 }, { i64, i64 }*
-      inttoptr (i64 34661 to { i64, i64 }*), i32 0, i32 1)
+    store i32 0, i32* inttoptr (i64 65793 to i32*), align 4
+    store i64 2, i64* getelementptr inbounds ({ i64, i64 }, { i64, i64 }* inttoptr (i64 34661 to { i64, i64 }*), i32 0, i32 1), align 8
   )RAW_RESULT");
 
   bool isOK = h.expectBlock(exp);
@@ -623,8 +622,8 @@
   h.mkLocal("y", pbefty2);
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
-    store i64 (i8*, i64, i64, %CFT.0*, %CFT.1*)* null, i64 (i8*, i64, i64, %CFT.0*, %CFT.1*)** %x
-    store i64 (i8*, i64, i64, %CFT.1*, %CFT.0*)* null, i64 (i8*, i64, i64, %CFT.1*, %CFT.0*)** %y
+    store i64 (i8*, i64, i64, %CFT.0*, %CFT.1*)* null, i64 (i8*, i64, i64, %CFT.0*, %CFT.1*)** %x, align 8
+    store i64 (i8*, i64, i64, %CFT.1*, %CFT.0*)* null, i64 (i8*, i64, i64, %CFT.1*, %CFT.0*)** %y, align 8
 
   )RAW_RESULT");
 
diff --git a/unittests/BackendCore/BackendStmtTests.cpp b/unittests/BackendCore/BackendStmtTests.cpp
index 5f3cd2c..8c2c863 100644
--- a/unittests/BackendCore/BackendStmtTests.cpp
+++ b/unittests/BackendCore/BackendStmtTests.cpp
@@ -40,7 +40,7 @@
   Bstatement *is = be->init_statement(func, loc1, mkInt64Const(be, 10));
   ASSERT_TRUE(is != nullptr);
   h.addStmt(is);
-  EXPECT_EQ(repr(is), "store i64 10, i64* %loc1");
+  EXPECT_EQ(repr(is), "store i64 10, i64* %loc1, align 8");
 
   // error handling
   Bvariable *loc2 = be->local_variable(func, "loc2", bi64t, nullptr, true, loc);
@@ -80,11 +80,11 @@
   h.addStmt(as2);
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
-    store i64 0, i64* %loc1
-      store i64 123, i64* %loc1
-      store i64 0, i64* %loc2
-      %loc1.ld.0 = load i64, i64* %loc1
-      store i64 %loc1.ld.0, i64* %loc2
+    store i64 0, i64* %loc1, align 8
+    store i64 123, i64* %loc1, align 8
+    store i64 0, i64* %loc2, align 8
+    %loc1.ld.0 = load i64, i64* %loc1
+    store i64 %loc1.ld.0, i64* %loc2, align 8
   )RAW_RESULT");
   bool isOK = h.expectBlock(exp);
   EXPECT_TRUE(isOK && "Block does not have expected contents");
@@ -158,18 +158,18 @@
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     define i64 @foo(i8* nest %nest.0, i32 %param1, i32 %param2, i64* %param3) #0 {
-    entry:
-      %param1.addr = alloca i32
-      %param2.addr = alloca i32
-      %param3.addr = alloca i64*
-      %x = alloca i64
-      store i32 %param1, i32* %param1.addr
-      store i32 %param2, i32* %param2.addr
-      store i64* %param3, i64** %param3.addr
-      store i64 10, i64* %x
-      %x.ld.0 = load i64, i64* %x
-      ret i64 %x.ld.0
-    }
+  entry:
+    %param1.addr = alloca i32
+    %param2.addr = alloca i32
+    %param3.addr = alloca i64*
+    %x = alloca i64
+    store i32 %param1, i32* %param1.addr, align 4
+    store i32 %param2, i32* %param2.addr, align 4
+    store i64* %param3, i64** %param3.addr, align 8
+    store i64 10, i64* %x, align 8
+    %x.ld.0 = load i64, i64* %x
+    ret i64 %x.ld.0
+  }
   )RAW_RESULT");
 
   bool broken = h.finish(StripDebugInfo);
@@ -241,15 +241,16 @@
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     define void @foo(i8* nest %nest.0) #0 {
-    entry:
-      %loc1 = alloca i8
-      store i8 0, i8* %loc1
-      call void @bar(i8* nest undef, i8* blockaddress(@foo, %label.0))
-      br label %label.0
-    label.0:                                          ; preds = %entry
-      store i8 0, i8* %loc1
-      ret void
-    }
+  entry:
+    %loc1 = alloca i8
+    store i8 0, i8* %loc1, align 1
+    call void @bar(i8* nest undef, i8* blockaddress(@foo, %label.0))
+    br label %label.0
+  
+  label.0:                                          ; preds = %entry
+    store i8 0, i8* %loc1, align 1
+    ret void
+  }
   )RAW_RESULT");
 
   bool isOK = h.expectValue(func->function(), exp);
@@ -311,34 +312,40 @@
   // verify
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     define i64 @foo(i8* nest %nest.0, i32 %param1, i32 %param2, i64* %param3) #0 {
-    entry:
-      %param1.addr = alloca i32
-      %param2.addr = alloca i32
-      %param3.addr = alloca i64*
-      %loc1 = alloca i64
-      %loc2 = alloca i64
-      store i32 %param1, i32* %param1.addr
-      store i32 %param2, i32* %param2.addr
-      store i64* %param3, i64** %param3.addr
-      store i64 0, i64* %loc1
-      store i64 0, i64* %loc2
-      br i1 true, label %then.0, label %else.0
-    then.0:                                           ; preds = %entry
-      br i1 true, label %then.1, label %else.1
-    fallthrough.0:                                    ; preds = %else.0, %fallthrough.1
-      ret i64 10101
-    else.0:                                           ; preds = %entry
-      store i64 456, i64* %loc2
-      br label %fallthrough.0
-    then.1:                                           ; preds = %then.0
-      store i64 123, i64* %loc1
-      br label %fallthrough.1
-    fallthrough.1:                                    ; preds = %else.1, %then.1
-      br label %fallthrough.0
-    else.1:                                           ; preds = %then.0
-      store i64 987, i64* %loc1
-      br label %fallthrough.1
-    }
+  entry:
+    %param1.addr = alloca i32
+    %param2.addr = alloca i32
+    %param3.addr = alloca i64*
+    %loc1 = alloca i64
+    %loc2 = alloca i64
+    store i32 %param1, i32* %param1.addr, align 4
+    store i32 %param2, i32* %param2.addr, align 4
+    store i64* %param3, i64** %param3.addr, align 8
+    store i64 0, i64* %loc1, align 8
+    store i64 0, i64* %loc2, align 8
+    br i1 true, label %then.0, label %else.0
+  
+  then.0:                                           ; preds = %entry
+    br i1 true, label %then.1, label %else.1
+  
+  fallthrough.0:                                    ; preds = %else.0, %fallthrough.1
+    ret i64 10101
+  
+  else.0:                                           ; preds = %entry
+    store i64 456, i64* %loc2, align 8
+    br label %fallthrough.0
+  
+  then.1:                                           ; preds = %then.0
+    store i64 123, i64* %loc1, align 8
+    br label %fallthrough.1
+  
+  fallthrough.1:                                    ; preds = %else.1, %then.1
+    br label %fallthrough.0
+  
+  else.1:                                           ; preds = %then.0
+    store i64 987, i64* %loc1, align 8
+    br label %fallthrough.1
+  }
   )RAW_RESULT");
 
   bool isOK = h.expectValue(func->function(), exp);
@@ -429,64 +436,64 @@
   // verify
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     define i64 @foo(i8* nest %nest.0, i32 %param1, i32 %param2, i64* %param3) #0 {
-    entry:
-      %param1.addr = alloca i32
-      %param2.addr = alloca i32
-      %param3.addr = alloca i64*
-      %loc1 = alloca i64
-      %tmpv.0 = alloca i64
-      store i32 %param1, i32* %param1.addr
-      store i32 %param2, i32* %param2.addr
-      store i64* %param3, i64** %param3.addr
-      store i64 0, i64* %loc1
-      %loc1.ld.4 = load i64, i64* %loc1
-      switch i64 %loc1.ld.4, label %default.0 [
-        i64 1, label %case.0
-        i64 2, label %case.0
-        i64 3, label %case.1
-        i64 4, label %case.1
-        i64 5, label %case.2
-      ]
-
-    case.0:                                           ; preds = %entry, %entry
-      %loc1.ld.0 = load i64, i64* %loc1
-      %div.0 = sdiv i64 %loc1.ld.0, 123
-      store i64 %div.0, i64* %loc1
-      br label %label.0
-
-    case.1:                                           ; preds = %entry, %entry
-      %loc1.ld.1 = load i64, i64* %loc1
-      %icmp.0 = icmp sle i64 %loc1.ld.1, 987
-      %zext.0 = zext i1 %icmp.0 to i8
-      %trunc.0 = trunc i8 %zext.0 to i1
-      br i1 %trunc.0, label %then.0, label %else.0
-
-    case.2:                                           ; preds = %entry, %fallthrough.0
-      br label %default.0
-
-    default.0:                                        ; preds = %entry, %case.2
-      store i64 456, i64* %loc1
-      br label %label.0
-
-    label.0:                                          ; preds = %default.0, %case.0
-      ret i64 10101
-
-    then.0:                                           ; preds = %case.1
-      %loc1.ld.3 = load i64, i64* %loc1
-      store i64 %loc1.ld.3, i64* %tmpv.0
-      br label %fallthrough.0
-
-    fallthrough.0:                                    ; preds = %else.0, %then.0
-      %tmpv.0.ld.0 = load i64, i64* %tmpv.0
-      store i64 %tmpv.0.ld.0, i64* %loc1
-      br label %case.2
-
-    else.0:                                           ; preds = %case.1
-      %loc1.ld.2 = load i64, i64* %loc1
-      %mul.0 = mul i64 987, %loc1.ld.2
-      store i64 %mul.0, i64* %tmpv.0
-      br label %fallthrough.0
-    }
+  entry:
+    %param1.addr = alloca i32
+    %param2.addr = alloca i32
+    %param3.addr = alloca i64*
+    %loc1 = alloca i64
+    %tmpv.0 = alloca i64
+    store i32 %param1, i32* %param1.addr, align 4
+    store i32 %param2, i32* %param2.addr, align 4
+    store i64* %param3, i64** %param3.addr, align 8
+    store i64 0, i64* %loc1, align 8
+    %loc1.ld.4 = load i64, i64* %loc1
+    switch i64 %loc1.ld.4, label %default.0 [
+      i64 1, label %case.0
+      i64 2, label %case.0
+      i64 3, label %case.1
+      i64 4, label %case.1
+      i64 5, label %case.2
+    ]
+  
+  case.0:                                           ; preds = %entry, %entry
+    %loc1.ld.0 = load i64, i64* %loc1
+    %div.0 = sdiv i64 %loc1.ld.0, 123
+    store i64 %div.0, i64* %loc1, align 8
+    br label %label.0
+  
+  case.1:                                           ; preds = %entry, %entry
+    %loc1.ld.1 = load i64, i64* %loc1
+    %icmp.0 = icmp sle i64 %loc1.ld.1, 987
+    %zext.0 = zext i1 %icmp.0 to i8
+    %trunc.0 = trunc i8 %zext.0 to i1
+    br i1 %trunc.0, label %then.0, label %else.0
+  
+  case.2:                                           ; preds = %entry, %fallthrough.0
+    br label %default.0
+  
+  default.0:                                        ; preds = %entry, %case.2
+    store i64 456, i64* %loc1, align 8
+    br label %label.0
+  
+  label.0:                                          ; preds = %default.0, %case.0
+    ret i64 10101
+  
+  then.0:                                           ; preds = %case.1
+    %loc1.ld.3 = load i64, i64* %loc1
+    store i64 %loc1.ld.3, i64* %tmpv.0, align 8
+    br label %fallthrough.0
+  
+  fallthrough.0:                                    ; preds = %else.0, %then.0
+    %tmpv.0.ld.0 = load i64, i64* %tmpv.0
+    store i64 %tmpv.0.ld.0, i64* %loc1, align 8
+    br label %case.2
+  
+  else.0:                                           ; preds = %case.1
+    %loc1.ld.2 = load i64, i64* %loc1
+    %mul.0 = mul i64 987, %loc1.ld.2
+    store i64 %mul.0, i64* %tmpv.0, align 8
+    br label %fallthrough.0
+  }
   )RAW_RESULT");
 
   bool isOK = h.expectValue(func->function(), exp);
@@ -578,27 +585,27 @@
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     define void @foo(i8* nest %nest.0) #0 personality i32 (i32, i32, i64, i8*, i8*)* @__gccgo_personality_v0 {
-    entry:
-      %x = alloca i8
-      store i8 0, i8* %x
-      br label %finish.0
-
-    pad.0:                                            ; preds = %finish.0
-      %ex.0 = landingpad { i8*, i32 }
-              catch i8* null
-      br label %catch.0
-
-    catch.0:                                          ; preds = %pad.0
-      call void @checkdefer(i8* nest undef, i8* %x)
-      br label %finish.0
-
-    finish.0:                                         ; preds = %catch.0, %entry
-      invoke void @deferreturn(i8* nest undef, i8* %x)
-              to label %cont.0 unwind label %pad.0
-
-    cont.0:                                           ; preds = %finish.0
-      ret void
-    }
+  entry:
+    %x = alloca i8
+    store i8 0, i8* %x, align 1
+    br label %finish.0
+  
+  pad.0:                                            ; preds = %finish.0
+    %ex.0 = landingpad { i8*, i32 }
+            catch i8* null
+    br label %catch.0
+  
+  catch.0:                                          ; preds = %pad.0
+    call void @checkdefer(i8* nest undef, i8* %x)
+    br label %finish.0
+  
+  finish.0:                                         ; preds = %catch.0, %entry
+    invoke void @deferreturn(i8* nest undef, i8* %x)
+            to label %cont.0 unwind label %pad.0
+  
+  cont.0:                                           ; preds = %finish.0
+    ret void
+  }
   )RAW_RESULT");
 
   bool isOK = h.expectValue(func->function(), exp);
@@ -691,93 +698,93 @@
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     define void @baz(i8* nest %nest.0) #0 personality i32 (i32, i32, i64, i8*, i8*)* @__gccgo_personality_v0 {
-    entry:
-      %ehtmp.0 = alloca { i8*, i32 }
-      %x = alloca i64
-      %y = alloca i8
-      %sret.actual.0 = alloca { i8, i8 }
-      %sret.actual.1 = alloca { i8, i8 }
-      %finvar.0 = alloca i8
-      store i64 0, i64* %x
-      store i8 0, i8* %y
-      %call.0 = invoke i64 @id(i8* nest undef, i64 99)
-              to label %cont.1 unwind label %pad.1
-
-    finok.0:                                          ; preds = %cont.4
-      store i8 1, i8* %finvar.0
-      br label %finally.0
-
-    finally.0:                                        ; preds = %catchpad.0, %finok.0
-      br label %finish.0
-
-    pad.0:                                            ; preds = %fallthrough.0, %finish.0
-      %ex.0 = landingpad { i8*, i32 }
-              catch i8* null
-      br label %catch.0
-
-    catch.0:                                          ; preds = %pad.0
-      call void @checkdefer(i8* nest undef, i8* %y)
-      br label %finish.0
-
-    finish.0:                                         ; preds = %catch.0, %finally.0
-      invoke void @deferreturn(i8* nest undef, i8* %y)
-              to label %cont.0 unwind label %pad.0
-
-    cont.0:                                           ; preds = %fallthrough.0, %finish.0
-      %fload.0 = load i8, i8* %finvar.0
-      %icmp.0 = icmp eq i8 %fload.0, 1
-      br i1 %icmp.0, label %finret.0, label %finres.0
-
-    pad.1:                                            ; preds = %then.0, %cont.1, %entry
-      %ex.1 = landingpad { i8*, i32 }
-              catch i8* null
-      br label %catch.1
-
-    catch.1:                                          ; preds = %pad.1
-      invoke void @plix(i8* nest undef)
-              to label %cont.4 unwind label %catchpad.0
-
-    catchpad.0:                                       ; preds = %catch.1
-      %ex2.0 = landingpad { i8*, i32 }
-              cleanup
-      store { i8*, i32 } %ex2.0, { i8*, i32 }* %ehtmp.0
-      store i8 0, i8* %finvar.0
-      br label %finally.0
-
-    cont.1:                                           ; preds = %entry
-      store i64 %call.0, i64* %x
-      invoke void @plark(i8* nest undef)
-              to label %cont.2 unwind label %pad.1
-
-    cont.2:                                           ; preds = %cont.1
-      br i1 false, label %then.0, label %else.0
-
-    then.0:                                           ; preds = %cont.2
-      %call.1 = invoke i16 @noret(i8* nest undef)
-              to label %cont.3 unwind label %pad.1
-
-    fallthrough.0:                                    ; preds = %else.0
-      store i64 123, i64* %x
-      store i8 1, i8* %finvar.0
-      invoke void @deferreturn(i8* nest undef, i8* %y)
-              to label %cont.0 unwind label %pad.0
-
-    else.0:                                           ; preds = %cont.2
-      br label %fallthrough.0
-
-    cont.3:                                           ; preds = %then.0
-      unreachable
-
-    cont.4:                                           ; preds = %catch.1
-      br label %finok.0
-
-    finres.0:                                         ; preds = %cont.0
-      %excv.0 = load { i8*, i32 }, { i8*, i32 }* %ehtmp.0
-      resume { i8*, i32 } %excv.0
-
-    finret.0:                                         ; preds = %cont.0
-      ret void
-    }
+  entry:
+    %ehtmp.0 = alloca { i8*, i32 }
+    %x = alloca i64
+    %y = alloca i8
+    %sret.actual.0 = alloca { i8, i8 }
+    %sret.actual.1 = alloca { i8, i8 }
+    %finvar.0 = alloca i8
+    store i64 0, i64* %x, align 8
+    store i8 0, i8* %y, align 1
+    %call.0 = invoke i64 @id(i8* nest undef, i64 99)
+            to label %cont.1 unwind label %pad.1
+  
+  finok.0:                                          ; preds = %cont.4
+    store i8 1, i8* %finvar.0, align 1
+    br label %finally.0
+  
+  finally.0:                                        ; preds = %catchpad.0, %finok.0
+    br label %finish.0
+  
+  pad.0:                                            ; preds = %fallthrough.0, %finish.0
+    %ex.0 = landingpad { i8*, i32 }
+            catch i8* null
+    br label %catch.0
+  
+  catch.0:                                          ; preds = %pad.0
+    call void @checkdefer(i8* nest undef, i8* %y)
+    br label %finish.0
+  
+  finish.0:                                         ; preds = %catch.0, %finally.0
+    invoke void @deferreturn(i8* nest undef, i8* %y)
+            to label %cont.0 unwind label %pad.0
+  
+  cont.0:                                           ; preds = %fallthrough.0, %finish.0
+    %fload.0 = load i8, i8* %finvar.0, align 1
+    %icmp.0 = icmp eq i8 %fload.0, 1
+    br i1 %icmp.0, label %finret.0, label %finres.0
+  
+  pad.1:                                            ; preds = %then.0, %cont.1, %entry
+    %ex.1 = landingpad { i8*, i32 }
+            catch i8* null
+    br label %catch.1
+  
+  catch.1:                                          ; preds = %pad.1
+    invoke void @plix(i8* nest undef)
+            to label %cont.4 unwind label %catchpad.0
+  
+  catchpad.0:                                       ; preds = %catch.1
+    %ex2.0 = landingpad { i8*, i32 }
+            cleanup
+    store { i8*, i32 } %ex2.0, { i8*, i32 }* %ehtmp.0, align 8
+    store i8 0, i8* %finvar.0, align 1
+    br label %finally.0
+  
+  cont.1:                                           ; preds = %entry
+    store i64 %call.0, i64* %x, align 8
+    invoke void @plark(i8* nest undef)
+            to label %cont.2 unwind label %pad.1
+  
+  cont.2:                                           ; preds = %cont.1
+    br i1 false, label %then.0, label %else.0
+  
+  then.0:                                           ; preds = %cont.2
+    %call.1 = invoke i16 @noret(i8* nest undef)
+            to label %cont.3 unwind label %pad.1
+  
+  fallthrough.0:                                    ; preds = %else.0
+    store i64 123, i64* %x, align 8
+    store i8 1, i8* %finvar.0, align 1
+    invoke void @deferreturn(i8* nest undef, i8* %y)
+            to label %cont.0 unwind label %pad.0
+  
+  else.0:                                           ; preds = %cont.2
+    br label %fallthrough.0
+  
+  cont.3:                                           ; preds = %then.0
+    unreachable
+  
+  cont.4:                                           ; preds = %catch.1
+    br label %finok.0
+  
+  finres.0:                                         ; preds = %cont.0
+    %excv.0 = load { i8*, i32 }, { i8*, i32 }* %ehtmp.0, align 8
+    resume { i8*, i32 } %excv.0
+  
+  finret.0:                                         ; preds = %cont.0
+    ret void
+  }
   )RAW_RESULT");
 
   bool isOK = h.expectValue(func->function(), exp);
@@ -880,89 +887,89 @@
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     define i64 @baz(i8* nest %nest.0, i64 %p0) #0 personality i32 (i32, i32, i64, i8*, i8*)* @__gccgo_personality_v0 {
-    entry:
-      %ehtmp.0 = alloca { i8*, i32 }
-      %p0.addr = alloca i64
-      %ret = alloca i64
-      %x = alloca i8
-      %finvar.0 = alloca i8
-      store i64 %p0, i64* %p0.addr
-      store i64 0, i64* %ret
-      store i8 0, i8* %x
-      %call.0 = invoke i64 @splat(i8* nest undef, i64 99)
-              to label %cont.1 unwind label %pad.1
-
-    finok.0:                                          ; preds = %cont.2
-      store i8 1, i8* %finvar.0
-      br label %finally.0
-
-    finally.0:                                        ; preds = %catchpad.0, %finok.0
-      br label %finish.0
-
-    pad.0:                                            ; preds = %else.0, %then.0, %finish.0
-      %ex.0 = landingpad { i8*, i32 }
-              catch i8* null
-      br label %catch.0
-
-    catch.0:                                          ; preds = %pad.0
-      call void @checkdefer(i8* nest undef, i8* %x)
-      br label %finish.0
-
-    finish.0:                                         ; preds = %catch.0, %finally.0
-      invoke void @deferreturn(i8* nest undef, i8* %x)
-              to label %cont.0 unwind label %pad.0
-
-    cont.0:                                           ; preds = %else.0, %then.0, %finish.0
-      %fload.0 = load i8, i8* %finvar.0
-      %icmp.1 = icmp eq i8 %fload.0, 1
-      br i1 %icmp.1, label %finret.0, label %finres.0
-
-    pad.1:                                            ; preds = %entry
-      %ex.1 = landingpad { i8*, i32 }
-              catch i8* null
-      br label %catch.1
-
-    catch.1:                                          ; preds = %pad.1
-      %call.1 = invoke i64 @splat(i8* nest undef, i64 13)
-              to label %cont.2 unwind label %catchpad.0
-
-    catchpad.0:                                       ; preds = %catch.1
-      %ex2.0 = landingpad { i8*, i32 }
-              cleanup
-      store { i8*, i32 } %ex2.0, { i8*, i32 }* %ehtmp.0
-      store i8 0, i8* %finvar.0
-      br label %finally.0
-
-    cont.1:                                           ; preds = %entry
-      %icmp.0 = icmp eq i64 %call.0, 88
-      %zext.0 = zext i1 %icmp.0 to i8
-      %trunc.0 = trunc i8 %zext.0 to i1
-      br i1 %trunc.0, label %then.0, label %else.0
-
-    then.0:                                           ; preds = %cont.1
-      store i64 22, i64* %ret
-      store i8 1, i8* %finvar.0
-      invoke void @deferreturn(i8* nest undef, i8* %x)
-              to label %cont.0 unwind label %pad.0
-
-    else.0:                                           ; preds = %cont.1
-      %p0.ld.0 = load i64, i64* %p0.addr
-      store i64 %p0.ld.0, i64* %ret
-      store i8 1, i8* %finvar.0
-      invoke void @deferreturn(i8* nest undef, i8* %x)
-              to label %cont.0 unwind label %pad.0
-
-    cont.2:                                           ; preds = %catch.1
-      br label %finok.0
-
-    finres.0:                                         ; preds = %cont.0
-      %excv.0 = load { i8*, i32 }, { i8*, i32 }* %ehtmp.0
-      resume { i8*, i32 } %excv.0
-
-    finret.0:                                         ; preds = %cont.0
-      %ret.ld.1 = load i64, i64* %ret
-      ret i64 %ret.ld.1
-    }
+  entry:
+    %ehtmp.0 = alloca { i8*, i32 }
+    %p0.addr = alloca i64
+    %ret = alloca i64
+    %x = alloca i8
+    %finvar.0 = alloca i8
+    store i64 %p0, i64* %p0.addr, align 8
+    store i64 0, i64* %ret, align 8
+    store i8 0, i8* %x, align 1
+    %call.0 = invoke i64 @splat(i8* nest undef, i64 99)
+            to label %cont.1 unwind label %pad.1
+  
+  finok.0:                                          ; preds = %cont.2
+    store i8 1, i8* %finvar.0, align 1
+    br label %finally.0
+  
+  finally.0:                                        ; preds = %catchpad.0, %finok.0
+    br label %finish.0
+  
+  pad.0:                                            ; preds = %else.0, %then.0, %finish.0
+    %ex.0 = landingpad { i8*, i32 }
+            catch i8* null
+    br label %catch.0
+  
+  catch.0:                                          ; preds = %pad.0
+    call void @checkdefer(i8* nest undef, i8* %x)
+    br label %finish.0
+  
+  finish.0:                                         ; preds = %catch.0, %finally.0
+    invoke void @deferreturn(i8* nest undef, i8* %x)
+            to label %cont.0 unwind label %pad.0
+  
+  cont.0:                                           ; preds = %else.0, %then.0, %finish.0
+    %fload.0 = load i8, i8* %finvar.0, align 1
+    %icmp.1 = icmp eq i8 %fload.0, 1
+    br i1 %icmp.1, label %finret.0, label %finres.0
+  
+  pad.1:                                            ; preds = %entry
+    %ex.1 = landingpad { i8*, i32 }
+            catch i8* null
+    br label %catch.1
+  
+  catch.1:                                          ; preds = %pad.1
+    %call.1 = invoke i64 @splat(i8* nest undef, i64 13)
+            to label %cont.2 unwind label %catchpad.0
+  
+  catchpad.0:                                       ; preds = %catch.1
+    %ex2.0 = landingpad { i8*, i32 }
+            cleanup
+    store { i8*, i32 } %ex2.0, { i8*, i32 }* %ehtmp.0, align 8
+    store i8 0, i8* %finvar.0, align 1
+    br label %finally.0
+  
+  cont.1:                                           ; preds = %entry
+    %icmp.0 = icmp eq i64 %call.0, 88
+    %zext.0 = zext i1 %icmp.0 to i8
+    %trunc.0 = trunc i8 %zext.0 to i1
+    br i1 %trunc.0, label %then.0, label %else.0
+  
+  then.0:                                           ; preds = %cont.1
+    store i64 22, i64* %ret, align 8
+    store i8 1, i8* %finvar.0, align 1
+    invoke void @deferreturn(i8* nest undef, i8* %x)
+            to label %cont.0 unwind label %pad.0
+  
+  else.0:                                           ; preds = %cont.1
+    %p0.ld.0 = load i64, i64* %p0.addr
+    store i64 %p0.ld.0, i64* %ret, align 8
+    store i8 1, i8* %finvar.0, align 1
+    invoke void @deferreturn(i8* nest undef, i8* %x)
+            to label %cont.0 unwind label %pad.0
+  
+  cont.2:                                           ; preds = %catch.1
+    br label %finok.0
+  
+  finres.0:                                         ; preds = %cont.0
+    %excv.0 = load { i8*, i32 }, { i8*, i32 }* %ehtmp.0, align 8
+    resume { i8*, i32 } %excv.0
+  
+  finret.0:                                         ; preds = %cont.0
+    %ret.ld.1 = load i64, i64* %ret
+    ret i64 %ret.ld.1
+  }
   )RAW_RESULT");
 
   bool isOK = h.expectValue(func->function(), exp);
diff --git a/unittests/BackendCore/BackendVarTests.cpp b/unittests/BackendCore/BackendVarTests.cpp
index c62b087..3c78a51 100644
--- a/unittests/BackendCore/BackendVarTests.cpp
+++ b/unittests/BackendCore/BackendVarTests.cpp
@@ -163,7 +163,7 @@
                                            false, loc, &inits);
   ASSERT_TRUE(tvar != nullptr);
   ASSERT_TRUE(inits != nullptr);
-  EXPECT_EQ(repr(inits), "store i64 64, i64* %tmpv.0");
+  EXPECT_EQ(repr(inits), "store i64 64, i64* %tmpv.0, align 8");
 
   h.addStmt(inits);
 
@@ -535,25 +535,25 @@
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     define void @foo(i8* nest %nest.0) #0 {
-      entry:
-      %x = alloca i32
-      %y = alloca { i32, i32 }
-      %0 = bitcast i32* %x to i8*
-      call void @llvm.lifetime.start.p0i8(i64 4, i8* %0)
-      %1 = bitcast { i32, i32 }* %y to i8*
-      call void @llvm.lifetime.start.p0i8(i64 8, i8* %1)
-      store i32 0, i32* %x
-      %cast.0 = bitcast { i32, i32 }* %y to i8*
-      call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.0, i8* align 4 bitcast ({ i32, i32 }* @const.0 to i8*), i64 8, i1 false)
-      %field.0 = getelementptr inbounds { i32, i32 }, { i32, i32 }* %y, i32 0, i32 1
-      %y.field.ld.0 = load i32, i32* %field.0
-      store i32 %y.field.ld.0, i32* %x
-      %2 = bitcast i32* %x to i8*
-      call void @llvm.lifetime.end.p0i8(i64 4, i8* %2)
-      %3 = bitcast { i32, i32 }* %y to i8*
-      call void @llvm.lifetime.end.p0i8(i64 8, i8* %3)
-      ret void
-    }
+  entry:
+    %x = alloca i32
+    %y = alloca { i32, i32 }
+    %0 = bitcast i32* %x to i8*
+    call void @llvm.lifetime.start.p0i8(i64 4, i8* %0)
+    %1 = bitcast { i32, i32 }* %y to i8*
+    call void @llvm.lifetime.start.p0i8(i64 8, i8* %1)
+    store i32 0, i32* %x, align 4
+    %cast.0 = bitcast { i32, i32 }* %y to i8*
+    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.0, i8* align 4 bitcast ({ i32, i32 }* @const.0 to i8*), i64 8, i1 false)
+    %field.0 = getelementptr inbounds { i32, i32 }, { i32, i32 }* %y, i32 0, i32 1
+    %y.field.ld.0 = load i32, i32* %field.0
+    store i32 %y.field.ld.0, i32* %x, align 4
+    %2 = bitcast i32* %x to i8*
+    call void @llvm.lifetime.end.p0i8(i64 4, i8* %2)
+    %3 = bitcast { i32, i32 }* %y to i8*
+    call void @llvm.lifetime.end.p0i8(i64 8, i8* %3)
+    ret void
+  }
   )RAW_RESULT");
 
   bool isOK = h.expectValue(func->function(), exp);