gollvm: sync with trunk

Sync with LLVM trunk at revision ede6005e709. Details:
- adapt to more changes in how load/store alignment is specified
  (llvm::Align now required where llvm::MaybeAlign was accepted before)
- adapt to new signatures for creating alloca instructions (see the
  sketch below)
- remaster unit test expected results (again, alignment-related)
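
For illustration only (not part of the patch), the new API idioms this
change adopts look roughly like this; DL, typ, vt, ptr, and sz stand in
for whatever the caller has at hand:

    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Instructions.h"

    // Allocas now take a mandatory llvm::Align, plus explicit array-size
    // and insertion-point operands (both may be null).
    llvm::Align prefAlign = DL->getPrefTypeAlign(typ);
    llvm::AllocaInst *ai =
        new llvm::AllocaInst(typ, /*AddrSpace=*/0, /*ArraySize=*/nullptr,
                             prefAlign, "tmp", /*InsertBefore=*/nullptr);

    // Loads likewise carry an explicit llvm::Align, so the textual IR
    // always prints an "align N" suffix (hence the remastered unit test
    // expected results below).
    llvm::LoadInst *ld =
        new llvm::LoadInst(vt, ptr, "ld", /*isVolatile=*/false,
                           DL->getABITypeAlign(vt), /*InsertBefore=*/nullptr);

    // setAlignment() now takes llvm::Align rather than llvm::MaybeAlign.
    ld->setAlignment(llvm::Align(sz));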

Fixes golang/go#39109.

Change-Id: Ida665cba384386ed0ae6b78024e62cbc3c126c2a
Reviewed-on: https://go-review.googlesource.com/c/gollvm/+/234337
Reviewed-by: Cherry Zhang <cherryyz@google.com>
diff --git a/bridge/go-llvm-bfunction.cpp b/bridge/go-llvm-bfunction.cpp
index 68e0282..55cdb9e 100644
--- a/bridge/go-llvm-bfunction.cpp
+++ b/bridge/go-llvm-bfunction.cpp
@@ -77,7 +77,12 @@
 llvm::Instruction *Bfunction::addAlloca(llvm::Type *typ,
                                         const std::string &name)
 {
-  llvm::Instruction *inst = new llvm::AllocaInst(typ, 0);
+  llvm::Instruction *insBefore = nullptr;
+  TypeManager *tm = abiOracle_->tm();
+  llvm::Align aaAlign = tm->datalayout()->getPrefTypeAlign(typ);
+  llvm::Value *aaSize = nullptr;
+  llvm::Instruction *inst = new llvm::AllocaInst(typ, 0, aaSize, aaAlign,
+                                                 name, insBefore);
   if (! name.empty())
     inst->setName(name);
   allocas_.push_back(inst);
@@ -290,7 +295,13 @@
 Bfunction::createLabelAddressPlaceholder(Btype *btype)
 {
   std::string name(namegen("labeladdrplaceholder"));
-  llvm::Instruction *inst = new llvm::AllocaInst(btype->type(), 0);
+  TypeManager *tm = abiOracle_->tm();
+  llvm::Type *lltype = btype->type();
+  llvm::Instruction *insBefore = nullptr;
+  llvm::Align aaAlign = tm->datalayout()->getPrefTypeAlign(lltype);
+  llvm::Value *aaSize = nullptr;
+  llvm::Instruction *inst = new llvm::AllocaInst(lltype, 0, aaSize, aaAlign,
+                                                 name, insBefore);
   labelAddressPlaceholders_.insert(inst);
   return inst;
 }
diff --git a/bridge/go-llvm-bnode.cpp b/bridge/go-llvm-bnode.cpp
index 0e8e006..589a881 100644
--- a/bridge/go-llvm-bnode.cpp
+++ b/bridge/go-llvm-bnode.cpp
@@ -20,6 +20,7 @@
 #include "go-system.h"
 
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Function.h"
@@ -514,15 +515,17 @@
 }
 
 Bvariable *BnodeBuilder::mkTempVar(Btype *varType,
+                                   TypeManager *tm,
                                    Location loc,
                                    const std::string &name)
 {
   assert(varType);
   llvm::Type *typ = varType->type();
-  llvm::Instruction *inst = new llvm::AllocaInst(typ, 0);
-  if (! name.empty())
-    inst->setName(name);
-
+  llvm::Instruction *insBefore = nullptr;
+  llvm::Align aaAlign = tm->datalayout()->getPrefTypeAlign(typ);
+  llvm::Value *aaSize = nullptr;
+  llvm::Instruction *inst = new llvm::AllocaInst(typ, 0, aaSize, aaAlign,
+                                                 name, insBefore);
   Bvariable *tvar = new Bvariable(varType, loc, name, LocalVar, true, inst);
   tempvars_[inst] = tvar;
   tvar->markAsTemporary();
diff --git a/bridge/go-llvm-bnode.h b/bridge/go-llvm-bnode.h
index fff709f..be07427 100644
--- a/bridge/go-llvm-bnode.h
+++ b/bridge/go-llvm-bnode.h
@@ -35,6 +35,7 @@
 class SwitchDescriptor;
 class IntegrityVisitor;
 class Llvm_backend;
+class TypeManager;
 
 // Use when deleting a Bnode subtree. Controls whether to delete just
 // the Bnode objects, just the LLVM instructions they contain, or both.
@@ -333,7 +334,8 @@
   // have this interface here, since IR construction methods will
   // always have access to a BnodeBuilder but may not have the current
   // function we're processing.
-  Bvariable *mkTempVar(Btype *varType, Location loc, const std::string &name);
+  Bvariable *mkTempVar(Btype *varType, TypeManager *tm,
+                       Location loc, const std::string &name);
 
   // This helper looks up the specified variable (as identified by its
   // alloca) to see if it is an unparented temp created during IR
diff --git a/bridge/go-llvm-builtins.cpp b/bridge/go-llvm-builtins.cpp
index 779aba5..5d784d2 100644
--- a/bridge/go-llvm-builtins.cpp
+++ b/bridge/go-llvm-builtins.cpp
@@ -419,7 +419,7 @@
       llvmOrder(llvm::cast<llvm::ConstantInt>(args[1])->getZExtValue()) :
       llvm::AtomicOrdering::SequentiallyConsistent;
   load->setAtomic(o);
-  load->setAlignment(llvm::MaybeAlign(sz));
+  load->setAlignment(llvm::Align(sz));
   return load;
 }
 
@@ -449,7 +449,7 @@
       llvmOrder(llvm::cast<llvm::ConstantInt>(args[2])->getZExtValue()) :
       llvm::AtomicOrdering::SequentiallyConsistent;
   store->setAtomic(o);
-  store->setAlignment(llvm::MaybeAlign(sz));
+  store->setAlignment(llvm::Align(sz));
   return store;
 }
 
diff --git a/bridge/go-llvm.cpp b/bridge/go-llvm.cpp
index 4bd0dce..1872f86 100644
--- a/bridge/go-llvm.cpp
+++ b/bridge/go-llvm.cpp
@@ -613,7 +613,12 @@
     ldname += ".ld";
     ldname = namegen(ldname);
     llvm::Type *vt = spaceVal->getType()->getPointerElementType();
-    llvm::Instruction *loadInst = new llvm::LoadInst(vt, spaceVal, ldname);
+    llvm::Instruction *insBefore = nullptr;
+    llvm::Align ldAlign = datalayout_->getABITypeAlign(vt);
+    bool isVolatile = false;
+    llvm::Instruction *loadInst = new llvm::LoadInst(vt, spaceVal, ldname,
+                                                     isVolatile, ldAlign,
+                                                     insBefore);
     rval = nbuilder_.mkDeref(loadResultType, loadInst, space, loc);
     rval->appendInstruction(loadInst);
   } else {
@@ -1018,7 +1023,8 @@
   Bvariable *tvar = nullptr;
   if (!storage) {
     std::string tname(namegen("tmp"));
-    tvar = nbuilder_.mkTempVar(expr->btype(), expr->location(), tname);
+    tvar = nbuilder_.mkTempVar(expr->btype(), typeManager(),
+                               expr->location(), tname);
     assert(tvar != errorVariable_.get());
     storage = tvar->value();
     setPending = true;
@@ -1057,7 +1063,8 @@
 Llvm_backend::makeTempVar(Bexpression *expr, Location location) {
   assert(expr);
   std::string tname(namegen("tmp"));
-  Bvariable *var = nbuilder_.mkTempVar(expr->btype(), location, tname);
+  Bvariable *var = nbuilder_.mkTempVar(expr->btype(), typeManager(),
+                                       location, tname);
   assert(var != errorVariable_.get());
   Bfunction *dummyFcn = errorFunction_.get();
   Bstatement *init = makeInitStatement(dummyFcn, var, expr);
@@ -3588,7 +3595,11 @@
 
   // Create temporary into which caught result will be stored
   std::string tag(be_->namegen("ehtmp"));
-  llvm::Instruction *ai = new llvm::AllocaInst(eht, 0, tag);
+  llvm::Instruction *insBefore = nullptr;
+  llvm::Align aaAlign = be_->datalayout().getPrefTypeAlign(eht);
+  llvm::Value *aaSize = nullptr;
+  llvm::Instruction *ai = new llvm::AllocaInst(eht, 0, aaSize, aaAlign,
+                                               tag, insBefore);
   temporariesDiscovered_.insert(ai);
   newTemporaries_.push_back(ai);
 
diff --git a/unittests/BackendCore/BackendArrayStruct.cpp b/unittests/BackendCore/BackendArrayStruct.cpp
index a1f9eb0..bdbd2ae 100644
--- a/unittests/BackendCore/BackendArrayStruct.cpp
+++ b/unittests/BackendCore/BackendArrayStruct.cpp
@@ -84,12 +84,12 @@
     store { i8*, i32 }* %loc1, { i8*, i32 }** %loc2, align 8
     store i32 0, i32* %x, align 4
     %field.0 = getelementptr inbounds { i8*, i32 }, { i8*, i32 }* %loc1, i32 0, i32 1
-    %loc1.field.ld.0 = load i32, i32* %field.0
+    %loc1.field.ld.0 = load i32, i32* %field.0, align 4
     store i32 %loc1.field.ld.0, i32* %x, align 4
     store i8 0, i8* %b2, align 1
     %field.1 = getelementptr inbounds { i8*, i32 }, { i8*, i32 }* %loc1, i32 0, i32 0
     store i8* %b2, i8** %field.1, align 8
-    %loc2.ld.0 = load { i8*, i32 }*, { i8*, i32 }** %loc2
+    %loc2.ld.0 = load { i8*, i32 }*, { i8*, i32 }** %loc2, align 8
     %field.2 = getelementptr inbounds { i8*, i32 }, { i8*, i32 }* %loc2.ld.0, i32 0, i32 1
     store i32 2, i32* %field.2, align 4
   )RAW_RESULT");
@@ -147,19 +147,19 @@
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     define void @foo(i8* nest %nest.0) #0 {
   entry:
-    %tmp.0 = alloca { i8*, i32 }
-    %x = alloca i32
-    %y = alloca i32
-    %z = alloca i32
+    %tmp.0 = alloca { i8*, i32 }, align 8
+    %x = alloca i32, align 4
+    %y = alloca i32, align 4
+    %z = alloca i32, align 4
     store i32 0, i32* %x, align 4
     store i32 0, i32* %y, align 4
-    %y.ld.0 = load i32, i32* %y
+    %y.ld.0 = load i32, i32* %y, align 4
     %field.0 = getelementptr inbounds { i8*, i32 }, { i8*, i32 }* %tmp.0, i32 0, i32 0
     store i8* null, i8** %field.0, align 8
     %field.1 = getelementptr inbounds { i8*, i32 }, { i8*, i32 }* %tmp.0, i32 0, i32 1
     store i32 %y.ld.0, i32* %field.1, align 4
     %field.2 = getelementptr inbounds { i8*, i32 }, { i8*, i32 }* %tmp.0, i32 0, i32 1
-    %.field.ld.0 = load i32, i32* %field.2
+    %.field.ld.0 = load i32, i32* %field.2, align 4
     store i32 %.field.ld.0, i32* %x, align 4
     store i32 0, i32* %z, align 4
     store i32 42, i32* %z, align 4
@@ -233,14 +233,14 @@
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     define void @foo(i8* nest %nest.0) #0 {
   entry:
-    %tmp.0 = alloca [4 x i64]
-    %x = alloca i64
-    %y = alloca i64
-    %z = alloca i64
-    %w = alloca i64
+    %tmp.0 = alloca [4 x i64], align 8
+    %x = alloca i64, align 8
+    %y = alloca i64, align 8
+    %z = alloca i64, align 8
+    %w = alloca i64, align 8
     store i64 0, i64* %x, align 8
     store i64 0, i64* %y, align 8
-    %y.ld.0 = load i64, i64* %y
+    %y.ld.0 = load i64, i64* %y, align 8
     %index.0 = getelementptr [4 x i64], [4 x i64]* %tmp.0, i32 0, i32 0
     store i64 %y.ld.0, i64* %index.0, align 8
     %index.1 = getelementptr [4 x i64], [4 x i64]* %tmp.0, i32 0, i32 1
@@ -250,14 +250,14 @@
     %index.3 = getelementptr [4 x i64], [4 x i64]* %tmp.0, i32 0, i32 3
     store i64 1, i64* %index.3, align 8
     %index.4 = getelementptr [4 x i64], [4 x i64]* %tmp.0, i32 0, i32 1
-    %.index.ld.0 = load i64, i64* %index.4
+    %.index.ld.0 = load i64, i64* %index.4, align 8
     store i64 %.index.ld.0, i64* %x, align 8
     store i64 0, i64* %z, align 8
     store i64 3, i64* %z, align 8
     store i64 0, i64* %w, align 8
-    %x.ld.0 = load i64, i64* %x
+    %x.ld.0 = load i64, i64* %x, align 8
     %index.5 = getelementptr [4 x i64], [4 x i64]* @const.0, i32 0, i64 %x.ld.0
-    %.index.ld.1 = load i64, i64* %index.5
+    %.index.ld.1 = load i64, i64* %index.5, align 8
     store i64 %.index.ld.1, i64* %w, align 8
     ret void
   }
@@ -311,7 +311,7 @@
     %cast.1 = bitcast [4 x i64]* %ab to i8*
     call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.1, i8* align 8 bitcast ([4 x i64]* @const.1 to i8*), i64 32, i1 false)
     store i64 0, i64* %z, align 8
-    %z.ld.0 = load i64, i64* %z
+    %z.ld.0 = load i64, i64* %z, align 8
     %index.0 = getelementptr [4 x i64], [4 x i64]* %ac, i32 0, i32 0
     store i64 0, i64* %index.0, align 8
     %index.1 = getelementptr [4 x i64], [4 x i64]* %ac, i32 0, i32 1
@@ -370,7 +370,7 @@
     %cast.0 = bitcast { i32*, i32 }* %loc1 to i8*
     call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.0, i8* align 8 bitcast ({ i32*, i32 }* @const.0 to i8*), i64 16, i1 false)
     %field.0 = getelementptr inbounds { i32*, i32 }, { i32*, i32 }* %loc1, i32 0, i32 1
-    %loc1.field.ld.0 = load i32, i32* %field.0
+    %loc1.field.ld.0 = load i32, i32* %field.0, align 4
     %field.1 = getelementptr inbounds { i32*, i32 }, { i32*, i32 }* %loc2, i32 0, i32 0
     store i32* %param1.addr, i32** %field.1, align 8
     %field.2 = getelementptr inbounds { i32*, i32 }, { i32*, i32 }* %loc2, i32 0, i32 1
@@ -472,8 +472,8 @@
   h.mkAssign(dex, scon);
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
-    %p0.ld.0 = load { i32*, i32 }*, { i32*, i32 }** %p0.addr
-    %p1.ld.0 = load i32*, i32** %p1.addr
+    %p0.ld.0 = load { i32*, i32 }*, { i32*, i32 }** %p0.addr, align 8
+    %p1.ld.0 = load i32*, i32** %p1.addr, align 8
     %field.0 = getelementptr inbounds { i32*, i32 }, { i32*, i32 }* %p0.ld.0, i32 0, i32 0
     store i32* %p1.ld.0, i32** %field.0, align 8
     %field.1 = getelementptr inbounds { i32*, i32 }, { i32*, i32 }* %p0.ld.0, i32 0, i32 1
@@ -526,9 +526,9 @@
   h.mkLocal("t2", s1t, scon2);
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
-    %x.ld.0 = load i32, i32* @x
+    %x.ld.0 = load i32, i32* @x, align 4
     store i32 %x.ld.0, i32* getelementptr inbounds ({ i32 }, { i32 }* @t, i32 0, i32 0), align 4
-    %t.field.ld.0 = load i32, i32* getelementptr inbounds ({ i32 }, { i32 }* @t, i32 0, i32 0)
+    %t.field.ld.0 = load i32, i32* getelementptr inbounds ({ i32 }, { i32 }* @t, i32 0, i32 0), align 4
     %field.2 = getelementptr inbounds { i32 }, { i32 }* %t2, i32 0, i32 0
     store i32 %t.field.ld.0, i32* %field.2, align 4
   )RAW_RESULT");
@@ -583,12 +583,12 @@
     %cast.0 = bitcast [4 x i64]* %aa to i8*
     call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.0, i8* align 8 bitcast ([4 x i64]* @const.0 to i8*), i64 32, i1 false)
     %index.0 = getelementptr [4 x i64], [4 x i64]* %aa, i32 0, i32 1
-    %aa.index.ld.0 = load i64, i64* %index.0
+    %aa.index.ld.0 = load i64, i64* %index.0, align 8
     %index.1 = getelementptr [4 x i64], [4 x i64]* %aa, i32 0, i64 %aa.index.ld.0
     %index.2 = getelementptr [4 x i64], [4 x i64]* %aa, i32 0, i64 3
-    %aa.index.ld.1 = load i64, i64* %index.2
+    %aa.index.ld.1 = load i64, i64* %index.2, align 8
     %index.3 = getelementptr [4 x i64], [4 x i64]* %aa, i32 0, i64 %aa.index.ld.1
-    %aa.index.ld.2 = load i64, i64* %index.3
+    %aa.index.ld.2 = load i64, i64* %index.3, align 8
     store i64 %aa.index.ld.2, i64* %index.1, align 8
   )RAW_RESULT");
 
@@ -652,10 +652,10 @@
     %cast.0 = bitcast [10 x { i8, [4 x { i64, i64 }*], i8 }*]* %t1 to i8*
     call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.0, i8* align 8 bitcast ([10 x { i8, [4 x { i64, i64 }*], i8 }*]* @const.0 to i8*), i64 80, i1 false)
     %index.0 = getelementptr [10 x { i8, [4 x { i64, i64 }*], i8 }*], [10 x { i8, [4 x { i64, i64 }*], i8 }*]* %t1, i32 0, i32 7
-    %t1.index.ld.0 = load { i8, [4 x { i64, i64 }*], i8 }*, { i8, [4 x { i64, i64 }*], i8 }** %index.0
+    %t1.index.ld.0 = load { i8, [4 x { i64, i64 }*], i8 }*, { i8, [4 x { i64, i64 }*], i8 }** %index.0, align 8
     %field.0 = getelementptr inbounds { i8, [4 x { i64, i64 }*], i8 }, { i8, [4 x { i64, i64 }*], i8 }* %t1.index.ld.0, i32 0, i32 1
     %index.1 = getelementptr [4 x { i64, i64 }*], [4 x { i64, i64 }*]* %field.0, i32 0, i32 3
-    %.field.index.ld.0 = load { i64, i64 }*, { i64, i64 }** %index.1
+    %.field.index.ld.0 = load { i64, i64 }*, { i64, i64 }** %index.1, align 8
     %field.1 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %.field.index.ld.0, i32 0, i32 0
     store i64 5, i64* %field.1, align 8
     )RAW_RESULT");
@@ -681,12 +681,12 @@
 
     DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     %index.2 = getelementptr [10 x { i8, [4 x { i64, i64 }*], i8 }*], [10 x { i8, [4 x { i64, i64 }*], i8 }*]* %t1, i32 0, i32 0
-    %t1.index.ld.1 = load { i8, [4 x { i64, i64 }*], i8 }*, { i8, [4 x { i64, i64 }*], i8 }** %index.2
+    %t1.index.ld.1 = load { i8, [4 x { i64, i64 }*], i8 }*, { i8, [4 x { i64, i64 }*], i8 }** %index.2, align 8
     %field.2 = getelementptr inbounds { i8, [4 x { i64, i64 }*], i8 }, { i8, [4 x { i64, i64 }*], i8 }* %t1.index.ld.1, i32 0, i32 1
     %index.3 = getelementptr [4 x { i64, i64 }*], [4 x { i64, i64 }*]* %field.2, i32 0, i32 0
-    %.field.index.ld.1 = load { i64, i64 }*, { i64, i64 }** %index.3
+    %.field.index.ld.1 = load { i64, i64 }*, { i64, i64 }** %index.3, align 8
     %field.3 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %.field.index.ld.1, i32 0, i32 1
-    %.field.ld.0 = load i64, i64* %field.3
+    %.field.ld.0 = load i64, i64* %field.3, align 8
     store i64 %.field.ld.0, i64* %q, align 8
     )RAW_RESULT");
 
diff --git a/unittests/BackendCore/BackendCABIOracleTests.cpp b/unittests/BackendCore/BackendCABIOracleTests.cpp
index 7edf2e8..dc09b34 100644
--- a/unittests/BackendCore/BackendCABIOracleTests.cpp
+++ b/unittests/BackendCore/BackendCABIOracleTests.cpp
@@ -532,9 +532,9 @@
   Bstatement *rst2 = h.mkReturn(rvals2, FcnTestHarness::NoAppend);
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
-    %p3.ld.0 = load i8, i8* %p3.addr
+    %p3.ld.0 = load i8, i8* %p3.addr, align 1
     %sub.0 = sub i8 %p3.ld.0, 1
-    %p4.ld.0 = load i8, i8* %p4.addr
+    %p4.ld.0 = load i8, i8* %p4.addr, align 1
     %cast.1 = bitcast { float, float, i16, i16, i16 }* %p0.addr to { <2 x float>, i48 }*
     %field0.0 = getelementptr inbounds { <2 x float>, i48 }, { <2 x float>, i48 }* %cast.1, i32 0, i32 0
     %ld.1 = load <2 x float>, <2 x float>* %field0.0, align 8
@@ -651,9 +651,9 @@
   Bstatement *rst2 = h.mkReturn(rvals2, FcnTestHarness::NoAppend);
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
-    %p3.ld.0 = load i8, i8* %p3.addr
+    %p3.ld.0 = load i8, i8* %p3.addr, align 1
     %sub.0 = sub i8 %p3.ld.0, 1
-    %p4.ld.0 = load i8, i8* %p4.addr
+    %p4.ld.0 = load i8, i8* %p4.addr, align 1
     %cast.1 = bitcast { float, float, i16, i16, i16 }* %p0.addr to { i64, i48 }*
     %field0.0 = getelementptr inbounds { i64, i48 }, { i64, i48 }* %cast.1, i32 0, i32 0
     %ld.1 = load i64, i64* %field0.0, align 8
diff --git a/unittests/BackendCore/BackendCallTests.cpp b/unittests/BackendCore/BackendCallTests.cpp
index a1df19a..c8f4f21 100644
--- a/unittests/BackendCore/BackendCallTests.cpp
+++ b/unittests/BackendCore/BackendCallTests.cpp
@@ -49,7 +49,7 @@
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     %call.0 = call addrspace(0) i64 @foo(i8* nest undef, i32 3, i32 6, i64* null)
     store i64 %call.0, i64* %x, align 8
-    %x.ld.0 = load i64, i64* %x
+    %x.ld.0 = load i64, i64* %x, align 8
     ret i64 %x.ld.0
   )RAW_RESULT");
 
@@ -143,7 +143,7 @@
 
   {
     DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
-    %p0.ld.0 = load i8*, i8** %p0.addr
+    %p0.ld.0 = load i8*, i8** %p0.addr, align 8
     %field.0 = getelementptr inbounds { i8*, i32*, i64*, i64 }, { i8*, i32*, i64*, i64 }* %tmp.0, i32 0, i32 0
     store i8* %p0.ld.0, i8** %field.0, align 8
     %field.1 = getelementptr inbounds { i8*, i32*, i64*, i64 }, { i8*, i32*, i64*, i64 }* %tmp.0, i32 0, i32 1
diff --git a/unittests/BackendCore/BackendDebugEmit.cpp b/unittests/BackendCore/BackendDebugEmit.cpp
index 352ab0d..622718d 100644
--- a/unittests/BackendCore/BackendDebugEmit.cpp
+++ b/unittests/BackendCore/BackendDebugEmit.cpp
@@ -44,7 +44,7 @@
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     define void @foo(i8* nest %nest.0) #0 {
   entry:
-    %x = alloca i32
+    %x = alloca i32, align 4
     store i32 0, i32* %x, align 4
     call void @llvm.dbg.declare(metadata i32* %x, metadata !5, metadata !DIExpression()), !dbg !12
     ret void
@@ -212,10 +212,10 @@
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     define void @foo(i8* nest %nest.0) #0 !dbg !5 {
-    entry:
-      %x = alloca i32
-      ret void, !dbg !10
-    }
+  entry:
+    %x = alloca i32, align 4
+    ret void, !dbg !10
+  }
   )RAW_RESULT");
 
   bool broken = h.finish(PreserveDebugInfo);
diff --git a/unittests/BackendCore/BackendExprTests.cpp b/unittests/BackendCore/BackendExprTests.cpp
index cc57995..892547c 100644
--- a/unittests/BackendCore/BackendExprTests.cpp
+++ b/unittests/BackendCore/BackendExprTests.cpp
@@ -295,11 +295,11 @@
   }
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
-    %param3.ld.0 = load i64*, i64** %param3.addr
+    %param3.ld.0 = load i64*, i64** %param3.addr, align 8
     %cast.0 = bitcast i64* %param3.ld.0 to i32*
     store i32 5, i32* %cast.0, align 4
     store double 0.000000e+00, double* %p, align 8
-    %p.ld.0 = load double, double* %p
+    %p.ld.0 = load double, double* %p, align 8
     %ftoui.0 = fptoui double %p.ld.0 to i64
     %itpcast.0 = inttoptr i64 %ftoui.0 to i32*
     store i32 5, i32* %itpcast.0, align 4
@@ -354,90 +354,90 @@
   }
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
-    %p1.ld.0 = load double, double* %p1.addr
+    %p1.ld.0 = load double, double* %p1.addr, align 8
     %fptrunc.0 = fptrunc double %p1.ld.0 to float
     store float %fptrunc.0, float* %p0.addr, align 4
-    %p0.ld.0 = load float, float* %p0.addr
+    %p0.ld.0 = load float, float* %p0.addr, align 4
     %fpext.0 = fpext float %p0.ld.0 to double
     store double %fpext.0, double* %p1.addr, align 8
-    %p2.ld.0 = load i32, i32* %p2.addr
+    %p2.ld.0 = load i32, i32* %p2.addr, align 4
     %sitof.0 = sitofp i32 %p2.ld.0 to float
     store float %sitof.0, float* %p0.addr, align 4
-    %p0.ld.1 = load float, float* %p0.addr
+    %p0.ld.1 = load float, float* %p0.addr, align 4
     %ftosi.0 = fptosi float %p0.ld.1 to i32
     store i32 %ftosi.0, i32* %p2.addr, align 4
-    %p2.ld.1 = load i32, i32* %p2.addr
+    %p2.ld.1 = load i32, i32* %p2.addr, align 4
     %sitof.1 = sitofp i32 %p2.ld.1 to double
     store double %sitof.1, double* %p1.addr, align 8
-    %p1.ld.1 = load double, double* %p1.addr
+    %p1.ld.1 = load double, double* %p1.addr, align 8
     %ftosi.1 = fptosi double %p1.ld.1 to i32
     store i32 %ftosi.1, i32* %p2.addr, align 4
-    %p3.ld.0 = load i64, i64* %p3.addr
+    %p3.ld.0 = load i64, i64* %p3.addr, align 8
     %sitof.2 = sitofp i64 %p3.ld.0 to float
     store float %sitof.2, float* %p0.addr, align 4
-    %p0.ld.2 = load float, float* %p0.addr
+    %p0.ld.2 = load float, float* %p0.addr, align 4
     %ftosi.2 = fptosi float %p0.ld.2 to i64
     store i64 %ftosi.2, i64* %p3.addr, align 8
-    %p3.ld.1 = load i64, i64* %p3.addr
+    %p3.ld.1 = load i64, i64* %p3.addr, align 8
     %sitof.3 = sitofp i64 %p3.ld.1 to double
     store double %sitof.3, double* %p1.addr, align 8
-    %p1.ld.2 = load double, double* %p1.addr
+    %p1.ld.2 = load double, double* %p1.addr, align 8
     %ftosi.3 = fptosi double %p1.ld.2 to i64
     store i64 %ftosi.3, i64* %p3.addr, align 8
-    %p3.ld.2 = load i64, i64* %p3.addr
+    %p3.ld.2 = load i64, i64* %p3.addr, align 8
     %trunc.0 = trunc i64 %p3.ld.2 to i32
     store i32 %trunc.0, i32* %p2.addr, align 4
-    %p2.ld.2 = load i32, i32* %p2.addr
+    %p2.ld.2 = load i32, i32* %p2.addr, align 4
     %sext.0 = sext i32 %p2.ld.2 to i64
     store i64 %sext.0, i64* %p3.addr, align 8
-    %p4.ld.0 = load i32, i32* %p4.addr
+    %p4.ld.0 = load i32, i32* %p4.addr, align 4
     %uitof.0 = uitofp i32 %p4.ld.0 to float
     store float %uitof.0, float* %p0.addr, align 4
-    %p0.ld.3 = load float, float* %p0.addr
+    %p0.ld.3 = load float, float* %p0.addr, align 4
     %ftoui.0 = fptoui float %p0.ld.3 to i32
     store i32 %ftoui.0, i32* %p4.addr, align 4
-    %p4.ld.1 = load i32, i32* %p4.addr
+    %p4.ld.1 = load i32, i32* %p4.addr, align 4
     %uitof.1 = uitofp i32 %p4.ld.1 to double
     store double %uitof.1, double* %p1.addr, align 8
-    %p1.ld.3 = load double, double* %p1.addr
+    %p1.ld.3 = load double, double* %p1.addr, align 8
     %ftoui.1 = fptoui double %p1.ld.3 to i32
     store i32 %ftoui.1, i32* %p4.addr, align 4
-    %p4.ld.2 = load i32, i32* %p4.addr
+    %p4.ld.2 = load i32, i32* %p4.addr, align 4
     store i32 %p4.ld.2, i32* %p2.addr, align 4
-    %p2.ld.3 = load i32, i32* %p2.addr
+    %p2.ld.3 = load i32, i32* %p2.addr, align 4
     store i32 %p2.ld.3, i32* %p4.addr, align 4
-    %p4.ld.3 = load i32, i32* %p4.addr
+    %p4.ld.3 = load i32, i32* %p4.addr, align 4
     %zext.0 = zext i32 %p4.ld.3 to i64
     store i64 %zext.0, i64* %p3.addr, align 8
-    %p3.ld.3 = load i64, i64* %p3.addr
+    %p3.ld.3 = load i64, i64* %p3.addr, align 8
     %trunc.1 = trunc i64 %p3.ld.3 to i32
     store i32 %trunc.1, i32* %p4.addr, align 4
-    %p5.ld.0 = load i64, i64* %p5.addr
+    %p5.ld.0 = load i64, i64* %p5.addr, align 8
     %uitof.2 = uitofp i64 %p5.ld.0 to float
     store float %uitof.2, float* %p0.addr, align 4
-    %p0.ld.4 = load float, float* %p0.addr
+    %p0.ld.4 = load float, float* %p0.addr, align 4
     %ftoui.2 = fptoui float %p0.ld.4 to i64
     store i64 %ftoui.2, i64* %p5.addr, align 8
-    %p5.ld.1 = load i64, i64* %p5.addr
+    %p5.ld.1 = load i64, i64* %p5.addr, align 8
     %uitof.3 = uitofp i64 %p5.ld.1 to double
     store double %uitof.3, double* %p1.addr, align 8
-    %p1.ld.4 = load double, double* %p1.addr
+    %p1.ld.4 = load double, double* %p1.addr, align 8
     %ftoui.3 = fptoui double %p1.ld.4 to i64
     store i64 %ftoui.3, i64* %p5.addr, align 8
-    %p5.ld.2 = load i64, i64* %p5.addr
+    %p5.ld.2 = load i64, i64* %p5.addr, align 8
     %trunc.2 = trunc i64 %p5.ld.2 to i32
     store i32 %trunc.2, i32* %p2.addr, align 4
-    %p2.ld.4 = load i32, i32* %p2.addr
+    %p2.ld.4 = load i32, i32* %p2.addr, align 4
     %sext.1 = sext i32 %p2.ld.4 to i64
     store i64 %sext.1, i64* %p5.addr, align 8
-    %p5.ld.3 = load i64, i64* %p5.addr
+    %p5.ld.3 = load i64, i64* %p5.addr, align 8
     store i64 %p5.ld.3, i64* %p3.addr, align 8
-    %p3.ld.4 = load i64, i64* %p3.addr
+    %p3.ld.4 = load i64, i64* %p3.addr, align 8
     store i64 %p3.ld.4, i64* %p5.addr, align 8
-    %p5.ld.4 = load i64, i64* %p5.addr
+    %p5.ld.4 = load i64, i64* %p5.addr, align 8
     %trunc.3 = trunc i64 %p5.ld.4 to i32
     store i32 %trunc.3, i32* %p4.addr, align 4
-    %p4.ld.4 = load i32, i32* %p4.addr
+    %p4.ld.4 = load i32, i32* %p4.addr, align 4
     %zext.1 = zext i32 %p4.ld.4 to i64
     store i64 %zext.1, i64* %p5.addr, align 8
   )RAW_RESULT");
@@ -495,14 +495,14 @@
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     define void @foo(i8* nest %nest.0) #0 {
   entry:
-    %tmp.3 = alloca { double, double }
-    %tmp.2 = alloca { float, float }
-    %tmp.1 = alloca { float, float }
-    %tmp.0 = alloca { double, double }
-    %a = alloca { float, float }
-    %b = alloca { float, float }
-    %x = alloca { double, double }
-    %y = alloca { double, double }
+    %tmp.3 = alloca { double, double }, align 8
+    %tmp.2 = alloca { float, float }, align 8
+    %tmp.1 = alloca { float, float }, align 8
+    %tmp.0 = alloca { double, double }, align 8
+    %a = alloca { float, float }, align 8
+    %b = alloca { float, float }, align 8
+    %x = alloca { double, double }, align 8
+    %y = alloca { double, double }, align 8
     %cast.0 = bitcast { float, float }* %a to i8*
     call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.0, i8* align 4 bitcast ({ float, float }* @const.0 to i8*), i64 8, i1 false)
     %cast.1 = bitcast { float, float }* %b to i8*
@@ -515,10 +515,10 @@
     %cast.5 = bitcast { double, double }* %x to i8*
     call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.4, i8* align 8 %cast.5, i64 16, i1 false)
     %field.0 = getelementptr inbounds { double, double }, { double, double }* %tmp.0, i32 0, i32 0
-    %.real.ld.0 = load double, double* %field.0
+    %.real.ld.0 = load double, double* %field.0, align 8
     %fptrunc.0 = fptrunc double %.real.ld.0 to float
     %field.1 = getelementptr inbounds { double, double }, { double, double }* %tmp.0, i32 0, i32 1
-    %.imag.ld.0 = load double, double* %field.1
+    %.imag.ld.0 = load double, double* %field.1, align 8
     %fptrunc.1 = fptrunc double %.imag.ld.0 to float
     %field.2 = getelementptr inbounds { float, float }, { float, float }* %tmp.1, i32 0, i32 0
     store float %fptrunc.0, float* %field.2, align 4
@@ -531,10 +531,10 @@
     %cast.9 = bitcast { float, float }* %b to i8*
     call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.8, i8* align 4 %cast.9, i64 8, i1 false)
     %field.4 = getelementptr inbounds { float, float }, { float, float }* %tmp.2, i32 0, i32 0
-    %.real.ld.1 = load float, float* %field.4
+    %.real.ld.1 = load float, float* %field.4, align 4
     %fpext.0 = fpext float %.real.ld.1 to double
     %field.5 = getelementptr inbounds { float, float }, { float, float }* %tmp.2, i32 0, i32 1
-    %.imag.ld.1 = load float, float* %field.5
+    %.imag.ld.1 = load float, float* %field.5, align 4
     %fpext.1 = fpext float %.imag.ld.1 to double
     %field.6 = getelementptr inbounds { double, double }, { double, double }* %tmp.3, i32 0, i32 0
     store double %fpext.0, double* %field.6, align 8
@@ -572,21 +572,21 @@
   // We should get a distinct Bexpression each time we create a new
   // var expression.
   Bexpression *ve1 = be->var_expression(loc1, loc);
-  EXPECT_EQ(repr(ve1->value()), "%loc1 = alloca i64");
+  EXPECT_EQ(repr(ve1->value()), "%loc1 = alloca i64, align 8");
   h.mkExprStmt(ve1);
   Bexpression *ve2 = be->var_expression(loc1, loc);
   h.mkExprStmt(ve2);
-  EXPECT_EQ(repr(ve2->value()), "%loc1 = alloca i64");
+  EXPECT_EQ(repr(ve2->value()), "%loc1 = alloca i64, align 8");
   EXPECT_NE(ve1, ve2);
 
   // Same here.
   Bexpression *ve3 = be->var_expression(loc1, loc);
   Bexpression *ve3r = be->var_expression(loc1, loc);
-  EXPECT_EQ(repr(ve3->value()), "%loc1 = alloca i64");
+  EXPECT_EQ(repr(ve3->value()), "%loc1 = alloca i64, align 8");
   h.mkAssign(ve3, ve3r);
   Bexpression *ve4 = be->var_expression(loc1, loc);
   Bexpression *ve4r = be->var_expression(loc1, loc);
-  EXPECT_EQ(repr(ve4->value()), "%loc1 = alloca i64");
+  EXPECT_EQ(repr(ve4->value()), "%loc1 = alloca i64, align 8");
   EXPECT_NE(ve3, ve4);
   h.mkAssign(ve4, ve4r);
 
@@ -633,58 +633,58 @@
     store i64 0, i64* %x, align 8
     store i64 0, i64* %y, align 8
     store double 0.000000e+00, double* %z, align 8
-    %x.ld.0 = load i64, i64* %x
+    %x.ld.0 = load i64, i64* %x, align 8
     %icmp.0 = icmp eq i64 9, %x.ld.0
     %zext.0 = zext i1 %icmp.0 to i8
-    %x.ld.1 = load i64, i64* %x
+    %x.ld.1 = load i64, i64* %x, align 8
     %icmp.1 = icmp ne i64 9, %x.ld.1
     %zext.1 = zext i1 %icmp.1 to i8
-    %x.ld.2 = load i64, i64* %x
+    %x.ld.2 = load i64, i64* %x, align 8
     %icmp.2 = icmp slt i64 9, %x.ld.2
     %zext.2 = zext i1 %icmp.2 to i8
-    %x.ld.3 = load i64, i64* %x
+    %x.ld.3 = load i64, i64* %x, align 8
     %icmp.3 = icmp sle i64 9, %x.ld.3
     %zext.3 = zext i1 %icmp.3 to i8
-    %x.ld.4 = load i64, i64* %x
+    %x.ld.4 = load i64, i64* %x, align 8
     %icmp.4 = icmp sgt i64 9, %x.ld.4
     %zext.4 = zext i1 %icmp.4 to i8
-    %x.ld.5 = load i64, i64* %x
+    %x.ld.5 = load i64, i64* %x, align 8
     %icmp.5 = icmp sge i64 9, %x.ld.5
     %zext.5 = zext i1 %icmp.5 to i8
-    %y.ld.0 = load i64, i64* %y
+    %y.ld.0 = load i64, i64* %y, align 8
     %icmp.6 = icmp eq i64 9, %y.ld.0
     %zext.6 = zext i1 %icmp.6 to i8
-    %y.ld.1 = load i64, i64* %y
+    %y.ld.1 = load i64, i64* %y, align 8
     %icmp.7 = icmp ne i64 9, %y.ld.1
     %zext.7 = zext i1 %icmp.7 to i8
-    %y.ld.2 = load i64, i64* %y
+    %y.ld.2 = load i64, i64* %y, align 8
     %icmp.8 = icmp ult i64 9, %y.ld.2
     %zext.8 = zext i1 %icmp.8 to i8
-    %y.ld.3 = load i64, i64* %y
+    %y.ld.3 = load i64, i64* %y, align 8
     %icmp.9 = icmp ule i64 9, %y.ld.3
     %zext.9 = zext i1 %icmp.9 to i8
-    %y.ld.4 = load i64, i64* %y
+    %y.ld.4 = load i64, i64* %y, align 8
     %icmp.10 = icmp ugt i64 9, %y.ld.4
     %zext.10 = zext i1 %icmp.10 to i8
-    %y.ld.5 = load i64, i64* %y
+    %y.ld.5 = load i64, i64* %y, align 8
     %icmp.11 = icmp uge i64 9, %y.ld.5
     %zext.11 = zext i1 %icmp.11 to i8
-    %z.ld.0 = load double, double* %z
+    %z.ld.0 = load double, double* %z, align 8
     %fcmp.0 = fcmp oeq double 9.000000e+00, %z.ld.0
     %zext.12 = zext i1 %fcmp.0 to i8
-    %z.ld.1 = load double, double* %z
+    %z.ld.1 = load double, double* %z, align 8
     %fcmp.1 = fcmp une double 9.000000e+00, %z.ld.1
     %zext.13 = zext i1 %fcmp.1 to i8
-    %z.ld.2 = load double, double* %z
+    %z.ld.2 = load double, double* %z, align 8
     %fcmp.2 = fcmp olt double 9.000000e+00, %z.ld.2
     %zext.14 = zext i1 %fcmp.2 to i8
-    %z.ld.3 = load double, double* %z
+    %z.ld.3 = load double, double* %z, align 8
     %fcmp.3 = fcmp ole double 9.000000e+00, %z.ld.3
     %zext.15 = zext i1 %fcmp.3 to i8
-    %z.ld.4 = load double, double* %z
+    %z.ld.4 = load double, double* %z, align 8
     %fcmp.4 = fcmp ogt double 9.000000e+00, %z.ld.4
     %zext.16 = zext i1 %fcmp.4 to i8
-    %z.ld.5 = load double, double* %z
+    %z.ld.5 = load double, double* %z, align 8
     %fcmp.5 = fcmp oge double 9.000000e+00, %z.ld.5
     %zext.17 = zext i1 %fcmp.5 to i8
   )RAW_RESULT");
@@ -729,13 +729,13 @@
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     store i64 0, i64* %x, align 8
     store double 0.000000e+00, double* %y, align 8
-    %x.ld.0 = load i64, i64* %x
+    %x.ld.0 = load i64, i64* %x, align 8
     %add.0 = add i64 9, %x.ld.0
-    %x.ld.1 = load i64, i64* %x
+    %x.ld.1 = load i64, i64* %x, align 8
     %sub.0 = sub i64 9, %x.ld.1
-    %y.ld.0 = load double, double* %y
+    %y.ld.0 = load double, double* %y, align 8
     %fadd.0 = fadd double 9.000000e+00, %y.ld.0
-    %y.ld.1 = load double, double* %y
+    %y.ld.1 = load double, double* %y, align 8
     %fsub.0 = fsub double 9.000000e+00, %y.ld.1
   )RAW_RESULT");
 
@@ -773,10 +773,10 @@
     store i64 9, i64* %y, align 8
     store i64 10, i64* %z, align 8
     store i64 11, i64* %w, align 8
-    %y.ld.0 = load i64, i64* %y
-    %z.ld.0 = load i64, i64* %z
+    %y.ld.0 = load i64, i64* %y, align 8
+    %z.ld.0 = load i64, i64* %z, align 8
     %add.0 = add i64 %y.ld.0, %z.ld.0
-    %w.ld.0 = load i64, i64* %w
+    %w.ld.0 = load i64, i64* %w, align 8
     %add.1 = add i64 %add.0, %w.ld.0
     store i64 %add.1, i64* %x, align 8
   )RAW_RESULT");
@@ -831,59 +831,59 @@
     store i64 0, i64* %x2, align 8
     store i64 0, i64* %y2, align 8
     store i8 0, i8* %z2, align 1
-    %x.ld.0 = load i64, i64* %x
-    %x2.ld.0 = load i64, i64* %x2
+    %x.ld.0 = load i64, i64* %x, align 8
+    %x2.ld.0 = load i64, i64* %x2, align 8
     %iand.0 = and i64 %x.ld.0, %x2.ld.0
-    %x.ld.1 = load i64, i64* %x
-    %x2.ld.1 = load i64, i64* %x2
+    %x.ld.1 = load i64, i64* %x, align 8
+    %x2.ld.1 = load i64, i64* %x2, align 8
     %ior.0 = or i64 %x.ld.1, %x2.ld.1
-    %x.ld.2 = load i64, i64* %x
-    %x2.ld.2 = load i64, i64* %x2
+    %x.ld.2 = load i64, i64* %x, align 8
+    %x2.ld.2 = load i64, i64* %x2, align 8
     %iand.1 = and i64 %x.ld.2, %x2.ld.2
-    %x.ld.3 = load i64, i64* %x
-    %x2.ld.3 = load i64, i64* %x2
+    %x.ld.3 = load i64, i64* %x, align 8
+    %x2.ld.3 = load i64, i64* %x2, align 8
     %ior.1 = or i64 %x.ld.3, %x2.ld.3
-    %x.ld.4 = load i64, i64* %x
-    %x2.ld.4 = load i64, i64* %x2
+    %x.ld.4 = load i64, i64* %x, align 8
+    %x2.ld.4 = load i64, i64* %x2, align 8
     %xor.0 = xor i64 %x.ld.4, %x2.ld.4
-    %x.ld.5 = load i64, i64* %x
-    %x2.ld.5 = load i64, i64* %x2
+    %x.ld.5 = load i64, i64* %x, align 8
+    %x2.ld.5 = load i64, i64* %x2, align 8
     %iand.2 = and i64 %x.ld.5, %x2.ld.5
-    %y.ld.0 = load i64, i64* %y
-    %y2.ld.0 = load i64, i64* %y2
+    %y.ld.0 = load i64, i64* %y, align 8
+    %y2.ld.0 = load i64, i64* %y2, align 8
     %iand.3 = and i64 %y.ld.0, %y2.ld.0
-    %y.ld.1 = load i64, i64* %y
-    %y2.ld.1 = load i64, i64* %y2
+    %y.ld.1 = load i64, i64* %y, align 8
+    %y2.ld.1 = load i64, i64* %y2, align 8
     %ior.2 = or i64 %y.ld.1, %y2.ld.1
-    %y.ld.2 = load i64, i64* %y
-    %y2.ld.2 = load i64, i64* %y2
+    %y.ld.2 = load i64, i64* %y, align 8
+    %y2.ld.2 = load i64, i64* %y2, align 8
     %iand.4 = and i64 %y.ld.2, %y2.ld.2
-    %y.ld.3 = load i64, i64* %y
-    %y2.ld.3 = load i64, i64* %y2
+    %y.ld.3 = load i64, i64* %y, align 8
+    %y2.ld.3 = load i64, i64* %y2, align 8
     %ior.3 = or i64 %y.ld.3, %y2.ld.3
-    %y.ld.4 = load i64, i64* %y
-    %y2.ld.4 = load i64, i64* %y2
+    %y.ld.4 = load i64, i64* %y, align 8
+    %y2.ld.4 = load i64, i64* %y2, align 8
     %xor.1 = xor i64 %y.ld.4, %y2.ld.4
-    %y.ld.5 = load i64, i64* %y
-    %y2.ld.5 = load i64, i64* %y2
+    %y.ld.5 = load i64, i64* %y, align 8
+    %y2.ld.5 = load i64, i64* %y2, align 8
     %iand.5 = and i64 %y.ld.5, %y2.ld.5
-    %z.ld.0 = load i8, i8* %z
-    %z2.ld.0 = load i8, i8* %z2
+    %z.ld.0 = load i8, i8* %z, align 1
+    %z2.ld.0 = load i8, i8* %z2, align 1
     %iand.6 = and i8 %z.ld.0, %z2.ld.0
-    %z.ld.1 = load i8, i8* %z
-    %z2.ld.1 = load i8, i8* %z2
+    %z.ld.1 = load i8, i8* %z, align 1
+    %z2.ld.1 = load i8, i8* %z2, align 1
     %ior.4 = or i8 %z.ld.1, %z2.ld.1
-    %z.ld.2 = load i8, i8* %z
-    %z2.ld.2 = load i8, i8* %z2
+    %z.ld.2 = load i8, i8* %z, align 1
+    %z2.ld.2 = load i8, i8* %z2, align 1
     %iand.7 = and i8 %z.ld.2, %z2.ld.2
-    %z.ld.3 = load i8, i8* %z
-    %z2.ld.3 = load i8, i8* %z2
+    %z.ld.3 = load i8, i8* %z, align 1
+    %z2.ld.3 = load i8, i8* %z2, align 1
     %ior.5 = or i8 %z.ld.3, %z2.ld.3
-    %z.ld.4 = load i8, i8* %z
-    %z2.ld.4 = load i8, i8* %z2
+    %z.ld.4 = load i8, i8* %z, align 1
+    %z2.ld.4 = load i8, i8* %z2, align 1
     %xor.2 = xor i8 %z.ld.4, %z2.ld.4
-    %z.ld.5 = load i8, i8* %z
-    %z2.ld.5 = load i8, i8* %z2
+    %z.ld.5 = load i8, i8* %z, align 1
+    %z2.ld.5 = load i8, i8* %z2, align 1
     %iand.8 = and i8 %z.ld.5, %z2.ld.5
   )RAW_RESULT");
 
@@ -934,21 +934,21 @@
     store i16 0, i16* %x, align 2
     store i16 0, i16* %y, align 2
     store double 0.000000e+00, double* %z, align 8
-    %x.ld.0 = load i16, i16* %x
+    %x.ld.0 = load i16, i16* %x, align 2
     %mul.0 = mul i16 -17, %x.ld.0
-    %x.ld.1 = load i16, i16* %x
+    %x.ld.1 = load i16, i16* %x, align 2
     %div.0 = sdiv i16 -17, %x.ld.1
-    %x.ld.2 = load i16, i16* %x
+    %x.ld.2 = load i16, i16* %x, align 2
     %mod.0 = srem i16 -17, %x.ld.2
-    %y.ld.0 = load i16, i16* %y
+    %y.ld.0 = load i16, i16* %y, align 2
     %mul.1 = mul i16 13, %y.ld.0
-    %y.ld.1 = load i16, i16* %y
+    %y.ld.1 = load i16, i16* %y, align 2
     %div.1 = udiv i16 13, %y.ld.1
-    %y.ld.2 = load i16, i16* %y
+    %y.ld.2 = load i16, i16* %y, align 2
     %mod.1 = urem i16 13, %y.ld.2
-    %z.ld.0 = load double, double* %z
+    %z.ld.0 = load double, double* %z, align 8
     %fmul.0 = fmul double 9.000000e+00, %z.ld.0
-    %z.ld.1 = load double, double* %z
+    %z.ld.1 = load double, double* %z, align 8
     %fdiv.0 = fdiv double 9.000000e+00, %z.ld.1
   )RAW_RESULT");
 
@@ -1015,24 +1015,24 @@
     store i64 0, i64* %y, align 8
     store i64 0, i64* %s, align 8
     store i32 0, i32* %z, align 4
-    %x.ld.0 = load i64, i64* %x
-    %s.ld.0 = load i64, i64* %s
+    %x.ld.0 = load i64, i64* %x, align 8
+    %s.ld.0 = load i64, i64* %s, align 8
     %shl.0 = shl i64 %x.ld.0, %s.ld.0
-    %x.ld.1 = load i64, i64* %x
-    %s.ld.1 = load i64, i64* %s
+    %x.ld.1 = load i64, i64* %x, align 8
+    %s.ld.1 = load i64, i64* %s, align 8
     %shr.0 = ashr i64 %x.ld.1, %s.ld.1
-    %y.ld.0 = load i64, i64* %y
-    %s.ld.2 = load i64, i64* %s
+    %y.ld.0 = load i64, i64* %y, align 8
+    %s.ld.2 = load i64, i64* %s, align 8
     %shl.1 = shl i64 %y.ld.0, %s.ld.2
-    %y.ld.1 = load i64, i64* %y
-    %s.ld.3 = load i64, i64* %s
+    %y.ld.1 = load i64, i64* %y, align 8
+    %s.ld.3 = load i64, i64* %s, align 8
     %shr.1 = lshr i64 %y.ld.1, %s.ld.3
-    %x.ld.2 = load i64, i64* %x
-    %z.ld.0 = load i32, i32* %z
+    %x.ld.2 = load i64, i64* %x, align 8
+    %z.ld.0 = load i32, i32* %z, align 4
     %zext.0 = zext i32 %z.ld.0 to i64
     %shl.2 = shl i64 %x.ld.2, %zext.0
-    %z.ld.1 = load i32, i32* %z
-    %y.ld.2 = load i64, i64* %y
+    %z.ld.1 = load i32, i32* %z, align 4
+    %y.ld.2 = load i64, i64* %y, align 8
     %trunc.0 = trunc i64 %y.ld.2 to i32
     %shr.2 = lshr i32 %z.ld.1, %trunc.0
   )RAW_RESULT");
@@ -1076,23 +1076,23 @@
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     define void @foo(i8* nest %nest.0) #0 {
   entry:
-    %tmp.12 = alloca { double, double }
-    %tmp.11 = alloca { double, double }
-    %tmp.10 = alloca { double, double }
-    %tmp.9 = alloca { double, double }
-    %tmp.8 = alloca { double, double }
-    %tmp.7 = alloca { double, double }
-    %tmp.6 = alloca { double, double }
-    %tmp.5 = alloca { double, double }
-    %tmp.4 = alloca { double, double }
-    %tmp.3 = alloca { double, double }
-    %tmp.2 = alloca { double, double }
-    %tmp.1 = alloca { double, double }
-    %tmp.0 = alloca { double, double }
-    %x = alloca { double, double }
-    %y = alloca { double, double }
-    %z = alloca { double, double }
-    %b = alloca i8
+    %tmp.12 = alloca { double, double }, align 8
+    %tmp.11 = alloca { double, double }, align 8
+    %tmp.10 = alloca { double, double }, align 8
+    %tmp.9 = alloca { double, double }, align 8
+    %tmp.8 = alloca { double, double }, align 8
+    %tmp.7 = alloca { double, double }, align 8
+    %tmp.6 = alloca { double, double }, align 8
+    %tmp.5 = alloca { double, double }, align 8
+    %tmp.4 = alloca { double, double }, align 8
+    %tmp.3 = alloca { double, double }, align 8
+    %tmp.2 = alloca { double, double }, align 8
+    %tmp.1 = alloca { double, double }, align 8
+    %tmp.0 = alloca { double, double }, align 8
+    %x = alloca { double, double }, align 8
+    %y = alloca { double, double }, align 8
+    %z = alloca { double, double }, align 8
+    %b = alloca i8, align 1
     %cast.0 = bitcast { double, double }* %x to i8*
     call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.0, i8* align 8 bitcast ({ double, double }* @const.0 to i8*), i64 16, i1 false)
     %cast.1 = bitcast { double, double }* %y to i8*
@@ -1107,14 +1107,14 @@
     %cast.6 = bitcast { double, double }* %y to i8*
     call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.5, i8* align 8 %cast.6, i64 16, i1 false)
     %field.0 = getelementptr inbounds { double, double }, { double, double }* %tmp.0, i32 0, i32 0
-    %.real.ld.0 = load double, double* %field.0
+    %.real.ld.0 = load double, double* %field.0, align 8
     %field.1 = getelementptr inbounds { double, double }, { double, double }* %tmp.1, i32 0, i32 0
-    %.real.ld.1 = load double, double* %field.1
+    %.real.ld.1 = load double, double* %field.1, align 8
     %fadd.0 = fadd double %.real.ld.0, %.real.ld.1
     %field.2 = getelementptr inbounds { double, double }, { double, double }* %tmp.0, i32 0, i32 1
-    %.imag.ld.0 = load double, double* %field.2
+    %.imag.ld.0 = load double, double* %field.2, align 8
     %field.3 = getelementptr inbounds { double, double }, { double, double }* %tmp.1, i32 0, i32 1
-    %.imag.ld.1 = load double, double* %field.3
+    %.imag.ld.1 = load double, double* %field.3, align 8
     %fadd.1 = fadd double %.imag.ld.0, %.imag.ld.1
     %field.4 = getelementptr inbounds { double, double }, { double, double }* %tmp.2, i32 0, i32 0
     store double %fadd.0, double* %field.4, align 8
@@ -1130,14 +1130,14 @@
     %cast.12 = bitcast { double, double }* %y to i8*
     call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.11, i8* align 8 %cast.12, i64 16, i1 false)
     %field.6 = getelementptr inbounds { double, double }, { double, double }* %tmp.3, i32 0, i32 0
-    %.real.ld.2 = load double, double* %field.6
+    %.real.ld.2 = load double, double* %field.6, align 8
     %field.7 = getelementptr inbounds { double, double }, { double, double }* %tmp.4, i32 0, i32 0
-    %.real.ld.3 = load double, double* %field.7
+    %.real.ld.3 = load double, double* %field.7, align 8
     %fsub.0 = fsub double %.real.ld.2, %.real.ld.3
     %field.8 = getelementptr inbounds { double, double }, { double, double }* %tmp.3, i32 0, i32 1
-    %.imag.ld.2 = load double, double* %field.8
+    %.imag.ld.2 = load double, double* %field.8, align 8
     %field.9 = getelementptr inbounds { double, double }, { double, double }* %tmp.4, i32 0, i32 1
-    %.imag.ld.3 = load double, double* %field.9
+    %.imag.ld.3 = load double, double* %field.9, align 8
     %fsub.1 = fsub double %.imag.ld.2, %.imag.ld.3
     %field.10 = getelementptr inbounds { double, double }, { double, double }* %tmp.5, i32 0, i32 0
     store double %fsub.0, double* %field.10, align 8
@@ -1153,25 +1153,25 @@
     %cast.18 = bitcast { double, double }* %y to i8*
     call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.17, i8* align 8 %cast.18, i64 16, i1 false)
     %field.12 = getelementptr inbounds { double, double }, { double, double }* %tmp.6, i32 0, i32 0
-    %.real.ld.4 = load double, double* %field.12
+    %.real.ld.4 = load double, double* %field.12, align 8
     %field.13 = getelementptr inbounds { double, double }, { double, double }* %tmp.7, i32 0, i32 0
-    %.real.ld.5 = load double, double* %field.13
+    %.real.ld.5 = load double, double* %field.13, align 8
     %fmul.0 = fmul double %.real.ld.4, %.real.ld.5
     %field.14 = getelementptr inbounds { double, double }, { double, double }* %tmp.6, i32 0, i32 1
-    %.imag.ld.4 = load double, double* %field.14
+    %.imag.ld.4 = load double, double* %field.14, align 8
     %field.15 = getelementptr inbounds { double, double }, { double, double }* %tmp.7, i32 0, i32 1
-    %.imag.ld.5 = load double, double* %field.15
+    %.imag.ld.5 = load double, double* %field.15, align 8
     %fmul.1 = fmul double %.imag.ld.4, %.imag.ld.5
     %fsub.2 = fsub double %fmul.0, %fmul.1
     %field.16 = getelementptr inbounds { double, double }, { double, double }* %tmp.6, i32 0, i32 0
-    %.field.ld.0 = load double, double* %field.16
+    %.field.ld.0 = load double, double* %field.16, align 8
     %field.17 = getelementptr inbounds { double, double }, { double, double }* %tmp.7, i32 0, i32 1
-    %.field.ld.1 = load double, double* %field.17
+    %.field.ld.1 = load double, double* %field.17, align 8
     %fmul.2 = fmul double %.field.ld.0, %.field.ld.1
     %field.18 = getelementptr inbounds { double, double }, { double, double }* %tmp.6, i32 0, i32 1
-    %.field.ld.2 = load double, double* %field.18
+    %.field.ld.2 = load double, double* %field.18, align 8
     %field.19 = getelementptr inbounds { double, double }, { double, double }* %tmp.7, i32 0, i32 0
-    %.field.ld.3 = load double, double* %field.19
+    %.field.ld.3 = load double, double* %field.19, align 8
     %fmul.3 = fmul double %.field.ld.2, %.field.ld.3
     %fadd.2 = fadd double %fmul.2, %fmul.3
     %field.20 = getelementptr inbounds { double, double }, { double, double }* %tmp.8, i32 0, i32 0
@@ -1188,15 +1188,15 @@
     %cast.24 = bitcast { double, double }* %y to i8*
     call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.23, i8* align 8 %cast.24, i64 16, i1 false)
     %field.22 = getelementptr inbounds { double, double }, { double, double }* %tmp.9, i32 0, i32 0
-    %.real.ld.6 = load double, double* %field.22
+    %.real.ld.6 = load double, double* %field.22, align 8
     %field.23 = getelementptr inbounds { double, double }, { double, double }* %tmp.10, i32 0, i32 0
-    %.real.ld.7 = load double, double* %field.23
+    %.real.ld.7 = load double, double* %field.23, align 8
     %fcmp.0 = fcmp oeq double %.real.ld.6, %.real.ld.7
     %zext.0 = zext i1 %fcmp.0 to i8
     %field.24 = getelementptr inbounds { double, double }, { double, double }* %tmp.9, i32 0, i32 1
-    %.imag.ld.6 = load double, double* %field.24
+    %.imag.ld.6 = load double, double* %field.24, align 8
     %field.25 = getelementptr inbounds { double, double }, { double, double }* %tmp.10, i32 0, i32 1
-    %.imag.ld.7 = load double, double* %field.25
+    %.imag.ld.7 = load double, double* %field.25, align 8
     %fcmp.1 = fcmp oeq double %.imag.ld.6, %.imag.ld.7
     %zext.1 = zext i1 %fcmp.1 to i8
     %iand.0 = and i8 %zext.0, %zext.1
@@ -1208,15 +1208,15 @@
     %cast.28 = bitcast { double, double }* %y to i8*
     call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.27, i8* align 8 %cast.28, i64 16, i1 false)
     %field.26 = getelementptr inbounds { double, double }, { double, double }* %tmp.11, i32 0, i32 0
-    %.real.ld.8 = load double, double* %field.26
+    %.real.ld.8 = load double, double* %field.26, align 8
     %field.27 = getelementptr inbounds { double, double }, { double, double }* %tmp.12, i32 0, i32 0
-    %.real.ld.9 = load double, double* %field.27
+    %.real.ld.9 = load double, double* %field.27, align 8
     %fcmp.2 = fcmp une double %.real.ld.8, %.real.ld.9
     %zext.2 = zext i1 %fcmp.2 to i8
     %field.28 = getelementptr inbounds { double, double }, { double, double }* %tmp.11, i32 0, i32 1
-    %.imag.ld.8 = load double, double* %field.28
+    %.imag.ld.8 = load double, double* %field.28, align 8
     %field.29 = getelementptr inbounds { double, double }, { double, double }* %tmp.12, i32 0, i32 1
-    %.imag.ld.9 = load double, double* %field.29
+    %.imag.ld.9 = load double, double* %field.29, align 8
     %fcmp.3 = fcmp une double %.imag.ld.8, %.imag.ld.9
     %zext.3 = zext i1 %fcmp.3 to i8
     %ior.0 = or i8 %zext.2, %zext.3
@@ -1271,13 +1271,13 @@
     %cast.0 = bitcast { double, double }* %x to i8*
     call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.0, i8* align 8 bitcast ({ double, double }* @const.0 to i8*), i64 16, i1 false)
     %field.0 = getelementptr inbounds { double, double }, { double, double }* %x, i32 0, i32 0
-    %x.real.ld.0 = load double, double* %field.0
+    %x.real.ld.0 = load double, double* %field.0, align 8
     store double %x.real.ld.0, double* %a, align 8
     %field.1 = getelementptr inbounds { double, double }, { double, double }* %x, i32 0, i32 1
-    %x.imag.ld.0 = load double, double* %field.1
+    %x.imag.ld.0 = load double, double* %field.1, align 8
     store double %x.imag.ld.0, double* %b, align 8
-    %b.ld.0 = load double, double* %b
-    %a.ld.0 = load double, double* %a
+    %b.ld.0 = load double, double* %b, align 8
+    %a.ld.0 = load double, double* %a, align 8
     %field.2 = getelementptr inbounds { double, double }, { double, double }* %x, i32 0, i32 0
     store double %b.ld.0, double* %field.2, align 8
     %field.3 = getelementptr inbounds { double, double }, { double, double }* %x, i32 0, i32 1
@@ -1344,12 +1344,12 @@
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     define void @foo(i8* nest %nest.0) #0 {
   entry:
-    %a = alloca i64
-    %b = alloca i64
+    %a = alloca i64, align 8
+    %b = alloca i64, align 8
     store i64 0, i64* %a, align 8
     store i64 0, i64* %b, align 8
-    %a.ld.0 = load i64, i64* %a
-    %b.ld.0 = load i64, i64* %b
+    %a.ld.0 = load i64, i64* %a, align 8
+    %b.ld.0 = load i64, i64* %b, align 8
     %icmp.0 = icmp slt i64 %a.ld.0, %b.ld.0
     %zext.0 = zext i1 %icmp.0 to i8
     %trunc.0 = trunc i8 %zext.0 to i1
@@ -1401,8 +1401,8 @@
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     define void @foo(i8* nest %nest.0) #0 {
   entry:
-    %a = alloca i64
-    %tmpv.0 = alloca i64
+    %a = alloca i64, align 8
+    %tmpv.0 = alloca i64, align 8
     store i64 0, i64* %a, align 8
     br i1 true, label %then.0, label %else.0
   
@@ -1411,11 +1411,11 @@
     br label %fallthrough.0
   
   fallthrough.0:                                    ; preds = %else.0, %then.0
-    %tmpv.0.ld.0 = load i64, i64* %tmpv.0
+    %tmpv.0.ld.0 = load i64, i64* %tmpv.0, align 8
     ret void
   
   else.0:                                           ; preds = %entry
-    %a.ld.0 = load i64, i64* %a
+    %a.ld.0 = load i64, i64* %a, align 8
     store i64 %a.ld.0, i64* %tmpv.0, align 8
     br label %fallthrough.0
   }
@@ -1457,11 +1457,11 @@
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     define void @foo({ [16 x i32], i32 }* sret %sret.formal.0, i8* nest %nest.0, { [16 x i32], i32 }* byval %p0, i32 %p1) #0 {
   entry:
-    %p1.addr = alloca i32
-    %a = alloca { [16 x i32], i32 }
-    %tmpv.0 = alloca { [16 x i32], i32 }
+    %p1.addr = alloca i32, align 4
+    %a = alloca { [16 x i32], i32 }, align 8
+    %tmpv.0 = alloca { [16 x i32], i32 }, align 8
     store i32 %p1, i32* %p1.addr, align 4
-    %p1.ld.0 = load i32, i32* %p1.addr
+    %p1.ld.0 = load i32, i32* %p1.addr, align 4
     %icmp.0 = icmp slt i32 %p1.ld.0, 7
     %zext.0 = zext i1 %icmp.0 to i8
     %trunc.0 = trunc i8 %zext.0 to i1
@@ -1532,11 +1532,11 @@
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     define void @foo({ [16 x i32], i32 }* sret %sret.formal.0, i8* nest %nest.0, { [16 x i32], i32 }* %p0, i32 %p1) #0 {
   entry:
-    %p1.addr = alloca i32
-    %a = alloca { [16 x i32], i32 }
-    %tmpv.0 = alloca { [16 x i32], i32 }
+    %p1.addr = alloca i32, align 4
+    %a = alloca { [16 x i32], i32 }, align 8
+    %tmpv.0 = alloca { [16 x i32], i32 }, align 8
     store i32 %p1, i32* %p1.addr, align 4
-    %p1.ld.0 = load i32, i32* %p1.addr
+    %p1.ld.0 = load i32, i32* %p1.addr, align 4
     %icmp.0 = icmp slt i32 %p1.ld.0, 7
     %zext.0 = zext i1 %icmp.0 to i8
     %trunc.0 = trunc i8 %zext.0 to i1
@@ -1591,16 +1591,16 @@
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     define i64 @foo(i8* nest %nest.0, i32 %param1, i32 %param2, i64* %param3) #0 {
   entry:
-    %param1.addr = alloca i32
-    %param2.addr = alloca i32
-    %param3.addr = alloca i64*
-    %x = alloca i64
+    %param1.addr = alloca i32, align 4
+    %param2.addr = alloca i32, align 4
+    %param3.addr = alloca i64*, align 8
+    %x = alloca i64, align 8
     store i32 %param1, i32* %param1.addr, align 4
     store i32 %param2, i32* %param2.addr, align 4
     store i64* %param3, i64** %param3.addr, align 8
     store i64 0, i64* %x, align 8
     store i64 5, i64* %x, align 8
-    %x.ld.0 = load i64, i64* %x
+    %x.ld.0 = load i64, i64* %x, align 8
     ret i64 0
   }
   )RAW_RESULT");
@@ -1641,12 +1641,12 @@
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     define i64 @foo(i8* nest %nest.0, i32 %param1, i32 %param2, i64* %param3) #0 {
   entry:
-    %tmp.0 = alloca { i64, i64 }
-    %param1.addr = alloca i32
-    %param2.addr = alloca i32
-    %param3.addr = alloca i64*
-    %x = alloca i64
-    %y = alloca { i64, i64 }
+    %tmp.0 = alloca { i64, i64 }, align 8
+    %param1.addr = alloca i32, align 4
+    %param2.addr = alloca i32, align 4
+    %param3.addr = alloca i64*, align 8
+    %x = alloca i64, align 8
+    %y = alloca { i64, i64 }, align 8
     store i32 %param1, i32* %param1.addr, align 4
     store i32 %param2, i32* %param2.addr, align 4
     store i64* %param3, i64** %param3.addr, align 8
@@ -1654,8 +1654,8 @@
     %cast.0 = bitcast { i64, i64 }* %y to i8*
     call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.0, i8* align 8 bitcast ({ i64, i64 }* @const.0 to i8*), i64 16, i1 false)
     store i64 5, i64* %x, align 8
-    %x.ld.0 = load i64, i64* %x
-    %x.ld.1 = load i64, i64* %x
+    %x.ld.0 = load i64, i64* %x, align 8
+    %x.ld.1 = load i64, i64* %x, align 8
     %field.0 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %tmp.0, i32 0, i32 0
     store i64 %x.ld.0, i64* %field.0, align 8
     %field.1 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %tmp.0, i32 0, i32 1
@@ -1703,29 +1703,29 @@
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     define void @foo(i8* nest %nest.0, i32* %p0, i32* %p1) #0 {
   entry:
-    %p0.addr = alloca i32*
-    %p1.addr = alloca i32*
-    %tmpv.0 = alloca i32*
+    %p0.addr = alloca i32*, align 8
+    %p1.addr = alloca i32*, align 8
+    %tmpv.0 = alloca i32*, align 8
     store i32* %p0, i32** %p0.addr, align 8
     store i32* %p1, i32** %p1.addr, align 8
-    %p0.ld.0 = load i32*, i32** %p0.addr
+    %p0.ld.0 = load i32*, i32** %p0.addr, align 8
     %icmp.0 = icmp eq i32* %p0.ld.0, null
     %zext.0 = zext i1 %icmp.0 to i8
     %trunc.0 = trunc i8 %zext.0 to i1
     br i1 %trunc.0, label %then.0, label %else.0
   
   then.0:                                           ; preds = %entry
-    %p1.ld.0 = load i32*, i32** %p1.addr
+    %p1.ld.0 = load i32*, i32** %p1.addr, align 8
     store i32* %p1.ld.0, i32** %tmpv.0, align 8
     br label %fallthrough.0
   
   fallthrough.0:                                    ; preds = %else.0, %then.0
-    %tmpv.0.ld.0 = load i32*, i32** %tmpv.0
+    %tmpv.0.ld.0 = load i32*, i32** %tmpv.0, align 8
     store i32 7, i32* %tmpv.0.ld.0, align 4
     ret void
   
   else.0:                                           ; preds = %entry
-    %p0.ld.1 = load i32*, i32** %p0.addr
+    %p0.ld.1 = load i32*, i32** %p0.addr, align 8
     store i32* %p0.ld.1, i32** %tmpv.0, align 8
     br label %fallthrough.0
   }
@@ -1775,21 +1775,21 @@
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     store i8 0, i8* %x, align 1
-    %x.ld.0 = load i8, i8* %x
+    %x.ld.0 = load i8, i8* %x, align 1
     %icmp.0 = icmp ne i8 %x.ld.0, 0
     %xor.0 = xor i1 %icmp.0, true
     %zext.0 = zext i1 %xor.0 to i8
     store i8 %zext.0, i8* %y, align 1
     store i32 0, i32* %a, align 4
-    %a.ld.0 = load i32, i32* %a
+    %a.ld.0 = load i32, i32* %a, align 4
     %sub.0 = sub i32 0, %a.ld.0
     store i32 %sub.0, i32* %b, align 4
     store i64 0, i64* %z, align 8
-    %z.ld.0 = load i64, i64* %z
+    %z.ld.0 = load i64, i64* %z, align 8
     %xor.1 = xor i64 %z.ld.0, -1
     store i64 %xor.1, i64* %w, align 8
     store double 0.000000e+00, double* %q, align 8
-    %q.ld.0 = load double, double* %q
+    %q.ld.0 = load double, double* %q, align 8
     %fsub.0 = fsub double -0.000000e+00, %q.ld.0
     store double %fsub.0, double* %r, align 8
   )RAW_RESULT");
diff --git a/unittests/BackendCore/BackendFcnTests.cpp b/unittests/BackendCore/BackendFcnTests.cpp
index a8f1756..79a3692 100644
--- a/unittests/BackendCore/BackendFcnTests.cpp
+++ b/unittests/BackendCore/BackendFcnTests.cpp
@@ -299,7 +299,7 @@
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     store i64 0, i64* %x, align 8
-    %x.ld.0 = load i64, i64* %x
+    %x.ld.0 = load i64, i64* %x, align 8
     %call.0 = call addrspace(0) i64 @llvm.cttz.i64(i64 %x.ld.0, i1 true)
   )RAW_RESULT");
 
diff --git a/unittests/BackendCore/BackendNodeTests.cpp b/unittests/BackendCore/BackendNodeTests.cpp
index 4dfe887..86396fc 100644
--- a/unittests/BackendCore/BackendNodeTests.cpp
+++ b/unittests/BackendCore/BackendNodeTests.cpp
@@ -163,12 +163,12 @@
   EXPECT_NE(add, matclone);
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
-    %x.ld.0 = load i32, i32* %x
-    %z.ld.0 = load i16, i16* %z
+    %x.ld.0 = load i32, i32* %x, align 4
+    %z.ld.0 = load i16, i16* %z, align 2
     %sext.0 = sext i16 %z.ld.0 to i32
     %add.0 = add i32 %x.ld.0, %sext.0
-    %y.ld.0 = load i32*, i32** %y
-    %.ld.0 = load i32, i32* %y.ld.0
+    %y.ld.0 = load i32*, i32** %y, align 8
+    %.ld.0 = load i32, i32* %y.ld.0, align 4
     %add.1 = add i32 %add.0, %.ld.0
   )RAW_RESULT");
 
@@ -207,12 +207,12 @@
   DECLARE_EXPECTED_OUTPUT(exp2, R"RAW_RESULT(
     %field.0 = getelementptr inbounds { { i32*, i32 }, { i32*, i32 } }, { { i32*, i32 }, { i32*, i32 } }* %x, i32 0, i32 0
     %field.1 = getelementptr inbounds { i32*, i32 }, { i32*, i32 }* %field.0, i32 0, i32 0
-    %x.field.field.ld.0 = load i32*, i32** %field.1
-    %.ld.0 = load i32, i32* %x.field.field.ld.0
+    %x.field.field.ld.0 = load i32*, i32** %field.1, align 8
+    %.ld.0 = load i32, i32* %x.field.field.ld.0, align 4
     %field.2 = getelementptr inbounds { { i32*, i32 }, { i32*, i32 } }, { { i32*, i32 }, { i32*, i32 } }* %x, i32 0, i32 0
     %field.3 = getelementptr inbounds { i32*, i32 }, { i32*, i32 }* %field.2, i32 0, i32 0
-    %.field.field.ld.0 = load i32*, i32** %field.3
-    %.ld.1 = load i32, i32* %.field.field.ld.0
+    %.field.field.ld.0 = load i32*, i32** %field.3, align 8
+    %.ld.1 = load i32, i32* %.field.field.ld.0, align 4
     %add.0 = add i32 %.ld.0, %.ld.1
   )RAW_RESULT");
 
diff --git a/unittests/BackendCore/BackendPointerExprTests.cpp b/unittests/BackendCore/BackendPointerExprTests.cpp
index af4f04c..9311990 100644
--- a/unittests/BackendCore/BackendPointerExprTests.cpp
+++ b/unittests/BackendCore/BackendPointerExprTests.cpp
@@ -75,10 +75,10 @@
     store i64 10, i64* %y, align 8
     store i64* null, i64** %x, align 8
     store i64* %y, i64** %x, align 8
-    %x.ld.0 = load i64*, i64** %x
-    %.ld.0 = load i64, i64* %x.ld.0
+    %x.ld.0 = load i64*, i64** %x, align 8
+    %.ld.0 = load i64, i64* %x.ld.0, align 8
     store i64 %.ld.0, i64* %y, align 8
-    %x.ld.1 = load i64*, i64** %x
+    %x.ld.1 = load i64*, i64** %x, align 8
     store i64 3, i64* %x.ld.1, align 8
   )RAW_RESULT");
 
@@ -134,10 +134,10 @@
     call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %cast.0, i8* align 8 bitcast ({ i64 }* @const.0 to i8*), i64 8, i1 false)
     store { i64 }* %fdloc1, { i64 }** %fploc1, align 8
     store { i64 (i8*, i32, i32, i64*)* }* null, { i64 (i8*, i32, i32, i64*)* }** %fploc2, align 8
-    %fploc1.ld.0 = load { i64 }*, { i64 }** %fploc1
+    %fploc1.ld.0 = load { i64 }*, { i64 }** %fploc1, align 8
     %cast.1 = bitcast { i64 }* %fploc1.ld.0 to { i64 (i8*, i32, i32, i64*)* }*
     store { i64 (i8*, i32, i32, i64*)* }* %cast.1, { i64 (i8*, i32, i32, i64*)* }** %fploc2, align 8
-    %fploc2.ld.0 = load { i64 (i8*, i32, i32, i64*)* }*, { i64 (i8*, i32, i32, i64*)* }** %fploc2
+    %fploc2.ld.0 = load { i64 (i8*, i32, i32, i64*)* }*, { i64 (i8*, i32, i32, i64*)* }** %fploc2, align 8
     %cast.2 = bitcast { i64 (i8*, i32, i32, i64*)* }* %fploc2.ld.0 to { i64 }*
     store { i64 }* %cast.2, { i64 }** %fploc1, align 8
   )RAW_RESULT");
@@ -190,11 +190,11 @@
   DECLARE_EXPECTED_OUTPUT(exp2, R"RAW_RESULT(
     store i8 0, i8* %b1, align 1
     store i8* null, i8** %pb1, align 8
-    %pb1.ld.0 = load i8*, i8** %pb1
+    %pb1.ld.0 = load i8*, i8** %pb1, align 8
     %icmp.0 = icmp eq i8* %pb1.ld.0, null
     %zext.0 = zext i1 %icmp.0 to i8
     store i8 %zext.0, i8* %b1, align 1
-    %pb1.ld.1 = load i8*, i8** %pb1
+    %pb1.ld.1 = load i8*, i8** %pb1, align 8
     %icmp.1 = icmp eq i8* null, %pb1.ld.1
     %zext.1 = zext i1 %icmp.1 to i8
     store i8 %zext.1, i8* %b1, align 1
@@ -225,7 +225,7 @@
   h.mkLocal("y", bst, deref2);
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
-    %deref.ld.0 = load i32, i32* null
+    %deref.ld.0 = load i32, i32* null, align 4
     store i32 %deref.ld.0, i32* %x, align 4
     %cast.2 = bitcast { i32, i32 }* %y to i8*
     call addrspace(0) void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.2, i8* align 4 null, i64 8, i1 false)
@@ -330,26 +330,26 @@
     store i8 0, i8* %b1, align 1
     store i8 0, i8* %b2, align 1
     store i8 0, i8* %b3, align 1
-    %cpv1.ld.0 = load %CPT.0*, %CPT.0** %cpv1
+    %cpv1.ld.0 = load %CPT.0*, %CPT.0** %cpv1, align 8
     %cast.2 = bitcast %CPT.0** %cpv2 to %CPT.0***
-    %cpv2.ld.0 = load %CPT.0**, %CPT.0*** %cast.2
-    %.ld.0 = load %CPT.0*, %CPT.0** %cpv2.ld.0
+    %cpv2.ld.0 = load %CPT.0**, %CPT.0*** %cast.2, align 8
+    %.ld.0 = load %CPT.0*, %CPT.0** %cpv2.ld.0, align 8
     %icmp.0 = icmp eq %CPT.0* %cpv1.ld.0, %.ld.0
     %zext.0 = zext i1 %icmp.0 to i8
     store i8 %zext.0, i8* %b1, align 1
-    %cpv2.ld.1 = load %CPT.0*, %CPT.0** %cpv2
+    %cpv2.ld.1 = load %CPT.0*, %CPT.0** %cpv2, align 8
     %cast.3 = bitcast %CPT.0* %cpv2.ld.1 to %CPT.0**
     %icmp.1 = icmp eq %CPT.0** %cpv1, %cast.3
     %zext.1 = zext i1 %icmp.1 to i8
     store i8 %zext.1, i8* %b2, align 1
-    %cpv1.ld.1 = load %CPT.0*, %CPT.0** %cpv1
+    %cpv1.ld.1 = load %CPT.0*, %CPT.0** %cpv1, align 8
     %cast.4 = bitcast %CPT.0** %cpv2 to %CPT.0***
-    %cpv2.ld.2 = load %CPT.0**, %CPT.0*** %cast.4
+    %cpv2.ld.2 = load %CPT.0**, %CPT.0*** %cast.4, align 8
     %cast.5 = bitcast %CPT.0** %cpv2.ld.2 to %CPT.0***
-    %deref.ld.0 = load %CPT.0**, %CPT.0*** %cast.5
+    %deref.ld.0 = load %CPT.0**, %CPT.0*** %cast.5, align 8
     %cast.6 = bitcast %CPT.0** %deref.ld.0 to %CPT.0***
-    %deref.ld.1 = load %CPT.0**, %CPT.0*** %cast.6
-    %.ld.1 = load %CPT.0*, %CPT.0** %deref.ld.1
+    %deref.ld.1 = load %CPT.0**, %CPT.0*** %cast.6, align 8
+    %.ld.1 = load %CPT.0*, %CPT.0** %deref.ld.1, align 8
     %icmp.2 = icmp eq %CPT.0* %cpv1.ld.1, %.ld.1
     %zext.2 = zext i1 %icmp.2 to i8
     store i8 %zext.2, i8* %b3, align 1
@@ -428,9 +428,9 @@
     store %CPT.0* %cast.0, %CPT.0** %x, align 8
     store %CPT.0** %x, %CPT.0*** %y, align 8
     store i8 0, i8* %b1, align 1
-    %x.ld.0 = load %CPT.0*, %CPT.0** %x
-    %y.ld.0 = load %CPT.0**, %CPT.0*** %y
-    %.ld.0 = load %CPT.0*, %CPT.0** %y.ld.0
+    %x.ld.0 = load %CPT.0*, %CPT.0** %x, align 8
+    %y.ld.0 = load %CPT.0**, %CPT.0*** %y, align 8
+    %.ld.0 = load %CPT.0*, %CPT.0** %y.ld.0, align 8
     %icmp.0 = icmp eq %CPT.0* %x.ld.0, %.ld.0
     %zext.0 = zext i1 %icmp.0 to i8
     store i8 %zext.0, i8* %b1, align 1
@@ -477,12 +477,12 @@
   }
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
-    %param3.ld.0 = load i64*, i64** %param3.addr
+    %param3.ld.0 = load i64*, i64** %param3.addr, align 8
     %ptroff.0 = getelementptr i64, i64* %param3.ld.0, i32 5
     store i64 9, i64* %ptroff.0, align 8
-    %param3.ld.1 = load i64*, i64** %param3.addr
+    %param3.ld.1 = load i64*, i64** %param3.addr, align 8
     %ptroff.1 = getelementptr i64, i64* %param3.ld.1, i32 7
-    %.ptroff.ld.0 = load i64, i64* %ptroff.1
+    %.ptroff.ld.0 = load i64, i64* %ptroff.1, align 8
     %trunc.0 = trunc i64 %.ptroff.ld.0 to i32
     store i32 %trunc.0, i32* %param1.addr, align 4
   )RAW_RESULT");
diff --git a/unittests/BackendCore/BackendStmtTests.cpp b/unittests/BackendCore/BackendStmtTests.cpp
index 8c2c863..96ed5c5 100644
--- a/unittests/BackendCore/BackendStmtTests.cpp
+++ b/unittests/BackendCore/BackendStmtTests.cpp
@@ -83,7 +83,7 @@
     store i64 0, i64* %loc1, align 8
     store i64 123, i64* %loc1, align 8
     store i64 0, i64* %loc2, align 8
-    %loc1.ld.0 = load i64, i64* %loc1
+    %loc1.ld.0 = load i64, i64* %loc1, align 8
     store i64 %loc1.ld.0, i64* %loc2, align 8
   )RAW_RESULT");
   bool isOK = h.expectBlock(exp);
@@ -116,7 +116,7 @@
   Bstatement *ret = h.mkReturn(ve1);
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
-    %loc1.ld.0 = load i64, i64* %loc1
+    %loc1.ld.0 = load i64, i64* %loc1, align 8
     ret i64 %loc1.ld.0
   )RAW_RESULT");
   std::string reason;
@@ -159,15 +159,15 @@
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     define i64 @foo(i8* nest %nest.0, i32 %param1, i32 %param2, i64* %param3) #0 {
   entry:
-    %param1.addr = alloca i32
-    %param2.addr = alloca i32
-    %param3.addr = alloca i64*
-    %x = alloca i64
+    %param1.addr = alloca i32, align 4
+    %param2.addr = alloca i32, align 4
+    %param3.addr = alloca i64*, align 8
+    %x = alloca i64, align 8
     store i32 %param1, i32* %param1.addr, align 4
     store i32 %param2, i32* %param2.addr, align 4
     store i64* %param3, i64** %param3.addr, align 8
     store i64 10, i64* %x, align 8
-    %x.ld.0 = load i64, i64* %x
+    %x.ld.0 = load i64, i64* %x, align 8
     ret i64 %x.ld.0
   }
   )RAW_RESULT");
@@ -242,7 +242,7 @@
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     define void @foo(i8* nest %nest.0) #0 {
   entry:
-    %loc1 = alloca i8
+    %loc1 = alloca i8, align 1
     store i8 0, i8* %loc1, align 1
     call void @bar(i8* nest undef, i8* blockaddress(@foo, %label.0))
     br label %label.0
@@ -313,11 +313,11 @@
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     define i64 @foo(i8* nest %nest.0, i32 %param1, i32 %param2, i64* %param3) #0 {
   entry:
-    %param1.addr = alloca i32
-    %param2.addr = alloca i32
-    %param3.addr = alloca i64*
-    %loc1 = alloca i64
-    %loc2 = alloca i64
+    %param1.addr = alloca i32, align 4
+    %param2.addr = alloca i32, align 4
+    %param3.addr = alloca i64*, align 8
+    %loc1 = alloca i64, align 8
+    %loc2 = alloca i64, align 8
     store i32 %param1, i32* %param1.addr, align 4
     store i32 %param2, i32* %param2.addr, align 4
     store i64* %param3, i64** %param3.addr, align 8
@@ -437,16 +437,16 @@
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     define i64 @foo(i8* nest %nest.0, i32 %param1, i32 %param2, i64* %param3) #0 {
   entry:
-    %param1.addr = alloca i32
-    %param2.addr = alloca i32
-    %param3.addr = alloca i64*
-    %loc1 = alloca i64
-    %tmpv.0 = alloca i64
+    %param1.addr = alloca i32, align 4
+    %param2.addr = alloca i32, align 4
+    %param3.addr = alloca i64*, align 8
+    %loc1 = alloca i64, align 8
+    %tmpv.0 = alloca i64, align 8
     store i32 %param1, i32* %param1.addr, align 4
     store i32 %param2, i32* %param2.addr, align 4
     store i64* %param3, i64** %param3.addr, align 8
     store i64 0, i64* %loc1, align 8
-    %loc1.ld.4 = load i64, i64* %loc1
+    %loc1.ld.4 = load i64, i64* %loc1, align 8
     switch i64 %loc1.ld.4, label %default.0 [
       i64 1, label %case.0
       i64 2, label %case.0
@@ -456,13 +456,13 @@
     ]
   
   case.0:                                           ; preds = %entry, %entry
-    %loc1.ld.0 = load i64, i64* %loc1
+    %loc1.ld.0 = load i64, i64* %loc1, align 8
     %div.0 = sdiv i64 %loc1.ld.0, 123
     store i64 %div.0, i64* %loc1, align 8
     br label %label.0
   
   case.1:                                           ; preds = %entry, %entry
-    %loc1.ld.1 = load i64, i64* %loc1
+    %loc1.ld.1 = load i64, i64* %loc1, align 8
     %icmp.0 = icmp sle i64 %loc1.ld.1, 987
     %zext.0 = zext i1 %icmp.0 to i8
     %trunc.0 = trunc i8 %zext.0 to i1
@@ -479,17 +479,17 @@
     ret i64 10101
   
   then.0:                                           ; preds = %case.1
-    %loc1.ld.3 = load i64, i64* %loc1
+    %loc1.ld.3 = load i64, i64* %loc1, align 8
     store i64 %loc1.ld.3, i64* %tmpv.0, align 8
     br label %fallthrough.0
   
   fallthrough.0:                                    ; preds = %else.0, %then.0
-    %tmpv.0.ld.0 = load i64, i64* %tmpv.0
+    %tmpv.0.ld.0 = load i64, i64* %tmpv.0, align 8
     store i64 %tmpv.0.ld.0, i64* %loc1, align 8
     br label %case.2
   
   else.0:                                           ; preds = %case.1
-    %loc1.ld.2 = load i64, i64* %loc1
+    %loc1.ld.2 = load i64, i64* %loc1, align 8
     %mul.0 = mul i64 987, %loc1.ld.2
     store i64 %mul.0, i64* %tmpv.0, align 8
     br label %fallthrough.0
@@ -586,7 +586,7 @@
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     define void @foo(i8* nest %nest.0) #0 personality i32 (i32, i32, i64, i8*, i8*)* @__gccgo_personality_v0 {
   entry:
-    %x = alloca i8
+    %x = alloca i8, align 1
     store i8 0, i8* %x, align 1
     br label %finish.0
   
@@ -699,12 +699,12 @@
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     define void @baz(i8* nest %nest.0) #0 personality i32 (i32, i32, i64, i8*, i8*)* @__gccgo_personality_v0 {
   entry:
-    %ehtmp.0 = alloca { i8*, i32 }
-    %x = alloca i64
-    %y = alloca i8
-    %sret.actual.0 = alloca { i8, i8 }
-    %sret.actual.1 = alloca { i8, i8 }
-    %finvar.0 = alloca i8
+    %ehtmp.0 = alloca { i8*, i32 }, align 8
+    %x = alloca i64, align 8
+    %y = alloca i8, align 1
+    %sret.actual.0 = alloca { i8, i8 }, align 8
+    %sret.actual.1 = alloca { i8, i8 }, align 8
+    %finvar.0 = alloca i8, align 1
     store i64 0, i64* %x, align 8
     store i8 0, i8* %y, align 1
     %call.0 = invoke i64 @id(i8* nest undef, i64 99)
@@ -888,11 +888,11 @@
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     define i64 @baz(i8* nest %nest.0, i64 %p0) #0 personality i32 (i32, i32, i64, i8*, i8*)* @__gccgo_personality_v0 {
   entry:
-    %ehtmp.0 = alloca { i8*, i32 }
-    %p0.addr = alloca i64
-    %ret = alloca i64
-    %x = alloca i8
-    %finvar.0 = alloca i8
+    %ehtmp.0 = alloca { i8*, i32 }, align 8
+    %p0.addr = alloca i64, align 8
+    %ret = alloca i64, align 8
+    %x = alloca i8, align 1
+    %finvar.0 = alloca i8, align 1
     store i64 %p0, i64* %p0.addr, align 8
     store i64 0, i64* %ret, align 8
     store i8 0, i8* %x, align 1
@@ -953,7 +953,7 @@
             to label %cont.0 unwind label %pad.0
   
   else.0:                                           ; preds = %cont.1
-    %p0.ld.0 = load i64, i64* %p0.addr
+    %p0.ld.0 = load i64, i64* %p0.addr, align 8
     store i64 %p0.ld.0, i64* %ret, align 8
     store i8 1, i8* %finvar.0, align 1
     invoke void @deferreturn(i8* nest undef, i8* %x)
@@ -967,7 +967,7 @@
     resume { i8*, i32 } %excv.0
   
   finret.0:                                         ; preds = %cont.0
-    %ret.ld.1 = load i64, i64* %ret
+    %ret.ld.1 = load i64, i64* %ret, align 8
     ret i64 %ret.ld.1
   }
   )RAW_RESULT");
diff --git a/unittests/BackendCore/BackendVarTests.cpp b/unittests/BackendCore/BackendVarTests.cpp
index 3c78a51..29f18e1 100644
--- a/unittests/BackendCore/BackendVarTests.cpp
+++ b/unittests/BackendCore/BackendVarTests.cpp
@@ -65,8 +65,8 @@
   Bexpression *ve2 = be->var_expression(loc1, Location());
   ASSERT_TRUE(ve2 != nullptr);
   Bstatement *es = h.mkExprStmt(ve2);
-  EXPECT_EQ(repr(ve2->value()), "%loc1 = alloca i64");
-  EXPECT_EQ(repr(es), "%loc1.ld.0 = load i64, i64* %loc1");
+  EXPECT_EQ(repr(ve2->value()), "%loc1 = alloca i64, align 8");
+  EXPECT_EQ(repr(es), "%loc1.ld.0 = load i64, i64* %loc1, align 8");
 
   // Make sure error detection is working
   Bvariable *loce = be->local_variable(func1, "", be->error_type(), nullptr,
@@ -536,8 +536,8 @@
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     define void @foo(i8* nest %nest.0) #0 {
   entry:
-    %x = alloca i32
-    %y = alloca { i32, i32 }
+    %x = alloca i32, align 4
+    %y = alloca { i32, i32 }, align 8
     %0 = bitcast i32* %x to i8*
     call void @llvm.lifetime.start.p0i8(i64 4, i8* %0)
     %1 = bitcast { i32, i32 }* %y to i8*
@@ -546,7 +546,7 @@
     %cast.0 = bitcast { i32, i32 }* %y to i8*
     call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %cast.0, i8* align 4 bitcast ({ i32, i32 }* @const.0 to i8*), i64 8, i1 false)
     %field.0 = getelementptr inbounds { i32, i32 }, { i32, i32 }* %y, i32 0, i32 1
-    %y.field.ld.0 = load i32, i32* %field.0
+    %y.field.ld.0 = load i32, i32* %field.0, align 4
     store i32 %y.field.ld.0, i32* %x, align 4
     %2 = bitcast i32* %x to i8*
     call void @llvm.lifetime.end.p0i8(i64 4, i8* %2)
@@ -682,11 +682,11 @@
 
   DECLARE_EXPECTED_OUTPUT(exp, R"RAW_RESULT(
     define void @foo(i8* nest %nest.0) #0 {
-    entry:
-      %localemptys2f = alloca { {}, {} }
-      %localemptyintar = alloca [0 x i32]
-      ret void
-    }
+  entry:
+    %localemptys2f = alloca { {}, {} }, align 8
+    %localemptyintar = alloca [0 x i32], align 4
+    ret void
+  }
   )RAW_RESULT");
   bool isOK = h.expectValue(func->function(), exp);
   EXPECT_TRUE(isOK && "Value does not have expected contents");
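
Note on why every remastered expectation gains an "align" suffix: at this LLVM
revision, load, store, and alloca instructions always carry an explicit
alignment. When no alignment is supplied, the instruction constructors compute
a default from the module's DataLayout, and the IR printer then emits the
"align N" operand unconditionally. A minimal standalone sketch of that
behavior follows (illustrative only, not gollvm code: the module name, data
layout string, function name, and value names are made up for the example;
the IRBuilder calls are the stock LLVM C++ API):

  // Builds a tiny function and prints it. None of the instructions below
  // are given an explicit alignment, yet with LLVM at this revision they
  // all print with "align 8", because the constructors derive a default
  // from the module's DataLayout (preferred alignment for allocas, ABI
  // alignment for loads/stores; both are 8 for i64 in this layout).
  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/LLVMContext.h"
  #include "llvm/IR/Module.h"
  #include "llvm/Support/raw_ostream.h"

  int main() {
    llvm::LLVMContext ctx;
    llvm::Module mod("align_demo", ctx);
    // x86-64-style layout string (illustrative): i64 is 8-byte aligned.
    mod.setDataLayout("e-m:e-i64:64-f80:128-n8:16:32:64-S128");

    llvm::FunctionType *fty =
        llvm::FunctionType::get(llvm::Type::getInt64Ty(ctx), false);
    llvm::Function *fn = llvm::Function::Create(
        fty, llvm::Function::ExternalLinkage, "demo", &mod);
    llvm::IRBuilder<> b(llvm::BasicBlock::Create(ctx, "entry", fn));

    // Prints as "%x = alloca i64, align 8".
    llvm::AllocaInst *x = b.CreateAlloca(b.getInt64Ty(), nullptr, "x");
    // Prints as "store i64 5, i64* %x, align 8".
    b.CreateStore(b.getInt64(5), x);
    // Prints as "%x.ld.0 = load i64, i64* %x, align 8".
    llvm::Value *ld = b.CreateLoad(b.getInt64Ty(), x, "x.ld.0");
    b.CreateRet(ld);

    mod.print(llvm::outs(), nullptr); // output includes the align suffixes
    return 0;
  }

This is why the expected results had to be remastered wholesale rather than
case by case: the suffix appears on every load/store/alloca the bridge emits,
independent of whether the bridge itself ever chose an alignment.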