diff --git a/src/cmd/compile/internal/arm64/ssa.go b/src/cmd/compile/internal/arm64/ssa.go
index 22b28a9..4358851 100644
--- a/src/cmd/compile/internal/arm64/ssa.go
+++ b/src/cmd/compile/internal/arm64/ssa.go
@@ -1054,7 +1054,11 @@
 		ssa.OpARM64LessThanF,
 		ssa.OpARM64LessEqualF,
 		ssa.OpARM64GreaterThanF,
-		ssa.OpARM64GreaterEqualF:
+		ssa.OpARM64GreaterEqualF,
+		ssa.OpARM64NotLessThanF,
+		ssa.OpARM64NotLessEqualF,
+		ssa.OpARM64NotGreaterThanF,
+		ssa.OpARM64NotGreaterEqualF:
 		// generate boolean values using CSET
 		p := s.Prog(arm64.ACSET)
 		p.From.Type = obj.TYPE_REG // assembler encodes conditional bits in Reg
@@ -1098,10 +1102,16 @@
 	ssa.OpARM64GreaterThanU:  arm64.COND_HI,
 	ssa.OpARM64GreaterEqual:  arm64.COND_GE,
 	ssa.OpARM64GreaterEqualU: arm64.COND_HS,
-	ssa.OpARM64LessThanF:     arm64.COND_MI,
-	ssa.OpARM64LessEqualF:    arm64.COND_LS,
-	ssa.OpARM64GreaterThanF:  arm64.COND_GT,
-	ssa.OpARM64GreaterEqualF: arm64.COND_GE,
+	ssa.OpARM64LessThanF:     arm64.COND_MI, // Less than
+	ssa.OpARM64LessEqualF:    arm64.COND_LS, // Less than or equal to
+	ssa.OpARM64GreaterThanF:  arm64.COND_GT, // Greater than
+	ssa.OpARM64GreaterEqualF: arm64.COND_GE, // Greater than or equal to
+
+	// The following condition codes have unordered to handle comparisons related to NaN.
+	ssa.OpARM64NotLessThanF:     arm64.COND_PL, // Greater than, equal to, or unordered
+	ssa.OpARM64NotLessEqualF:    arm64.COND_HI, // Greater than or unordered
+	ssa.OpARM64NotGreaterThanF:  arm64.COND_LE, // Less than, equal to or unordered
+	ssa.OpARM64NotGreaterEqualF: arm64.COND_LT, // Less than or unordered
 }
 
 var blockJump = map[ssa.BlockKind]struct {
diff --git a/src/cmd/compile/internal/gc/embed.go b/src/cmd/compile/internal/gc/embed.go
index 103949c..f45796c 100644
--- a/src/cmd/compile/internal/gc/embed.go
+++ b/src/cmd/compile/internal/gc/embed.go
@@ -47,9 +47,7 @@
 	embedFiles
 )
 
-var numLocalEmbed int
-
-func varEmbed(p *noder, names []*Node, typ *Node, exprs []*Node, embeds []PragmaEmbed) (newExprs []*Node) {
+func varEmbed(p *noder, names []*Node, typ *Node, exprs []*Node, embeds []PragmaEmbed) {
 	haveEmbed := false
 	for _, decl := range p.file.DeclList {
 		imp, ok := decl.(*syntax.ImportDecl)
@@ -67,44 +65,48 @@
 	pos := embeds[0].Pos
 	if !haveEmbed {
 		p.yyerrorpos(pos, "invalid go:embed: missing import \"embed\"")
-		return exprs
-	}
-	if embedCfg.Patterns == nil {
-		p.yyerrorpos(pos, "invalid go:embed: build system did not supply embed configuration")
-		return exprs
+		return
 	}
 	if len(names) > 1 {
 		p.yyerrorpos(pos, "go:embed cannot apply to multiple vars")
-		return exprs
+		return
 	}
 	if len(exprs) > 0 {
 		p.yyerrorpos(pos, "go:embed cannot apply to var with initializer")
-		return exprs
+		return
 	}
 	if typ == nil {
 		// Should not happen, since len(exprs) == 0 now.
 		p.yyerrorpos(pos, "go:embed cannot apply to var without type")
-		return exprs
+		return
+	}
+	if dclcontext != PEXTERN {
+		p.yyerrorpos(pos, "go:embed cannot apply to var inside func")
+		return
 	}
 
-	kind := embedKindApprox(typ)
-	if kind == embedUnknown {
-		p.yyerrorpos(pos, "go:embed cannot apply to var of type %v", typ)
-		return exprs
+	var list []irEmbed
+	for _, e := range embeds {
+		list = append(list, irEmbed{Pos: p.makeXPos(e.Pos), Patterns: e.Patterns})
 	}
+	v := names[0]
+	v.Name.Param.SetEmbedList(list)
+	embedlist = append(embedlist, v)
+}
 
+func embedFileList(v *Node, kind int) []string {
 	// Build list of files to store.
 	have := make(map[string]bool)
 	var list []string
-	for _, e := range embeds {
+	for _, e := range v.Name.Param.EmbedList() {
 		for _, pattern := range e.Patterns {
 			files, ok := embedCfg.Patterns[pattern]
 			if !ok {
-				p.yyerrorpos(e.Pos, "invalid go:embed: build system did not map pattern: %s", pattern)
+				yyerrorl(e.Pos, "invalid go:embed: build system did not map pattern: %s", pattern)
 			}
 			for _, file := range files {
 				if embedCfg.Files[file] == "" {
-					p.yyerrorpos(e.Pos, "invalid go:embed: build system did not map file: %s", file)
+					yyerrorl(e.Pos, "invalid go:embed: build system did not map file: %s", file)
 					continue
 				}
 				if !have[file] {
@@ -126,46 +128,12 @@
 
 	if kind == embedString || kind == embedBytes {
 		if len(list) > 1 {
-			p.yyerrorpos(pos, "invalid go:embed: multiple files for type %v", typ)
-			return exprs
+			yyerrorl(v.Pos, "invalid go:embed: multiple files for type %v", v.Type)
+			return nil
 		}
 	}
 
-	v := names[0]
-	if dclcontext != PEXTERN {
-		numLocalEmbed++
-		v = newnamel(v.Pos, lookupN("embed.", numLocalEmbed))
-		v.Sym.Def = asTypesNode(v)
-		v.Name.Param.Ntype = typ
-		v.SetClass(PEXTERN)
-		externdcl = append(externdcl, v)
-		exprs = []*Node{v}
-	}
-
-	v.Name.Param.SetEmbedFiles(list)
-	embedlist = append(embedlist, v)
-	return exprs
-}
-
-// embedKindApprox determines the kind of embedding variable, approximately.
-// The match is approximate because we haven't done scope resolution yet and
-// can't tell whether "string" and "byte" really mean "string" and "byte".
-// The result must be confirmed later, after type checking, using embedKind.
-func embedKindApprox(typ *Node) int {
-	if typ.Sym != nil && typ.Sym.Name == "FS" && (typ.Sym.Pkg.Path == "embed" || (typ.Sym.Pkg == localpkg && myimportpath == "embed")) {
-		return embedFiles
-	}
-	// These are not guaranteed to match only string and []byte -
-	// maybe the local package has redefined one of those words.
-	// But it's the best we can do now during the noder.
-	// The stricter check happens later, in initEmbed calling embedKind.
-	if typ.Sym != nil && typ.Sym.Name == "string" && typ.Sym.Pkg == localpkg {
-		return embedString
-	}
-	if typ.Op == OTARRAY && typ.Left == nil && typ.Right.Sym != nil && typ.Right.Sym.Name == "byte" && typ.Right.Sym.Pkg == localpkg {
-		return embedBytes
-	}
-	return embedUnknown
+	return list
 }
 
 // embedKind determines the kind of embedding variable.
@@ -173,10 +141,10 @@
 	if typ.Sym != nil && typ.Sym.Name == "FS" && (typ.Sym.Pkg.Path == "embed" || (typ.Sym.Pkg == localpkg && myimportpath == "embed")) {
 		return embedFiles
 	}
-	if typ == types.Types[TSTRING] {
+	if typ.Etype == types.TSTRING {
 		return embedString
 	}
-	if typ.Sym == nil && typ.IsSlice() && typ.Elem() == types.Bytetype {
+	if typ.Etype == types.TSLICE && typ.Elem().Etype == types.TUINT8 {
 		return embedBytes
 	}
 	return embedUnknown
@@ -214,11 +182,26 @@
 // initEmbed emits the init data for a //go:embed variable,
 // which is either a string, a []byte, or an embed.FS.
 func initEmbed(v *Node) {
-	files := v.Name.Param.EmbedFiles()
-	switch kind := embedKind(v.Type); kind {
-	case embedUnknown:
+	commentPos := v.Name.Param.EmbedList()[0].Pos
+	if !langSupported(1, 16, localpkg) {
+		lno := lineno
+		lineno = commentPos
+		yyerrorv("go1.16", "go:embed")
+		lineno = lno
+		return
+	}
+	if embedCfg.Patterns == nil {
+		yyerrorl(commentPos, "invalid go:embed: build system did not supply embed configuration")
+		return
+	}
+	kind := embedKind(v.Type)
+	if kind == embedUnknown {
 		yyerrorl(v.Pos, "go:embed cannot apply to var of type %v", v.Type)
+		return
+	}
 
+	files := embedFileList(v, kind)
+	switch kind {
 	case embedString, embedBytes:
 		file := files[0]
 		fsym, size, err := fileStringSym(v.Pos, embedCfg.Files[file], kind == embedString, nil)
diff --git a/src/cmd/compile/internal/gc/initorder.go b/src/cmd/compile/internal/gc/initorder.go
index 41f1349..e2084fd 100644
--- a/src/cmd/compile/internal/gc/initorder.go
+++ b/src/cmd/compile/internal/gc/initorder.go
@@ -108,7 +108,7 @@
 					errorexit()
 				}
 
-				findInitLoopAndExit(firstLHS(n), new([]*Node))
+				findInitLoopAndExit(firstLHS(n), new([]*Node), make(map[*Node]bool))
 				Fatalf("initialization unfinished, but failed to identify loop")
 			}
 		}
@@ -181,10 +181,7 @@
 // path points to a slice used for tracking the sequence of
 // variables/functions visited. Using a pointer to a slice allows the
 // slice capacity to grow and limit reallocations.
-func findInitLoopAndExit(n *Node, path *[]*Node) {
-	// We implement a simple DFS loop-finding algorithm. This
-	// could be faster, but initialization cycles are rare.
-
+func findInitLoopAndExit(n *Node, path *[]*Node, ok map[*Node]bool) {
 	for i, x := range *path {
 		if x == n {
 			reportInitLoopAndExit((*path)[i:])
@@ -201,12 +198,18 @@
 	*path = append(*path, n)
 	for _, ref := range refers {
 		// Short-circuit variables that were initialized.
-		if ref.Class() == PEXTERN && ref.Name.Defn.Initorder() == InitDone {
+		if ref.Class() == PEXTERN && ref.Name.Defn.Initorder() == InitDone || ok[ref] {
 			continue
 		}
-
-		findInitLoopAndExit(ref, path)
+		findInitLoopAndExit(ref, path, ok)
 	}
+
+	// n is not involved in a cycle.
+	// Record that fact to avoid checking it again when reached another way,
+	// or else this traversal will take exponential time traversing all paths
+	// through the part of the package's call graph implicated in the cycle.
+	ok[n] = true
+
 	*path = (*path)[:len(*path)-1]
 }
 
diff --git a/src/cmd/compile/internal/gc/noder.go b/src/cmd/compile/internal/gc/noder.go
index 67d24ef..7494c3e 100644
--- a/src/cmd/compile/internal/gc/noder.go
+++ b/src/cmd/compile/internal/gc/noder.go
@@ -397,7 +397,7 @@
 					p.yyerrorpos(e.Pos, "//go:embed only allowed in Go files that import \"embed\"")
 				}
 			} else {
-				exprs = varEmbed(p, names, typ, exprs, pragma.Embeds)
+				varEmbed(p, names, typ, exprs, pragma.Embeds)
 			}
 			pragma.Embeds = nil
 		}
diff --git a/src/cmd/compile/internal/gc/syntax.go b/src/cmd/compile/internal/gc/syntax.go
index 4335833..7b4a315 100644
--- a/src/cmd/compile/internal/gc/syntax.go
+++ b/src/cmd/compile/internal/gc/syntax.go
@@ -499,7 +499,12 @@
 	alias bool
 }
 
-type embedFileList []string
+type irEmbed struct {
+	Pos      src.XPos
+	Patterns []string
+}
+
+type embedList []irEmbed
 
 // Pragma returns the PragmaFlag for p, which must be for an OTYPE.
 func (p *Param) Pragma() PragmaFlag {
@@ -547,28 +552,28 @@
 	(*p.Extra).(*paramType).alias = alias
 }
 
-// EmbedFiles returns the list of embedded files for p,
+// EmbedList returns the list of embedded files for p,
 // which must be for an ONAME var.
-func (p *Param) EmbedFiles() []string {
+func (p *Param) EmbedList() []irEmbed {
 	if p.Extra == nil {
 		return nil
 	}
-	return *(*p.Extra).(*embedFileList)
+	return *(*p.Extra).(*embedList)
 }
 
-// SetEmbedFiles sets the list of embedded files for p,
+// SetEmbedList sets the list of embedded files for p,
 // which must be for an ONAME var.
-func (p *Param) SetEmbedFiles(list []string) {
+func (p *Param) SetEmbedList(list []irEmbed) {
 	if p.Extra == nil {
 		if len(list) == 0 {
 			return
 		}
-		f := embedFileList(list)
+		f := embedList(list)
 		p.Extra = new(interface{})
 		*p.Extra = &f
 		return
 	}
-	*(*p.Extra).(*embedFileList) = list
+	*(*p.Extra).(*embedList) = list
 }
 
 // Functions
diff --git a/src/cmd/compile/internal/ssa/expand_calls.go b/src/cmd/compile/internal/ssa/expand_calls.go
index fbde19d..679ee8a 100644
--- a/src/cmd/compile/internal/ssa/expand_calls.go
+++ b/src/cmd/compile/internal/ssa/expand_calls.go
@@ -194,7 +194,8 @@
 				}
 				break
 			}
-			if leaf.Op == OpIData {
+			switch leaf.Op {
+			case OpIData, OpStructSelect, OpArraySelect:
 				leafType = removeTrivialWrapperTypes(leaf.Type)
 			}
 			aux := selector.Aux
diff --git a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
index 87db2b7..b0bc9c7 100644
--- a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
@@ -478,20 +478,24 @@
 		// pseudo-ops
 		{name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{gpg}}, nilCheck: true, faultOnNilArg0: true}, // panic if arg0 is nil.  arg1=mem.
 
-		{name: "Equal", argLength: 1, reg: readflags},         // bool, true flags encode x==y false otherwise.
-		{name: "NotEqual", argLength: 1, reg: readflags},      // bool, true flags encode x!=y false otherwise.
-		{name: "LessThan", argLength: 1, reg: readflags},      // bool, true flags encode signed x<y false otherwise.
-		{name: "LessEqual", argLength: 1, reg: readflags},     // bool, true flags encode signed x<=y false otherwise.
-		{name: "GreaterThan", argLength: 1, reg: readflags},   // bool, true flags encode signed x>y false otherwise.
-		{name: "GreaterEqual", argLength: 1, reg: readflags},  // bool, true flags encode signed x>=y false otherwise.
-		{name: "LessThanU", argLength: 1, reg: readflags},     // bool, true flags encode unsigned x<y false otherwise.
-		{name: "LessEqualU", argLength: 1, reg: readflags},    // bool, true flags encode unsigned x<=y false otherwise.
-		{name: "GreaterThanU", argLength: 1, reg: readflags},  // bool, true flags encode unsigned x>y false otherwise.
-		{name: "GreaterEqualU", argLength: 1, reg: readflags}, // bool, true flags encode unsigned x>=y false otherwise.
-		{name: "LessThanF", argLength: 1, reg: readflags},     // bool, true flags encode floating-point x<y false otherwise.
-		{name: "LessEqualF", argLength: 1, reg: readflags},    // bool, true flags encode floating-point x<=y false otherwise.
-		{name: "GreaterThanF", argLength: 1, reg: readflags},  // bool, true flags encode floating-point x>y false otherwise.
-		{name: "GreaterEqualF", argLength: 1, reg: readflags}, // bool, true flags encode floating-point x>=y false otherwise.
+		{name: "Equal", argLength: 1, reg: readflags},            // bool, true flags encode x==y false otherwise.
+		{name: "NotEqual", argLength: 1, reg: readflags},         // bool, true flags encode x!=y false otherwise.
+		{name: "LessThan", argLength: 1, reg: readflags},         // bool, true flags encode signed x<y false otherwise.
+		{name: "LessEqual", argLength: 1, reg: readflags},        // bool, true flags encode signed x<=y false otherwise.
+		{name: "GreaterThan", argLength: 1, reg: readflags},      // bool, true flags encode signed x>y false otherwise.
+		{name: "GreaterEqual", argLength: 1, reg: readflags},     // bool, true flags encode signed x>=y false otherwise.
+		{name: "LessThanU", argLength: 1, reg: readflags},        // bool, true flags encode unsigned x<y false otherwise.
+		{name: "LessEqualU", argLength: 1, reg: readflags},       // bool, true flags encode unsigned x<=y false otherwise.
+		{name: "GreaterThanU", argLength: 1, reg: readflags},     // bool, true flags encode unsigned x>y false otherwise.
+		{name: "GreaterEqualU", argLength: 1, reg: readflags},    // bool, true flags encode unsigned x>=y false otherwise.
+		{name: "LessThanF", argLength: 1, reg: readflags},        // bool, true flags encode floating-point x<y false otherwise.
+		{name: "LessEqualF", argLength: 1, reg: readflags},       // bool, true flags encode floating-point x<=y false otherwise.
+		{name: "GreaterThanF", argLength: 1, reg: readflags},     // bool, true flags encode floating-point x>y false otherwise.
+		{name: "GreaterEqualF", argLength: 1, reg: readflags},    // bool, true flags encode floating-point x>=y false otherwise.
+		{name: "NotLessThanF", argLength: 1, reg: readflags},     // bool, true flags encode floating-point x>=y || x is unordered with y, false otherwise.
+		{name: "NotLessEqualF", argLength: 1, reg: readflags},    // bool, true flags encode floating-point x>y || x is unordered with y, false otherwise.
+		{name: "NotGreaterThanF", argLength: 1, reg: readflags},  // bool, true flags encode floating-point x<=y || x is unordered with y, false otherwise.
+		{name: "NotGreaterEqualF", argLength: 1, reg: readflags}, // bool, true flags encode floating-point x<y || x is unordered with y, false otherwise.
 		// duffzero
 		// arg0 = address of memory to zero
 		// arg1 = mem
diff --git a/src/cmd/compile/internal/ssa/gen/generic.rules b/src/cmd/compile/internal/ssa/gen/generic.rules
index 81568b7..1784923 100644
--- a/src/cmd/compile/internal/ssa/gen/generic.rules
+++ b/src/cmd/compile/internal/ssa/gen/generic.rules
@@ -2512,7 +2512,7 @@
 (Move {t1} [s] dst tmp1 midmem:(Move {t2} [s] tmp2 src _))
 	&& t1.Compare(t2) == types.CMPeq
 	&& isSamePtr(tmp1, tmp2)
-	&& isStackPtr(src)
+	&& isStackPtr(src) && !isVolatile(src)
 	&& disjoint(src, s, tmp2, s)
 	&& (disjoint(src, s, dst, s) || isInlinableMemmove(dst, src, s, config))
 	=> (Move {t1} [s] dst src midmem)
@@ -2521,7 +2521,7 @@
 (Move {t1} [s] dst tmp1 midmem:(VarDef (Move {t2} [s] tmp2 src _)))
 	&& t1.Compare(t2) == types.CMPeq
 	&& isSamePtr(tmp1, tmp2)
-	&& isStackPtr(src)
+	&& isStackPtr(src) && !isVolatile(src)
 	&& disjoint(src, s, tmp2, s)
 	&& (disjoint(src, s, dst, s) || isInlinableMemmove(dst, src, s, config))
 	=> (Move {t1} [s] dst src midmem)
diff --git a/src/cmd/compile/internal/ssa/html.go b/src/cmd/compile/internal/ssa/html.go
index a9d52fa..c06b580 100644
--- a/src/cmd/compile/internal/ssa/html.go
+++ b/src/cmd/compile/internal/ssa/html.go
@@ -9,9 +9,9 @@
 	"cmd/internal/src"
 	"fmt"
 	"html"
+	exec "internal/execabs"
 	"io"
 	"os"
-	"os/exec"
 	"path/filepath"
 	"strconv"
 	"strings"
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 83d35cf..e590f6b 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -1564,6 +1564,10 @@
 	OpARM64LessEqualF
 	OpARM64GreaterThanF
 	OpARM64GreaterEqualF
+	OpARM64NotLessThanF
+	OpARM64NotLessEqualF
+	OpARM64NotGreaterThanF
+	OpARM64NotGreaterEqualF
 	OpARM64DUFFZERO
 	OpARM64LoweredZero
 	OpARM64DUFFCOPY
@@ -20799,6 +20803,42 @@
 		},
 	},
 	{
+		name:   "NotLessThanF",
+		argLen: 1,
+		reg: regInfo{
+			outputs: []outputInfo{
+				{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+			},
+		},
+	},
+	{
+		name:   "NotLessEqualF",
+		argLen: 1,
+		reg: regInfo{
+			outputs: []outputInfo{
+				{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+			},
+		},
+	},
+	{
+		name:   "NotGreaterThanF",
+		argLen: 1,
+		reg: regInfo{
+			outputs: []outputInfo{
+				{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+			},
+		},
+	},
+	{
+		name:   "NotGreaterEqualF",
+		argLen: 1,
+		reg: regInfo{
+			outputs: []outputInfo{
+				{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+			},
+		},
+	},
+	{
 		name:           "DUFFZERO",
 		auxType:        auxInt64,
 		argLen:         2,
diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go
index 24efd38..f5d1a78 100644
--- a/src/cmd/compile/internal/ssa/rewrite.go
+++ b/src/cmd/compile/internal/ssa/rewrite.go
@@ -974,9 +974,10 @@
 }
 
 // arm64Negate finds the complement to an ARM64 condition code,
-// for example Equal -> NotEqual or LessThan -> GreaterEqual
+// for example !Equal -> NotEqual or !LessThan -> GreaterEqual
 //
-// TODO: add floating-point conditions
+// For floating point, it's more subtle because NaN is unordered. We do
+// !LessThanF -> NotLessThanF, the latter takes care of NaNs.
 func arm64Negate(op Op) Op {
 	switch op {
 	case OpARM64LessThan:
@@ -1000,13 +1001,21 @@
 	case OpARM64NotEqual:
 		return OpARM64Equal
 	case OpARM64LessThanF:
-		return OpARM64GreaterEqualF
-	case OpARM64GreaterThanF:
-		return OpARM64LessEqualF
+		return OpARM64NotLessThanF
+	case OpARM64NotLessThanF:
+		return OpARM64LessThanF
 	case OpARM64LessEqualF:
+		return OpARM64NotLessEqualF
+	case OpARM64NotLessEqualF:
+		return OpARM64LessEqualF
+	case OpARM64GreaterThanF:
+		return OpARM64NotGreaterThanF
+	case OpARM64NotGreaterThanF:
 		return OpARM64GreaterThanF
 	case OpARM64GreaterEqualF:
-		return OpARM64LessThanF
+		return OpARM64NotGreaterEqualF
+	case OpARM64NotGreaterEqualF:
+		return OpARM64GreaterEqualF
 	default:
 		panic("unreachable")
 	}
@@ -1017,8 +1026,6 @@
 // that the same result would be produced if the arguments
 // to the flag-generating instruction were reversed, e.g.
 // (InvertFlags (CMP x y)) -> (CMP y x)
-//
-// TODO: add floating-point conditions
 func arm64Invert(op Op) Op {
 	switch op {
 	case OpARM64LessThan:
@@ -1047,6 +1054,14 @@
 		return OpARM64GreaterEqualF
 	case OpARM64GreaterEqualF:
 		return OpARM64LessEqualF
+	case OpARM64NotLessThanF:
+		return OpARM64NotGreaterThanF
+	case OpARM64NotGreaterThanF:
+		return OpARM64NotLessThanF
+	case OpARM64NotLessEqualF:
+		return OpARM64NotGreaterEqualF
+	case OpARM64NotGreaterEqualF:
+		return OpARM64NotLessEqualF
 	default:
 		panic("unreachable")
 	}
diff --git a/src/cmd/compile/internal/ssa/rewritegeneric.go b/src/cmd/compile/internal/ssa/rewritegeneric.go
index 4cb9a8f..958e24d 100644
--- a/src/cmd/compile/internal/ssa/rewritegeneric.go
+++ b/src/cmd/compile/internal/ssa/rewritegeneric.go
@@ -13637,7 +13637,7 @@
 		return true
 	}
 	// match: (Move {t1} [s] dst tmp1 midmem:(Move {t2} [s] tmp2 src _))
-	// cond: t1.Compare(t2) == types.CMPeq && isSamePtr(tmp1, tmp2) && isStackPtr(src) && disjoint(src, s, tmp2, s) && (disjoint(src, s, dst, s) || isInlinableMemmove(dst, src, s, config))
+	// cond: t1.Compare(t2) == types.CMPeq && isSamePtr(tmp1, tmp2) && isStackPtr(src) && !isVolatile(src) && disjoint(src, s, tmp2, s) && (disjoint(src, s, dst, s) || isInlinableMemmove(dst, src, s, config))
 	// result: (Move {t1} [s] dst src midmem)
 	for {
 		s := auxIntToInt64(v.AuxInt)
@@ -13651,7 +13651,7 @@
 		t2 := auxToType(midmem.Aux)
 		src := midmem.Args[1]
 		tmp2 := midmem.Args[0]
-		if !(t1.Compare(t2) == types.CMPeq && isSamePtr(tmp1, tmp2) && isStackPtr(src) && disjoint(src, s, tmp2, s) && (disjoint(src, s, dst, s) || isInlinableMemmove(dst, src, s, config))) {
+		if !(t1.Compare(t2) == types.CMPeq && isSamePtr(tmp1, tmp2) && isStackPtr(src) && !isVolatile(src) && disjoint(src, s, tmp2, s) && (disjoint(src, s, dst, s) || isInlinableMemmove(dst, src, s, config))) {
 			break
 		}
 		v.reset(OpMove)
@@ -13661,7 +13661,7 @@
 		return true
 	}
 	// match: (Move {t1} [s] dst tmp1 midmem:(VarDef (Move {t2} [s] tmp2 src _)))
-	// cond: t1.Compare(t2) == types.CMPeq && isSamePtr(tmp1, tmp2) && isStackPtr(src) && disjoint(src, s, tmp2, s) && (disjoint(src, s, dst, s) || isInlinableMemmove(dst, src, s, config))
+	// cond: t1.Compare(t2) == types.CMPeq && isSamePtr(tmp1, tmp2) && isStackPtr(src) && !isVolatile(src) && disjoint(src, s, tmp2, s) && (disjoint(src, s, dst, s) || isInlinableMemmove(dst, src, s, config))
 	// result: (Move {t1} [s] dst src midmem)
 	for {
 		s := auxIntToInt64(v.AuxInt)
@@ -13679,7 +13679,7 @@
 		t2 := auxToType(midmem_0.Aux)
 		src := midmem_0.Args[1]
 		tmp2 := midmem_0.Args[0]
-		if !(t1.Compare(t2) == types.CMPeq && isSamePtr(tmp1, tmp2) && isStackPtr(src) && disjoint(src, s, tmp2, s) && (disjoint(src, s, dst, s) || isInlinableMemmove(dst, src, s, config))) {
+		if !(t1.Compare(t2) == types.CMPeq && isSamePtr(tmp1, tmp2) && isStackPtr(src) && !isVolatile(src) && disjoint(src, s, tmp2, s) && (disjoint(src, s, dst, s) || isInlinableMemmove(dst, src, s, config))) {
 			break
 		}
 		v.reset(OpMove)