| // Copyright 2016 The Go Authors. All rights reserved. | 
 | // Use of this source code is governed by a BSD-style | 
 | // license that can be found in the LICENSE file. | 
 |  | 
 | package x86 | 
 |  | 
 | import ( | 
 | 	"cmd/compile/internal/gc" | 
 | 	"cmd/compile/internal/ssa" | 
 | 	"cmd/compile/internal/types" | 
 | 	"cmd/internal/obj" | 
 | 	"cmd/internal/obj/x86" | 
 | 	"math" | 
 | ) | 
 |  | 
 | // Generates code for v using 387 instructions. | 
 | func ssaGenValue387(s *gc.SSAGenState, v *ssa.Value) { | 
 | 	// The SSA compiler pretends that it has an SSE backend. | 
 | 	// If we don't have one of those, we need to translate | 
 | 	// all the SSE ops to equivalent 387 ops. That's what this | 
 | 	// function does. | 
 |  | 
 | 	switch v.Op { | 
 | 	case ssa.Op386MOVSSconst, ssa.Op386MOVSDconst: | 
 | 		iv := uint64(v.AuxInt) | 
 | 		if iv == 0x0000000000000000 { // +0.0 | 
 | 			s.Prog(x86.AFLDZ) | 
 | 		} else if iv == 0x3ff0000000000000 { // +1.0 | 
 | 			s.Prog(x86.AFLD1) | 
 | 		} else if iv == 0x8000000000000000 { // -0.0 | 
 | 			s.Prog(x86.AFLDZ) | 
 | 			s.Prog(x86.AFCHS) | 
 | 		} else if iv == 0xbff0000000000000 { // -1.0 | 
 | 			s.Prog(x86.AFLD1) | 
 | 			s.Prog(x86.AFCHS) | 
 | 		} else if iv == 0x400921fb54442d18 { // +pi | 
 | 			s.Prog(x86.AFLDPI) | 
 | 		} else if iv == 0xc00921fb54442d18 { // -pi | 
 | 			s.Prog(x86.AFLDPI) | 
 | 			s.Prog(x86.AFCHS) | 
 | 		} else { // others | 
 | 			p := s.Prog(loadPush(v.Type)) | 
 | 			p.From.Type = obj.TYPE_FCONST | 
 | 			p.From.Val = math.Float64frombits(iv) | 
 | 			p.To.Type = obj.TYPE_REG | 
 | 			p.To.Reg = x86.REG_F0 | 
 | 		} | 
 | 		popAndSave(s, v) | 
 |  | 
 | 	case ssa.Op386MOVSSconst2, ssa.Op386MOVSDconst2: | 
 | 		p := s.Prog(loadPush(v.Type)) | 
 | 		p.From.Type = obj.TYPE_MEM | 
 | 		p.From.Reg = v.Args[0].Reg() | 
 | 		p.To.Type = obj.TYPE_REG | 
 | 		p.To.Reg = x86.REG_F0 | 
 | 		popAndSave(s, v) | 
 |  | 
 | 	case ssa.Op386MOVSSload, ssa.Op386MOVSDload, ssa.Op386MOVSSloadidx1, ssa.Op386MOVSDloadidx1, ssa.Op386MOVSSloadidx4, ssa.Op386MOVSDloadidx8: | 
 | 		p := s.Prog(loadPush(v.Type)) | 
 | 		p.From.Type = obj.TYPE_MEM | 
 | 		p.From.Reg = v.Args[0].Reg() | 
 | 		gc.AddAux(&p.From, v) | 
 | 		switch v.Op { | 
 | 		case ssa.Op386MOVSSloadidx1, ssa.Op386MOVSDloadidx1: | 
 | 			p.From.Scale = 1 | 
 | 			p.From.Index = v.Args[1].Reg() | 
 | 			if p.From.Index == x86.REG_SP { | 
 | 				p.From.Reg, p.From.Index = p.From.Index, p.From.Reg | 
 | 			} | 
 | 		case ssa.Op386MOVSSloadidx4: | 
 | 			p.From.Scale = 4 | 
 | 			p.From.Index = v.Args[1].Reg() | 
 | 		case ssa.Op386MOVSDloadidx8: | 
 | 			p.From.Scale = 8 | 
 | 			p.From.Index = v.Args[1].Reg() | 
 | 		} | 
 | 		p.To.Type = obj.TYPE_REG | 
 | 		p.To.Reg = x86.REG_F0 | 
 | 		popAndSave(s, v) | 
 |  | 
 | 	case ssa.Op386MOVSSstore, ssa.Op386MOVSDstore: | 
 | 		// Push to-be-stored value on top of stack. | 
 | 		push(s, v.Args[1]) | 
 |  | 
 | 		// Pop and store value. | 
 | 		var op obj.As | 
 | 		switch v.Op { | 
 | 		case ssa.Op386MOVSSstore: | 
 | 			op = x86.AFMOVFP | 
 | 		case ssa.Op386MOVSDstore: | 
 | 			op = x86.AFMOVDP | 
 | 		} | 
 | 		p := s.Prog(op) | 
 | 		p.From.Type = obj.TYPE_REG | 
 | 		p.From.Reg = x86.REG_F0 | 
 | 		p.To.Type = obj.TYPE_MEM | 
 | 		p.To.Reg = v.Args[0].Reg() | 
 | 		gc.AddAux(&p.To, v) | 
 |  | 
 | 	case ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSDstoreidx1, ssa.Op386MOVSSstoreidx4, ssa.Op386MOVSDstoreidx8: | 
 | 		push(s, v.Args[2]) | 
 | 		var op obj.As | 
 | 		switch v.Op { | 
 | 		case ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSSstoreidx4: | 
 | 			op = x86.AFMOVFP | 
 | 		case ssa.Op386MOVSDstoreidx1, ssa.Op386MOVSDstoreidx8: | 
 | 			op = x86.AFMOVDP | 
 | 		} | 
 | 		p := s.Prog(op) | 
 | 		p.From.Type = obj.TYPE_REG | 
 | 		p.From.Reg = x86.REG_F0 | 
 | 		p.To.Type = obj.TYPE_MEM | 
 | 		p.To.Reg = v.Args[0].Reg() | 
 | 		gc.AddAux(&p.To, v) | 
 | 		switch v.Op { | 
 | 		case ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSDstoreidx1: | 
 | 			p.To.Scale = 1 | 
 | 			p.To.Index = v.Args[1].Reg() | 
 | 			if p.To.Index == x86.REG_SP { | 
 | 				p.To.Reg, p.To.Index = p.To.Index, p.To.Reg | 
 | 			} | 
 | 		case ssa.Op386MOVSSstoreidx4: | 
 | 			p.To.Scale = 4 | 
 | 			p.To.Index = v.Args[1].Reg() | 
 | 		case ssa.Op386MOVSDstoreidx8: | 
 | 			p.To.Scale = 8 | 
 | 			p.To.Index = v.Args[1].Reg() | 
 | 		} | 
 |  | 
 | 	case ssa.Op386ADDSS, ssa.Op386ADDSD, ssa.Op386SUBSS, ssa.Op386SUBSD, | 
 | 		ssa.Op386MULSS, ssa.Op386MULSD, ssa.Op386DIVSS, ssa.Op386DIVSD: | 
 | 		if v.Reg() != v.Args[0].Reg() { | 
 | 			v.Fatalf("input[0] and output not in same register %s", v.LongString()) | 
 | 		} | 
 |  | 
 | 		// Push arg1 on top of stack | 
 | 		push(s, v.Args[1]) | 
 |  | 
 | 		// Set precision if needed.  64 bits is the default. | 
 | 		switch v.Op { | 
 | 		case ssa.Op386ADDSS, ssa.Op386SUBSS, ssa.Op386MULSS, ssa.Op386DIVSS: | 
 | 			p := s.Prog(x86.AFSTCW) | 
 | 			s.AddrScratch(&p.To) | 
 | 			p = s.Prog(x86.AFLDCW) | 
 | 			p.From.Type = obj.TYPE_MEM | 
 | 			p.From.Name = obj.NAME_EXTERN | 
 | 			p.From.Sym = gc.ControlWord32 | 
 | 		} | 
 |  | 
 | 		var op obj.As | 
 | 		switch v.Op { | 
 | 		case ssa.Op386ADDSS, ssa.Op386ADDSD: | 
 | 			op = x86.AFADDDP | 
 | 		case ssa.Op386SUBSS, ssa.Op386SUBSD: | 
 | 			op = x86.AFSUBDP | 
 | 		case ssa.Op386MULSS, ssa.Op386MULSD: | 
 | 			op = x86.AFMULDP | 
 | 		case ssa.Op386DIVSS, ssa.Op386DIVSD: | 
 | 			op = x86.AFDIVDP | 
 | 		} | 
 | 		p := s.Prog(op) | 
 | 		p.From.Type = obj.TYPE_REG | 
 | 		p.From.Reg = x86.REG_F0 | 
 | 		p.To.Type = obj.TYPE_REG | 
 | 		p.To.Reg = s.SSEto387[v.Reg()] + 1 | 
 |  | 
 | 		// Restore precision if needed. | 
 | 		switch v.Op { | 
 | 		case ssa.Op386ADDSS, ssa.Op386SUBSS, ssa.Op386MULSS, ssa.Op386DIVSS: | 
 | 			p := s.Prog(x86.AFLDCW) | 
 | 			s.AddrScratch(&p.From) | 
 | 		} | 
 |  | 
 | 	case ssa.Op386UCOMISS, ssa.Op386UCOMISD: | 
 | 		push(s, v.Args[0]) | 
 |  | 
 | 		// Compare. | 
 | 		p := s.Prog(x86.AFUCOMP) | 
 | 		p.From.Type = obj.TYPE_REG | 
 | 		p.From.Reg = x86.REG_F0 | 
 | 		p.To.Type = obj.TYPE_REG | 
 | 		p.To.Reg = s.SSEto387[v.Args[1].Reg()] + 1 | 
 |  | 
 | 		// Save AX. | 
 | 		p = s.Prog(x86.AMOVL) | 
 | 		p.From.Type = obj.TYPE_REG | 
 | 		p.From.Reg = x86.REG_AX | 
 | 		s.AddrScratch(&p.To) | 
 |  | 
 | 		// Move status word into AX. | 
 | 		p = s.Prog(x86.AFSTSW) | 
 | 		p.To.Type = obj.TYPE_REG | 
 | 		p.To.Reg = x86.REG_AX | 
 |  | 
 | 		// Then move the flags we need to the integer flags. | 
 | 		s.Prog(x86.ASAHF) | 
 |  | 
 | 		// Restore AX. | 
 | 		p = s.Prog(x86.AMOVL) | 
 | 		s.AddrScratch(&p.From) | 
 | 		p.To.Type = obj.TYPE_REG | 
 | 		p.To.Reg = x86.REG_AX | 
 |  | 
 | 	case ssa.Op386SQRTSD: | 
 | 		push(s, v.Args[0]) | 
 | 		s.Prog(x86.AFSQRT) | 
 | 		popAndSave(s, v) | 
 |  | 
 | 	case ssa.Op386FCHS: | 
 | 		push(s, v.Args[0]) | 
 | 		s.Prog(x86.AFCHS) | 
 | 		popAndSave(s, v) | 
 |  | 
 | 	case ssa.Op386CVTSL2SS, ssa.Op386CVTSL2SD: | 
 | 		p := s.Prog(x86.AMOVL) | 
 | 		p.From.Type = obj.TYPE_REG | 
 | 		p.From.Reg = v.Args[0].Reg() | 
 | 		s.AddrScratch(&p.To) | 
 | 		p = s.Prog(x86.AFMOVL) | 
 | 		s.AddrScratch(&p.From) | 
 | 		p.To.Type = obj.TYPE_REG | 
 | 		p.To.Reg = x86.REG_F0 | 
 | 		popAndSave(s, v) | 
 |  | 
 | 	case ssa.Op386CVTTSD2SL, ssa.Op386CVTTSS2SL: | 
 | 		push(s, v.Args[0]) | 
 |  | 
 | 		// Save control word. | 
 | 		p := s.Prog(x86.AFSTCW) | 
 | 		s.AddrScratch(&p.To) | 
 | 		p.To.Offset += 4 | 
 |  | 
 | 		// Load control word which truncates (rounds towards zero). | 
 | 		p = s.Prog(x86.AFLDCW) | 
 | 		p.From.Type = obj.TYPE_MEM | 
 | 		p.From.Name = obj.NAME_EXTERN | 
 | 		p.From.Sym = gc.ControlWord64trunc | 
 |  | 
 | 		// Now do the conversion. | 
 | 		p = s.Prog(x86.AFMOVLP) | 
 | 		p.From.Type = obj.TYPE_REG | 
 | 		p.From.Reg = x86.REG_F0 | 
 | 		s.AddrScratch(&p.To) | 
 | 		p = s.Prog(x86.AMOVL) | 
 | 		s.AddrScratch(&p.From) | 
 | 		p.To.Type = obj.TYPE_REG | 
 | 		p.To.Reg = v.Reg() | 
 |  | 
 | 		// Restore control word. | 
 | 		p = s.Prog(x86.AFLDCW) | 
 | 		s.AddrScratch(&p.From) | 
 | 		p.From.Offset += 4 | 
 |  | 
 | 	case ssa.Op386CVTSS2SD: | 
 | 		// float32 -> float64 is a nop | 
 | 		push(s, v.Args[0]) | 
 | 		popAndSave(s, v) | 
 |  | 
 | 	case ssa.Op386CVTSD2SS: | 
 | 		// Round to nearest float32. | 
 | 		push(s, v.Args[0]) | 
 | 		p := s.Prog(x86.AFMOVFP) | 
 | 		p.From.Type = obj.TYPE_REG | 
 | 		p.From.Reg = x86.REG_F0 | 
 | 		s.AddrScratch(&p.To) | 
 | 		p = s.Prog(x86.AFMOVF) | 
 | 		s.AddrScratch(&p.From) | 
 | 		p.To.Type = obj.TYPE_REG | 
 | 		p.To.Reg = x86.REG_F0 | 
 | 		popAndSave(s, v) | 
 |  | 
 | 	case ssa.OpLoadReg: | 
 | 		if !v.Type.IsFloat() { | 
 | 			ssaGenValue(s, v) | 
 | 			return | 
 | 		} | 
 | 		// Load+push the value we need. | 
 | 		p := s.Prog(loadPush(v.Type)) | 
 | 		gc.AddrAuto(&p.From, v.Args[0]) | 
 | 		p.To.Type = obj.TYPE_REG | 
 | 		p.To.Reg = x86.REG_F0 | 
 | 		// Move the value to its assigned register. | 
 | 		popAndSave(s, v) | 
 |  | 
 | 	case ssa.OpStoreReg: | 
 | 		if !v.Type.IsFloat() { | 
 | 			ssaGenValue(s, v) | 
 | 			return | 
 | 		} | 
 | 		push(s, v.Args[0]) | 
 | 		var op obj.As | 
 | 		switch v.Type.Size() { | 
 | 		case 4: | 
 | 			op = x86.AFMOVFP | 
 | 		case 8: | 
 | 			op = x86.AFMOVDP | 
 | 		} | 
 | 		p := s.Prog(op) | 
 | 		p.From.Type = obj.TYPE_REG | 
 | 		p.From.Reg = x86.REG_F0 | 
 | 		gc.AddrAuto(&p.To, v) | 
 |  | 
 | 	case ssa.OpCopy: | 
 | 		if !v.Type.IsFloat() { | 
 | 			ssaGenValue(s, v) | 
 | 			return | 
 | 		} | 
 | 		push(s, v.Args[0]) | 
 | 		popAndSave(s, v) | 
 |  | 
 | 	case ssa.Op386CALLstatic, ssa.Op386CALLclosure, ssa.Op386CALLinter: | 
 | 		flush387(s) // Calls must empty the FP stack. | 
 | 		fallthrough // then issue the call as normal | 
 | 	default: | 
 | 		ssaGenValue(s, v) | 
 | 	} | 
 | } | 
 |  | 
 | // push pushes v onto the floating-point stack.  v must be in a register. | 
 | func push(s *gc.SSAGenState, v *ssa.Value) { | 
 | 	p := s.Prog(x86.AFMOVD) | 
 | 	p.From.Type = obj.TYPE_REG | 
 | 	p.From.Reg = s.SSEto387[v.Reg()] | 
 | 	p.To.Type = obj.TYPE_REG | 
 | 	p.To.Reg = x86.REG_F0 | 
 | } | 
 |  | 
 | // popAndSave pops a value off of the floating-point stack and stores | 
 | // it in the reigster assigned to v. | 
 | func popAndSave(s *gc.SSAGenState, v *ssa.Value) { | 
 | 	r := v.Reg() | 
 | 	if _, ok := s.SSEto387[r]; ok { | 
 | 		// Pop value, write to correct register. | 
 | 		p := s.Prog(x86.AFMOVDP) | 
 | 		p.From.Type = obj.TYPE_REG | 
 | 		p.From.Reg = x86.REG_F0 | 
 | 		p.To.Type = obj.TYPE_REG | 
 | 		p.To.Reg = s.SSEto387[v.Reg()] + 1 | 
 | 	} else { | 
 | 		// Don't actually pop value. This 387 register is now the | 
 | 		// new home for the not-yet-assigned-a-home SSE register. | 
 | 		// Increase the register mapping of all other registers by one. | 
 | 		for rSSE, r387 := range s.SSEto387 { | 
 | 			s.SSEto387[rSSE] = r387 + 1 | 
 | 		} | 
 | 		s.SSEto387[r] = x86.REG_F0 | 
 | 	} | 
 | } | 
 |  | 
 | // loadPush returns the opcode for load+push of the given type. | 
 | func loadPush(t *types.Type) obj.As { | 
 | 	if t.Size() == 4 { | 
 | 		return x86.AFMOVF | 
 | 	} | 
 | 	return x86.AFMOVD | 
 | } | 
 |  | 
 | // flush387 removes all entries from the 387 floating-point stack. | 
 | func flush387(s *gc.SSAGenState) { | 
 | 	for k := range s.SSEto387 { | 
 | 		p := s.Prog(x86.AFMOVDP) | 
 | 		p.From.Type = obj.TYPE_REG | 
 | 		p.From.Reg = x86.REG_F0 | 
 | 		p.To.Type = obj.TYPE_REG | 
 | 		p.To.Reg = x86.REG_F0 | 
 | 		delete(s.SSEto387, k) | 
 | 	} | 
 | } | 
 |  | 
 | func ssaGenBlock387(s *gc.SSAGenState, b, next *ssa.Block) { | 
 | 	// Empty the 387's FP stack before the block ends. | 
 | 	flush387(s) | 
 |  | 
 | 	ssaGenBlock(s, b, next) | 
 | } |