blob: 14870c9feb2bf8476a56b853913cc0e2fe237781 [file] [log] [blame]
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Parallel for algorithm.
package runtime
import "unsafe"
type parforthread struct {
// the thread's iteration space [32lsb, 32msb)
pos uint64
// stats
nsteal uint64
nstealcnt uint64
nprocyield uint64
nosyield uint64
nsleep uint64
pad [_CacheLineSize]byte
}
func desc_thr_index(desc *parfor, i uint32) *parforthread {
return (*parforthread)(add(unsafe.Pointer(desc.thr), uintptr(i)*unsafe.Sizeof(*desc.thr)))
}
func parforsetup(desc *parfor, nthr, n uint32, ctx unsafe.Pointer, wait bool, body func(*parfor, uint32)) {
if desc == nil || nthr == 0 || nthr > desc.nthrmax || body == nil {
print("desc=", desc, " nthr=", nthr, " count=", n, " body=", body, "\n")
gothrow("parfor: invalid args")
}
desc.body = *(*unsafe.Pointer)(unsafe.Pointer(&body))
desc.done = 0
desc.nthr = nthr
desc.thrseq = 0
desc.cnt = n
desc.ctx = ctx
desc.wait = wait
desc.nsteal = 0
desc.nstealcnt = 0
desc.nprocyield = 0
desc.nosyield = 0
desc.nsleep = 0
for i := uint32(0); i < nthr; i++ {
begin := uint32(uint64(n) * uint64(i) / uint64(nthr))
end := uint32(uint64(n) * uint64(i+1) / uint64(nthr))
pos := &desc_thr_index(desc, i).pos
if uintptr(unsafe.Pointer(pos))&7 != 0 {
gothrow("parforsetup: pos is not aligned")
}
*pos = uint64(begin) | uint64(end)<<32
}
}
func parfordo(desc *parfor) {
// Obtain 0-based thread index.
tid := xadd(&desc.thrseq, 1) - 1
if tid >= desc.nthr {
print("tid=", tid, " nthr=", desc.nthr, "\n")
gothrow("parfor: invalid tid")
}
// If single-threaded, just execute the for serially.
body := *(*func(*parfor, uint32))(unsafe.Pointer(&desc.body))
if desc.nthr == 1 {
for i := uint32(0); i < desc.cnt; i++ {
body(desc, i)
}
return
}
me := desc_thr_index(desc, tid)
mypos := &me.pos
for {
for {
// While there is local work,
// bump low index and execute the iteration.
pos := xadd64(mypos, 1)
begin := uint32(pos) - 1
end := uint32(pos >> 32)
if begin < end {
body(desc, begin)
continue
}
break
}
// Out of work, need to steal something.
idle := false
for try := uint32(0); ; try++ {
// If we don't see any work for long enough,
// increment the done counter...
if try > desc.nthr*4 && !idle {
idle = true
xadd(&desc.done, 1)
}
// ...if all threads have incremented the counter,
// we are done.
extra := uint32(0)
if !idle {
extra = 1
}
if desc.done+extra == desc.nthr {
if !idle {
xadd(&desc.done, 1)
}
goto exit
}
// Choose a random victim for stealing.
var begin, end uint32
victim := fastrand1() % (desc.nthr - 1)
if victim >= tid {
victim++
}
victimpos := &desc_thr_index(desc, victim).pos
for {
// See if it has any work.
pos := atomicload64(victimpos)
begin = uint32(pos)
end = uint32(pos >> 32)
if begin+1 >= end {
end = 0
begin = end
break
}
if idle {
xadd(&desc.done, -1)
idle = false
}
begin2 := begin + (end-begin)/2
newpos := uint64(begin) | uint64(begin2)<<32
if cas64(victimpos, pos, newpos) {
begin = begin2
break
}
}
if begin < end {
// Has successfully stolen some work.
if idle {
gothrow("parfor: should not be idle")
}
atomicstore64(mypos, uint64(begin)|uint64(end)<<32)
me.nsteal++
me.nstealcnt += uint64(end) - uint64(begin)
break
}
// Backoff.
if try < desc.nthr {
// nothing
} else if try < 4*desc.nthr {
me.nprocyield++
procyield(20)
} else if !desc.wait {
// If a caller asked not to wait for the others, exit now
// (assume that most work is already done at this point).
if !idle {
xadd(&desc.done, 1)
}
goto exit
} else if try < 6*desc.nthr {
me.nosyield++
osyield()
} else {
me.nsleep++
usleep(1)
}
}
}
exit:
xadd64(&desc.nsteal, int64(me.nsteal))
xadd64(&desc.nstealcnt, int64(me.nstealcnt))
xadd64(&desc.nprocyield, int64(me.nprocyield))
xadd64(&desc.nosyield, int64(me.nosyield))
xadd64(&desc.nsleep, int64(me.nsleep))
me.nsteal = 0
me.nstealcnt = 0
me.nprocyield = 0
me.nosyield = 0
me.nsleep = 0
}