log: reduce lock contention

Logger.Log methods are called in a highly concurrent manner in many servers.
Acquiring a lock in each method call results in high lock contention,
especially since each lock covers a non-trivial amount of work
(e.g., formatting the header and outputting to the writer).

Changes made:
* Modify the Logger to use atomics so that the header formatting
  can be moved out of the critical lock section.
  Acquiring the flags does not occur in the same critical section
  as outputting to the underlying writer, so this introduces
  a very slight consistency instability where concurrently calling
  multiple Logger.Output along with Logger.SetFlags may cause
  the older flags to be used by some ongoing Logger.Output calls
  even after Logger.SetFlags has returned.
* Use a sync.Pool to buffer the intermediate buffer.
  This approach is identical to how fmt does it,
  with the same max cap mitigation for #23199.
* Only protect outputting to the underlying writer with a lock
  to ensure serialized ordering of output.

Performance:
	name           old time/op  new time/op  delta
	Concurrent-24  19.9µs ± 2%   8.3µs ± 1%  -58.37%  (p=0.000 n=10+10)

Updates #19438

Change-Id: I091beb7431d8661976a6c01cdb0d145e37fe3d22
Reviewed-on: https://go-review.googlesource.com/c/go/+/464344
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Joseph Tsai <joetsai@digital-static.net>
Reviewed-by: Ian Lance Taylor <iant@google.com>
Reviewed-by: Bryan Mills <bcmills@google.com>
diff --git a/src/log/log.go b/src/log/log.go
index 566535f..9e38b6e 100644
--- a/src/log/log.go
+++ b/src/log/log.go
@@ -54,12 +54,12 @@
 // the Writer's Write method. A Logger can be used simultaneously from
 // multiple goroutines; it guarantees to serialize access to the Writer.
 type Logger struct {
-	mu        sync.Mutex  // ensures atomic writes; protects the following fields
-	prefix    string      // prefix on each line to identify the logger (but see Lmsgprefix)
-	flag      int         // properties
-	out       io.Writer   // destination for output
-	buf       []byte      // for accumulating text to write
-	isDiscard atomic.Bool // whether out == io.Discard
+	outMu sync.Mutex
+	out   io.Writer // destination for output
+
+	prefix    atomic.Pointer[string] // prefix on each line to identify the logger (but see Lmsgprefix)
+	flag      atomic.Int32           // properties
+	isDiscard atomic.Bool
 }
 
 // New creates a new Logger. The out variable sets the
@@ -68,17 +68,17 @@
 // after the log header if the Lmsgprefix flag is provided.
 // The flag argument defines the logging properties.
 func New(out io.Writer, prefix string, flag int) *Logger {
-	l := &Logger{out: out, prefix: prefix, flag: flag}
-	if out == io.Discard {
-		l.isDiscard.Store(true)
-	}
+	l := new(Logger)
+	l.SetOutput(out)
+	l.SetPrefix(prefix)
+	l.SetFlags(flag)
 	return l
 }
 
 // SetOutput sets the output destination for the logger.
 func (l *Logger) SetOutput(w io.Writer) {
-	l.mu.Lock()
-	defer l.mu.Unlock()
+	l.outMu.Lock()
+	defer l.outMu.Unlock()
 	l.out = w
 	l.isDiscard.Store(w == io.Discard)
 }
@@ -110,15 +110,15 @@
 //   - date and/or time (if corresponding flags are provided),
 //   - file and line number (if corresponding flags are provided),
 //   - l.prefix (if it's not blank and Lmsgprefix is set).
-func (l *Logger) formatHeader(buf *[]byte, t time.Time, file string, line int) {
-	if l.flag&Lmsgprefix == 0 {
-		*buf = append(*buf, l.prefix...)
+func formatHeader(buf *[]byte, t time.Time, prefix string, flag int, file string, line int) {
+	if flag&Lmsgprefix == 0 {
+		*buf = append(*buf, prefix...)
 	}
-	if l.flag&(Ldate|Ltime|Lmicroseconds) != 0 {
-		if l.flag&LUTC != 0 {
+	if flag&(Ldate|Ltime|Lmicroseconds) != 0 {
+		if flag&LUTC != 0 {
 			t = t.UTC()
 		}
-		if l.flag&Ldate != 0 {
+		if flag&Ldate != 0 {
 			year, month, day := t.Date()
 			itoa(buf, year, 4)
 			*buf = append(*buf, '/')
@@ -127,22 +127,22 @@
 			itoa(buf, day, 2)
 			*buf = append(*buf, ' ')
 		}
-		if l.flag&(Ltime|Lmicroseconds) != 0 {
+		if flag&(Ltime|Lmicroseconds) != 0 {
 			hour, min, sec := t.Clock()
 			itoa(buf, hour, 2)
 			*buf = append(*buf, ':')
 			itoa(buf, min, 2)
 			*buf = append(*buf, ':')
 			itoa(buf, sec, 2)
-			if l.flag&Lmicroseconds != 0 {
+			if flag&Lmicroseconds != 0 {
 				*buf = append(*buf, '.')
 				itoa(buf, t.Nanosecond()/1e3, 6)
 			}
 			*buf = append(*buf, ' ')
 		}
 	}
-	if l.flag&(Lshortfile|Llongfile) != 0 {
-		if l.flag&Lshortfile != 0 {
+	if flag&(Lshortfile|Llongfile) != 0 {
+		if flag&Lshortfile != 0 {
 			short := file
 			for i := len(file) - 1; i > 0; i-- {
 				if file[i] == '/' {
@@ -157,11 +157,32 @@
 		itoa(buf, line, -1)
 		*buf = append(*buf, ": "...)
 	}
-	if l.flag&Lmsgprefix != 0 {
-		*buf = append(*buf, l.prefix...)
+	if flag&Lmsgprefix != 0 {
+		*buf = append(*buf, prefix...)
 	}
 }
 
+var bufferPool = sync.Pool{New: func() any { return new([]byte) }}
+
+func getBuffer() *[]byte {
+	p := bufferPool.Get().(*[]byte)
+	*p = (*p)[:0]
+	return p
+}
+
+func putBuffer(p *[]byte) {
+	// Proper usage of a sync.Pool requires each entry to have approximately
+	// the same memory cost. To obtain this property when the stored type
+	// contains a variably-sized buffer, we add a hard limit on the maximum buffer
+	// to place back in the pool.
+	//
+	// See https://go.dev/issue/23199
+	if cap(*p) > 64<<10 {
+		*p = nil
+	}
+	bufferPool.Put(p)
+}
+
 // Output writes the output for a logging event. The string s contains
 // the text to print after the prefix specified by the flags of the
 // Logger. A newline is appended if the last character of s is not
@@ -170,28 +191,34 @@
 // paths it will be 2.
 func (l *Logger) Output(calldepth int, s string) error {
 	now := time.Now() // get this early.
+
+	// Load prefix and flag once so that their value is consistent within
+	// this call regardless of any concurrent changes to their value.
+	prefix := l.Prefix()
+	flag := l.Flags()
+
 	var file string
 	var line int
-	l.mu.Lock()
-	defer l.mu.Unlock()
-	if l.flag&(Lshortfile|Llongfile) != 0 {
-		// Release lock while getting caller info - it's expensive.
-		l.mu.Unlock()
+	if flag&(Lshortfile|Llongfile) != 0 {
 		var ok bool
 		_, file, line, ok = runtime.Caller(calldepth)
 		if !ok {
 			file = "???"
 			line = 0
 		}
-		l.mu.Lock()
 	}
-	l.buf = l.buf[:0]
-	l.formatHeader(&l.buf, now, file, line)
-	l.buf = append(l.buf, s...)
+
+	buf := getBuffer()
+	defer putBuffer(buf)
+	formatHeader(buf, now, prefix, flag, file, line)
+	*buf = append(*buf, s...)
 	if len(s) == 0 || s[len(s)-1] != '\n' {
-		l.buf = append(l.buf, '\n')
+		*buf = append(*buf, '\n')
 	}
-	_, err := l.out.Write(l.buf)
+
+	l.outMu.Lock()
+	defer l.outMu.Unlock()
+	_, err := l.out.Write(*buf)
 	return err
 }
 
@@ -264,37 +291,32 @@
 // Flags returns the output flags for the logger.
 // The flag bits are Ldate, Ltime, and so on.
 func (l *Logger) Flags() int {
-	l.mu.Lock()
-	defer l.mu.Unlock()
-	return l.flag
+	return int(l.flag.Load())
 }
 
 // SetFlags sets the output flags for the logger.
 // The flag bits are Ldate, Ltime, and so on.
 func (l *Logger) SetFlags(flag int) {
-	l.mu.Lock()
-	defer l.mu.Unlock()
-	l.flag = flag
+	l.flag.Store(int32(flag))
 }
 
 // Prefix returns the output prefix for the logger.
 func (l *Logger) Prefix() string {
-	l.mu.Lock()
-	defer l.mu.Unlock()
-	return l.prefix
+	if p := l.prefix.Load(); p != nil {
+		return *p
+	}
+	return ""
 }
 
 // SetPrefix sets the output prefix for the logger.
 func (l *Logger) SetPrefix(prefix string) {
-	l.mu.Lock()
-	defer l.mu.Unlock()
-	l.prefix = prefix
+	l.prefix.Store(&prefix)
 }
 
 // Writer returns the output destination for the logger.
 func (l *Logger) Writer() io.Writer {
-	l.mu.Lock()
-	defer l.mu.Unlock()
+	l.outMu.Lock()
+	defer l.outMu.Unlock()
 	return l.out
 }
 
diff --git a/src/log/log_test.go b/src/log/log_test.go
index f2ef165..c212b60 100644
--- a/src/log/log_test.go
+++ b/src/log/log_test.go
@@ -12,7 +12,9 @@
 	"io"
 	"os"
 	"regexp"
+	"runtime"
 	"strings"
+	"sync"
 	"testing"
 	"time"
 )
@@ -21,7 +23,7 @@
 	Rdate         = `[0-9][0-9][0-9][0-9]/[0-9][0-9]/[0-9][0-9]`
 	Rtime         = `[0-9][0-9]:[0-9][0-9]:[0-9][0-9]`
 	Rmicroseconds = `\.[0-9][0-9][0-9][0-9][0-9][0-9]`
-	Rline         = `(61|63):` // must update if the calls to l.Printf / l.Print below move
+	Rline         = `(63|65):` // must update if the calls to l.Printf / l.Print below move
 	Rlongfile     = `.*/[A-Za-z0-9_\-]+\.go:` + Rline
 	Rshortfile    = `[A-Za-z0-9_\-]+\.go:` + Rline
 )
@@ -98,14 +100,20 @@
 	}
 }
 
+func TestNonNewLogger(t *testing.T) {
+	var l Logger
+	l.SetOutput(new(bytes.Buffer)) // minimal work to initialize a Logger
+	l.Print("hello")
+}
+
 func TestOutputRace(t *testing.T) {
 	var b bytes.Buffer
 	l := New(&b, "", 0)
 	for i := 0; i < 100; i++ {
 		go func() {
 			l.SetFlags(0)
+			l.Output(0, "")
 		}()
-		l.Output(0, "")
 	}
 }
 
@@ -224,3 +232,26 @@
 		l.Println(testString)
 	}
 }
+
+// discard is identical to io.Discard,
+// but copied here to avoid the io.Discard optimization in Logger.
+type discard struct{}
+
+func (discard) Write(p []byte) (int, error) {
+	return len(p), nil
+}
+
+func BenchmarkConcurrent(b *testing.B) {
+	l := New(discard{}, "prefix: ", Ldate|Ltime|Lmicroseconds|Llongfile|Lmsgprefix)
+	var group sync.WaitGroup
+	for i := runtime.NumCPU(); i > 0; i-- {
+		group.Add(1)
+		go func() {
+			for i := 0; i < b.N; i++ {
+				l.Output(0, "hello, world!")
+			}
+			defer group.Done()
+		}()
+	}
+	group.Wait()
+}