blob: dd8293fad75fb3b3f348699c49d5949bca4cedd2 [file] [log] [blame]
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4
5package cache
6
7import (
8 "context"
9 "os"
10 "sync"
11 "time"
12
Rob Findleyc8234882023-11-17 16:14:35 -050013 "golang.org/x/tools/gopls/internal/file"
Alan Donovan6d109d12024-01-23 15:35:40 -050014 "golang.org/x/tools/gopls/internal/protocol"
Robert Findleyae242ec2023-01-19 16:41:08 -050015 "golang.org/x/tools/internal/event"
16 "golang.org/x/tools/internal/event/tag"
17 "golang.org/x/tools/internal/robustio"
18)
19
20// A memoizedFS is a file source that memoizes reads, to reduce IO.
21type memoizedFS struct {
22 mu sync.Mutex
23
24 // filesByID maps existing file inodes to the result of a read.
25 // (The read may have failed, e.g. due to EACCES or a delete between stat+read.)
26 // Each slice is a non-empty list of aliases: different URIs.
Alan Donovanbd547e52024-01-22 16:57:51 -050027 filesByID map[robustio.FileID][]*diskFile
Robert Findleyae242ec2023-01-19 16:41:08 -050028}
29
Rob Findleyb85cfba2023-12-15 18:12:52 -050030func newMemoizedFS() *memoizedFS {
Alan Donovanbd547e52024-01-22 16:57:51 -050031 return &memoizedFS{filesByID: make(map[robustio.FileID][]*diskFile)}
Rob Findleyb85cfba2023-12-15 18:12:52 -050032}
33
Alan Donovanbd547e52024-01-22 16:57:51 -050034// A diskFile is a file in the filesystem, or a failure to read one.
Alan Donovan6823da42024-01-23 15:29:30 -050035// It implements the file.Source interface.
Alan Donovanbd547e52024-01-22 16:57:51 -050036type diskFile struct {
Alan Donovanccacebc2023-11-17 16:06:37 -050037 uri protocol.DocumentURI
Robert Findleyae242ec2023-01-19 16:41:08 -050038 modTime time.Time
39 content []byte
Rob Findleyc8234882023-11-17 16:14:35 -050040 hash file.Hash
Robert Findleyae242ec2023-01-19 16:41:08 -050041 err error
42}
43
Alan Donovanbd547e52024-01-22 16:57:51 -050044func (h *diskFile) URI() protocol.DocumentURI { return h.uri }
Robert Findleyae242ec2023-01-19 16:41:08 -050045
Alan Donovanbd547e52024-01-22 16:57:51 -050046func (h *diskFile) Identity() file.Identity {
Rob Findleyc8234882023-11-17 16:14:35 -050047 return file.Identity{
Robert Findleyae242ec2023-01-19 16:41:08 -050048 URI: h.uri,
49 Hash: h.hash,
50 }
51}
52
Alan Donovanbd547e52024-01-22 16:57:51 -050053func (h *diskFile) SameContentsOnDisk() bool { return true }
54func (h *diskFile) Version() int32 { return 0 }
55func (h *diskFile) Content() ([]byte, error) { return h.content, h.err }
Robert Findleyae242ec2023-01-19 16:41:08 -050056
Alan Donovan36ed0b12023-03-13 14:20:23 -040057// ReadFile stats and (maybe) reads the file, updates the cache, and returns it.
Rob Findleyc8234882023-11-17 16:14:35 -050058func (fs *memoizedFS) ReadFile(ctx context.Context, uri protocol.DocumentURI) (file.Handle, error) {
Alan Donovan1cab1272023-11-17 16:16:59 -050059 id, mtime, err := robustio.GetFileID(uri.Path())
Robert Findleyae242ec2023-01-19 16:41:08 -050060 if err != nil {
61 // file does not exist
Alan Donovanbd547e52024-01-22 16:57:51 -050062 return &diskFile{
Robert Findleyae242ec2023-01-19 16:41:08 -050063 err: err,
64 uri: uri,
65 }, nil
66 }
67
68 // We check if the file has changed by comparing modification times. Notably,
69 // this is an imperfect heuristic as various systems have low resolution
70 // mtimes (as much as 1s on WSL or s390x builders), so we only cache
71 // filehandles if mtime is old enough to be reliable, meaning that we don't
72 // expect a subsequent write to have the same mtime.
73 //
74 // The coarsest mtime precision we've seen in practice is 1s, so consider
75 // mtime to be unreliable if it is less than 2s old. Capture this before
76 // doing anything else.
77 recentlyModified := time.Since(mtime) < 2*time.Second
78
79 fs.mu.Lock()
80 fhs, ok := fs.filesByID[id]
81 if ok && fhs[0].modTime.Equal(mtime) {
Alan Donovanbd547e52024-01-22 16:57:51 -050082 var fh *diskFile
Robert Findleyae242ec2023-01-19 16:41:08 -050083 // We have already seen this file and it has not changed.
84 for _, h := range fhs {
85 if h.uri == uri {
86 fh = h
87 break
88 }
89 }
90 // No file handle for this exact URI. Create an alias, but share content.
91 if fh == nil {
92 newFH := *fhs[0]
93 newFH.uri = uri
94 fh = &newFH
95 fhs = append(fhs, fh)
96 fs.filesByID[id] = fhs
97 }
98 fs.mu.Unlock()
99 return fh, nil
100 }
101 fs.mu.Unlock()
102
103 // Unknown file, or file has changed. Read (or re-read) it.
104 fh, err := readFile(ctx, uri, mtime) // ~25us
105 if err != nil {
106 return nil, err // e.g. cancelled (not: read failed)
107 }
108
109 fs.mu.Lock()
110 if !recentlyModified {
Alan Donovanbd547e52024-01-22 16:57:51 -0500111 fs.filesByID[id] = []*diskFile{fh}
Robert Findleyae242ec2023-01-19 16:41:08 -0500112 } else {
113 delete(fs.filesByID, id)
114 }
115 fs.mu.Unlock()
116 return fh, nil
117}
118
Rob Findley1c5ccad2023-04-18 13:48:38 -0400119// fileStats returns information about the set of files stored in fs. It is
120// intended for debugging only.
121func (fs *memoizedFS) fileStats() (files, largest, errs int) {
122 fs.mu.Lock()
123 defer fs.mu.Unlock()
124
125 files = len(fs.filesByID)
126 largest = 0
127 errs = 0
128
129 for _, files := range fs.filesByID {
130 rep := files[0]
131 if len(rep.content) > largest {
132 largest = len(rep.content)
133 }
134 if rep.err != nil {
135 errs++
136 }
137 }
138 return files, largest, errs
139}
140
// ioLimit bounds the number of file reads that may run in parallel within
// this process. It is used as a counting semaphore: readFile sends a value
// before reading and receives one when it is done, so the channel capacity
// (128) is the maximum number of reads in flight.
var ioLimit = make(chan struct{}, 128)
143
Alan Donovanbd547e52024-01-22 16:57:51 -0500144func readFile(ctx context.Context, uri protocol.DocumentURI, mtime time.Time) (*diskFile, error) {
Robert Findleyae242ec2023-01-19 16:41:08 -0500145 select {
146 case ioLimit <- struct{}{}:
147 case <-ctx.Done():
148 return nil, ctx.Err()
149 }
150 defer func() { <-ioLimit }()
151
Alan Donovan1cab1272023-11-17 16:16:59 -0500152 ctx, done := event.Start(ctx, "cache.readFile", tag.File.Of(uri.Path()))
Robert Findleyae242ec2023-01-19 16:41:08 -0500153 _ = ctx
154 defer done()
155
156 // It is possible that a race causes us to read a file with different file
157 // ID, or whose mtime differs from the given mtime. However, in these cases
158 // we expect the client to notify of a subsequent file change, and the file
159 // content should be eventually consistent.
Alan Donovan1cab1272023-11-17 16:16:59 -0500160 content, err := os.ReadFile(uri.Path()) // ~20us
Robert Findleyae242ec2023-01-19 16:41:08 -0500161 if err != nil {
162 content = nil // just in case
163 }
Alan Donovanbd547e52024-01-22 16:57:51 -0500164 return &diskFile{
Robert Findleyae242ec2023-01-19 16:41:08 -0500165 modTime: mtime,
166 uri: uri,
167 content: content,
Rob Findleyc8234882023-11-17 16:14:35 -0500168 hash: file.HashOf(content),
Robert Findleyae242ec2023-01-19 16:41:08 -0500169 err: err,
170 }, nil
171}