| // Copyright 2010 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package runtime |
| |
| import ( |
| "runtime/internal/sys" |
| "unsafe" |
| ) |
| |
| const ( |
| _EACCES = 13 |
| _EINVAL = 22 |
| ) |
| |
| // NOTE: vec must be just 1 byte long here. |
| // Mincore returns ENOMEM if any of the pages are unmapped, |
| // but we want to know that all of the pages are unmapped. |
| // To make these the same, we can only ask about one page |
| // at a time. See golang.org/issue/7476. |
| var addrspace_vec [1]byte |
| |
| func addrspace_free(v unsafe.Pointer, n uintptr) bool { |
| for off := uintptr(0); off < n; off += physPageSize { |
| // Use a length of 1 byte, which the kernel will round |
| // up to one physical page regardless of the true |
| // physical page size. |
| errval := mincore(unsafe.Pointer(uintptr(v)+off), 1, &addrspace_vec[0]) |
| if errval == -_EINVAL { |
| // Address is not a multiple of the physical |
| // page size. Shouldn't happen, but just ignore it. |
| continue |
| } |
| // ENOMEM means unmapped, which is what we want. |
| // Anything else we assume means the pages are mapped. |
| if errval != -_ENOMEM { |
| return false |
| } |
| } |
| return true |
| } |
| |
| func mmap_fixed(v unsafe.Pointer, n uintptr, prot, flags, fd int32, offset uint32) unsafe.Pointer { |
| p := mmap(v, n, prot, flags, fd, offset) |
| // On some systems, mmap ignores v without |
| // MAP_FIXED, so retry if the address space is free. |
| if p != v && addrspace_free(v, n) { |
| if uintptr(p) > 4096 { |
| munmap(p, n) |
| } |
| p = mmap(v, n, prot, flags|_MAP_FIXED, fd, offset) |
| } |
| return p |
| } |
| |
| // Don't split the stack as this method may be invoked without a valid G, which |
| // prevents us from allocating more stack. |
| //go:nosplit |
| func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer { |
| p := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0) |
| if uintptr(p) < 4096 { |
| if uintptr(p) == _EACCES { |
| print("runtime: mmap: access denied\n") |
| exit(2) |
| } |
| if uintptr(p) == _EAGAIN { |
| print("runtime: mmap: too much locked memory (check 'ulimit -l').\n") |
| exit(2) |
| } |
| return nil |
| } |
| mSysStatInc(sysStat, n) |
| return p |
| } |
| |
| func sysUnused(v unsafe.Pointer, n uintptr) { |
| // By default, Linux's "transparent huge page" support will |
| // merge pages into a huge page if there's even a single |
| // present regular page, undoing the effects of the DONTNEED |
| // below. On amd64, that means khugepaged can turn a single |
| // 4KB page to 2MB, bloating the process's RSS by as much as |
| // 512X. (See issue #8832 and Linux kernel bug |
| // https://bugzilla.kernel.org/show_bug.cgi?id=93111) |
| // |
| // To work around this, we explicitly disable transparent huge |
| // pages when we release pages of the heap. However, we have |
| // to do this carefully because changing this flag tends to |
| // split the VMA (memory mapping) containing v in to three |
| // VMAs in order to track the different values of the |
| // MADV_NOHUGEPAGE flag in the different regions. There's a |
| // default limit of 65530 VMAs per address space (sysctl |
| // vm.max_map_count), so we must be careful not to create too |
| // many VMAs (see issue #12233). |
| // |
| // Since huge pages are huge, there's little use in adjusting |
| // the MADV_NOHUGEPAGE flag on a fine granularity, so we avoid |
| // exploding the number of VMAs by only adjusting the |
| // MADV_NOHUGEPAGE flag on a large granularity. This still |
| // gets most of the benefit of huge pages while keeping the |
| // number of VMAs under control. With hugePageSize = 2MB, even |
| // a pessimal heap can reach 128GB before running out of VMAs. |
| if sys.HugePageSize != 0 { |
| var s uintptr = sys.HugePageSize // division by constant 0 is a compile-time error :( |
| |
| // If it's a large allocation, we want to leave huge |
| // pages enabled. Hence, we only adjust the huge page |
| // flag on the huge pages containing v and v+n-1, and |
| // only if those aren't aligned. |
| var head, tail uintptr |
| if uintptr(v)%s != 0 { |
| // Compute huge page containing v. |
| head = uintptr(v) &^ (s - 1) |
| } |
| if (uintptr(v)+n)%s != 0 { |
| // Compute huge page containing v+n-1. |
| tail = (uintptr(v) + n - 1) &^ (s - 1) |
| } |
| |
| // Note that madvise will return EINVAL if the flag is |
| // already set, which is quite likely. We ignore |
| // errors. |
| if head != 0 && head+sys.HugePageSize == tail { |
| // head and tail are different but adjacent, |
| // so do this in one call. |
| madvise(unsafe.Pointer(head), 2*sys.HugePageSize, _MADV_NOHUGEPAGE) |
| } else { |
| // Advise the huge pages containing v and v+n-1. |
| if head != 0 { |
| madvise(unsafe.Pointer(head), sys.HugePageSize, _MADV_NOHUGEPAGE) |
| } |
| if tail != 0 && tail != head { |
| madvise(unsafe.Pointer(tail), sys.HugePageSize, _MADV_NOHUGEPAGE) |
| } |
| } |
| } |
| |
| if uintptr(v)&(physPageSize-1) != 0 || n&(physPageSize-1) != 0 { |
| // madvise will round this to any physical page |
| // *covered* by this range, so an unaligned madvise |
| // will release more memory than intended. |
| throw("unaligned sysUnused") |
| } |
| |
| madvise(v, n, _MADV_DONTNEED) |
| } |
| |
| func sysUsed(v unsafe.Pointer, n uintptr) { |
| if sys.HugePageSize != 0 { |
| // Partially undo the NOHUGEPAGE marks from sysUnused |
| // for whole huge pages between v and v+n. This may |
| // leave huge pages off at the end points v and v+n |
| // even though allocations may cover these entire huge |
| // pages. We could detect this and undo NOHUGEPAGE on |
| // the end points as well, but it's probably not worth |
| // the cost because when neighboring allocations are |
| // freed sysUnused will just set NOHUGEPAGE again. |
| var s uintptr = sys.HugePageSize |
| |
| // Round v up to a huge page boundary. |
| beg := (uintptr(v) + (s - 1)) &^ (s - 1) |
| // Round v+n down to a huge page boundary. |
| end := (uintptr(v) + n) &^ (s - 1) |
| |
| if beg < end { |
| madvise(unsafe.Pointer(beg), end-beg, _MADV_HUGEPAGE) |
| } |
| } |
| } |
| |
| // Don't split the stack as this function may be invoked without a valid G, |
| // which prevents us from allocating more stack. |
| //go:nosplit |
| func sysFree(v unsafe.Pointer, n uintptr, sysStat *uint64) { |
| mSysStatDec(sysStat, n) |
| munmap(v, n) |
| } |
| |
| func sysFault(v unsafe.Pointer, n uintptr) { |
| mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE|_MAP_FIXED, -1, 0) |
| } |
| |
| func sysReserve(v unsafe.Pointer, n uintptr, reserved *bool) unsafe.Pointer { |
| // On 64-bit, people with ulimit -v set complain if we reserve too |
| // much address space. Instead, assume that the reservation is okay |
| // if we can reserve at least 64K and check the assumption in SysMap. |
| // Only user-mode Linux (UML) rejects these requests. |
| if sys.PtrSize == 8 && uint64(n) > 1<<32 { |
| p := mmap_fixed(v, 64<<10, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0) |
| if p != v { |
| if uintptr(p) >= 4096 { |
| munmap(p, 64<<10) |
| } |
| return nil |
| } |
| munmap(p, 64<<10) |
| *reserved = false |
| return v |
| } |
| |
| p := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0) |
| if uintptr(p) < 4096 { |
| return nil |
| } |
| *reserved = true |
| return p |
| } |
| |
| func sysMap(v unsafe.Pointer, n uintptr, reserved bool, sysStat *uint64) { |
| mSysStatInc(sysStat, n) |
| |
| // On 64-bit, we don't actually have v reserved, so tread carefully. |
| if !reserved { |
| p := mmap_fixed(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0) |
| if uintptr(p) == _ENOMEM { |
| throw("runtime: out of memory") |
| } |
| if p != v { |
| print("runtime: address space conflict: map(", v, ") = ", p, "\n") |
| throw("runtime: address space conflict") |
| } |
| return |
| } |
| |
| p := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0) |
| if uintptr(p) == _ENOMEM { |
| throw("runtime: out of memory") |
| } |
| if p != v { |
| throw("runtime: cannot map pages in arena address space") |
| } |
| } |