blob: e62a4e90941390a2ea0a3ad01caf7d6685147353 [file] [log] [blame]
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package licenses detects licenses and determines whether they are redistributable.
// The functions in this package do not return errors; instead, they log any problems
// they encounter and fail closed by reporting that the module or package is not
// redistributable.
// Example (modproxy):
// d := licenses.NewDetector(modulePath, version, zipReader, log.Infof)
// modRedist := d.ModuleIsRedistributable()
// Example (discovery):
// d := licenses.NewDetector(modulePath, version, zipReader, log.Infof)
// modRedist := d.ModuleIsRedistributable()
// lics := d.AllLicenses()
// pkgRedist, pkgMetas := d.PackageInfo(pkgSubdir)
package licenses
import (
modzip ""
//go:generate rm -f exceptions.gen.go
//go:generate go run gen_exceptions.go
const (
// coverageThreshold is the minimum percentage of the file that must contain
// license text.
coverageThreshold = 75
// unknownLicenseType is for text in a license file that's not recognized.
unknownLicenseType = "UNKNOWN"
// maxLicenseSize is the maximum allowable size (in bytes) for a license file.
// There are some license files larger than 1 million bytes:
// and, for example.
// var for testing
var maxLicenseSize int64 = modzip.MaxLICENSE
// Metadata holds information extracted from a license file.
type Metadata struct {
// Types is the set of license types, as determined by the licensecheck package.
Types []string
// FilePath is the '/'-separated path to the license file in the module zip,
// relative to the contents directory.
FilePath string
Coverage licensecheck.Coverage
// A License is a classified license file path and its contents.
type License struct {
Contents []byte
// RemoveNonRedistributableData methods removes the license contents
// if the license is non-redistributable.
func (l *License) RemoveNonRedistributableData() {
if !Redistributable(l.Types) {
l.Contents = nil
var (
// FileNames is the list of file names that are considered to be license
// files. Matching against it is case-insensitive (see fileNamesLowercase).
FileNames = []string{
// standardRedistributableLicenseTypes is the list of license types, as reported by
// licensecheck, that allow redistribution, and also have a name that is an OSI or SPDX
// identifier.
standardRedistributableLicenseTypes = []string{
// Licenses acceptable by OSI.
"PostgreSQL", // TODO: ask legal
// ignorableLicenseTypes is the set of types that are skipped when deciding
// redistributability.
// These aren't technically licenses, but they are recognized by
// licensecheck and safe to ignore.
ignorableLicenseTypes = map[string]bool{
"CC-Notice": true,
"GooglePatentClause": true,
"GooglePatentsFile": true,
"blessing": true,
"OFL-1.1": true, // concerns fonts only
// redistributableLicenseTypes is the set of license types, as reported by
// licensecheck, that allow redistribution. It consists of the standard
// types along with some exception types.
// It starts empty and is filled in by init.
redistributableLicenseTypes = map[string]bool{}
func init() {
for _, t := range standardRedistributableLicenseTypes {
redistributableLicenseTypes[t] = true
// Add here all other types defined in the exceptions.
redistributableLicenseTypes["Freetype"] = true
// exceptionTypes is a map from License IDs from LREs in the exception
// directory to license types. Any type mentioned in an exception should
// be redistributable. If not, there's a problem.
for _, types := range exceptionTypes {
for _, t := range types {
if !redistributableLicenseTypes[t] {
log.Fatalf(context.Background(), "%s is an exception type that is not redistributable.", t)
// nonOSILicenses lists licenses that are not approved by OSI.
// AcceptedLicenses uses it to choose which site to link to for a license.
var nonOSILicenses = map[string]bool{
	"BlueOak-1.0.0":      true,
	"BSD-2-Clause-Views": true,
	"CC-BY-3.0":          true,
	"CC-BY-4.0":          true,
	"CC-BY-SA-3.0":       true,
	"CC-BY-SA-4.0":       true,
	"CC0-1.0":            true,
	"JSON":               true,
	"NIST":               true,
	"OpenSSL":            true,
}
// fileNamesLowercase has all the entries of FileNames, downcased and made a set
// for fast case-insensitive matching.
var fileNamesLowercase = map[string]bool{}
func init() {
for _, f := range FileNames {
fileNamesLowercase[strings.ToLower(f)] = true
// AcceptedLicenseInfo describes a license that is accepted by the discovery site.
type AcceptedLicenseInfo struct {
	// Name is the license type identifier.
	Name string
	// URL is a link to a page describing the license.
	URL string
}
// AcceptedLicenses returns a sorted slice of license types that are accepted as
// redistributable. Its result is intended to be displayed to users.
func AcceptedLicenses() []AcceptedLicenseInfo {
var lics []AcceptedLicenseInfo
for _, identifier := range standardRedistributableLicenseTypes {
var link string
if nonOSILicenses[identifier] {
link = fmt.Sprintf("", identifier)
} else {
link = fmt.Sprintf("", identifier)
lics = append(lics, AcceptedLicenseInfo{identifier, link})
sort.Slice(lics, func(i, j int) bool { return lics[i].Name < lics[j].Name })
return lics
var (
// OmitExceptions causes the list of exceptions to be omitted from license detection.
// It is intended only to speed up testing, and must be set before the first use
// of this package.
OmitExceptions bool
_scanner *licensecheck.Scanner
scannerOnce sync.Once
func scanner() *licensecheck.Scanner {
scannerOnce.Do(func() {
if OmitExceptions {
exceptionLicenses = nil
var err error
_scanner, err = licensecheck.NewScanner(append(exceptionLicenses, licensecheck.BuiltinLicenses()...))
if err != nil {
log.Fatalf(context.Background(), "licensecheck.NewScanner: %v", err)
return _scanner
// A Detector detects licenses in a module and its packages.
type Detector struct {
modulePath string
version string
fsys fs.FS
logf func(string, ...interface{})
moduleRedist bool
moduleLicenses []*License // licenses at module root directory, or list from exceptions
allLicenses []*License
licsByDir map[string][]*License // from directory to list of licenses
// NewDetector returns a Detector for the given module and version.
// zr should be the zip file for that module and version.
// logf is for logging; if nil, no logging is done.
// Deprecated: use NewDetectorFS.
func NewDetector(modulePath, version string, zr *zip.Reader, logf func(string, ...interface{})) *Detector {
sub, err := fs.Sub(zr, modulePath+"@"+version)
// This should only fail if the prefix is not a valid path, which shouldn't be possible.
if err != nil && logf != nil {
logf("fs.Sub: %v", err)
return NewDetectorFS(modulePath, version, sub, logf)
// NewDetectorFS returns a Detector for the given module and version.
// fsys should represent the content directory of the module (not the zip root).
// logf is for logging; if nil, no logging is done.
func NewDetectorFS(modulePath, version string, fsys fs.FS, logf func(string, ...interface{})) *Detector {
if logf == nil {
logf = func(string, ...interface{}) {}
d := &Detector{
modulePath: modulePath,
version: version,
fsys: fsys,
logf: logf,
return d
// ModuleIsRedistributable reports whether the given module is redistributable.
func (d *Detector) ModuleIsRedistributable() bool {
return d.moduleRedist
// ModuleLicenses returns the licenses that apply to the module.
func (d *Detector) ModuleLicenses() []*License {
return d.moduleLicenses
// AllLicenses returns all the licenses detected in the entire module, including
// package licenses.
func (d *Detector) AllLicenses() []*License {
if d.allLicenses == nil {
return d.allLicenses
// PackageInfo reports whether the package at dir, a directory relative to the
// module root, is redistributable. It also returns all the licenses that apply
// to the package.
func (d *Detector) PackageInfo(dir string) (isRedistributable bool, lics []*License) {
cleanDir := filepath.ToSlash(filepath.Clean(dir))
if path.IsAbs(cleanDir) || strings.HasPrefix(cleanDir, "..") {
return false, nil
if d.allLicenses == nil {
// Collect all the license metadata for directories dir and above, excluding the root.
for prefix, plics := range d.licsByDir {
// append a slash so that prefix a/b does not match a/bc/d
if strings.HasPrefix(cleanDir+"/", prefix+"/") {
lics = append(lics, plics...)
// A package is redistributable if its module is, and if other licenses on
// the path to the root are redistributable. Note that this is not the same
// as asking if the module licenses plus the package licenses are
// redistributable. A module that is granted an exception (see DetectFiles)
// may have licenses that are non-redistributable.
ltypes := types(lics)
isRedistributable = d.ModuleIsRedistributable() && (len(ltypes) == 0 || Redistributable(ltypes))
// A package's licenses include the ones we've already computed, as well
// as the module licenses.
return isRedistributable, append(lics, d.moduleLicenses...)
// computeModuleInfo determines values for the moduleRedist and moduleLicenses fields of d.
func (d *Detector) computeModuleInfo() {
// Check that all licenses in the contents directory are redistributable.
d.moduleLicenses = d.detectFiles(d.paths(RootFiles))
d.moduleRedist = Redistributable(types(d.moduleLicenses))
// computeAllLicenseInfo collects all the detected licenses in the zip and
// stores them in the allLicenses field of d. It also maps detected licenses to
// their directories, to optimize Detector.PackageInfo.
func (d *Detector) computeAllLicenseInfo() {
d.allLicenses = []*License{}
d.allLicenses = append(d.allLicenses, d.moduleLicenses...)
nonRootLicenses := d.detectFiles(d.paths(NonRootFiles))
d.allLicenses = append(d.allLicenses, nonRootLicenses...)
d.licsByDir = map[string][]*License{}
for _, l := range nonRootLicenses {
prefix := path.Dir(l.FilePath)
d.licsByDir[prefix] = append(d.licsByDir[prefix], l)
// WhichFiles describes which files from the zip should be returned by Detector.Files.
type WhichFiles int

const (
	// Only files from the root (contents) directory.
	RootFiles WhichFiles = iota
	// Only files that are not in the root directory.
	NonRootFiles
	// All files; the union of root and non-root.
	AllFiles
)
// paths returns a list of license file paths from the Detector's filesystem.
// The which argument determines the location of the files considered.
// If paths encounters an error, it logs it and returns nil.
func (d *Detector) paths(which WhichFiles) []string {
if d.fsys == nil {
return nil
var paths []string
err := fs.WalkDir(d.fsys, ".", func(pathname string, de fs.DirEntry, err error) error {
if err != nil {
return err
if de.IsDir() {
return nil
if !fileNamesLowercase[strings.ToLower(de.Name())] {
return nil
// Skip files we should ignore.
if ignoreFiles[d.modulePath+" "+pathname] {
return nil
if which == RootFiles && path.Dir(pathname) != "." {
// Skip f since it's not at root.
return nil
if which == NonRootFiles && path.Dir(pathname) == "." {
// Skip f since it is at root.
return nil
if isVendoredFile(pathname) {
// Skip if f is in the vendor directory.
return nil
if err := module.CheckFilePath(pathname); err != nil {
// Skip if the file path is bad.
d.logf("module.CheckFilePath(%q): %v", pathname, err)
return nil
paths = append(paths, pathname)
return nil
if err != nil {
d.logf("licenses.Detector.paths: %v", err)
return nil
return paths
// isVendoredFile reports if the given file is in a proper subdirectory nested
// under a 'vendor' directory, to allow for Go packages named 'vendor'.
// For example:
//   - isVendoredFile("vendor/LICENSE") == false, and
//   - isVendoredFile("vendor/foo/LICENSE") == true.
//
// name is a '/'-separated path. Only the first vendor directory in the path is
// considered: a leading "vendor/" if present, otherwise the first "/vendor/".
func isVendoredFile(name string) bool {
	var vendorOffset int
	switch i := strings.Index(name, "/vendor/"); {
	case strings.HasPrefix(name, "vendor/"):
		vendorOffset = len("vendor/")
	case i >= 0:
		vendorOffset = i + len("/vendor/")
	default:
		// no vendor directory
		return false
	}
	// check if the file is in a proper subdirectory of vendor
	return strings.Contains(name[vendorOffset:], "/")
}
// detectFiles runs DetectFile on each of the given files.
// If a file cannot be read, the error is logged and a license
// of type unknown is added.
func (d *Detector) detectFiles(pathnames []string) []*License {
var licenses []*License
for _, p := range pathnames {
bytes, err := d.readFile(p)
if err != nil {
d.logf("reading file %s: %v", p, err)
licenses = append(licenses, &License{
Metadata: &Metadata{
Types: []string{unknownLicenseType},
FilePath: p,
types, cov := DetectFile(bytes, p, d.logf)
licenses = append(licenses, &License{
Metadata: &Metadata{
Types: types,
FilePath: p,
Coverage: cov,
Contents: bytes,
return licenses
func (d *Detector) readFile(pathname string) ([]byte, error) {
f, err := d.fsys.Open(pathname)
if err != nil {
return nil, err
defer f.Close()
info, err := f.Stat()
if err != nil {
return nil, err
if info.Size() > maxLicenseSize {
return nil, fmt.Errorf("file size %d exceeds max license size %d", info.Size(), maxLicenseSize)
return io.ReadAll(io.LimitReader(f, int64(maxLicenseSize)))
// DetectFile return the set of license types for the given file contents. It
// also returns the licensecheck coverage information. The filename is used
// solely for logging.
func DetectFile(contents []byte, filename string, logf func(string, ...interface{})) ([]string, licensecheck.Coverage) {
if logf == nil {
logf = func(string, ...interface{}) {}
cov := scanner().Scan(contents)
if cov.Percent < float64(coverageThreshold) {
logf("%s license coverage too low (%+v), skipping", filename, cov)
return []string{unknownLicenseType}, cov
types := make(map[string]bool)
for _, m := range cov.Match {
ts := exceptionTypes[m.ID]
if ts == nil {
ts = []string{m.ID}
for _, t := range ts {
types[t] = true
if len(types) == 0 {
logf("%s failed to classify license (%+v), skipping", filename, cov)
return []string{unknownLicenseType}, cov
return setToSortedSlice(types), cov
// Redistributable reports whether the set of license types establishes that a
// module or package is redistributable.
// All the licenses we see that are relevant must be redistributable, and
// we must see at least one such license.
func Redistributable(licenseTypes []string) bool {
sawRedist := false
for _, t := range licenseTypes {
if ignorableLicenseTypes[t] {
if !redistributableLicenseTypes[t] {
return false
sawRedist = true
return sawRedist
func types(lics []*License) []string {
var types []string
for _, l := range lics {
types = append(types, l.Types...)
return types
// setToSortedSlice returns the keys of m in ascending order.
// It returns nil for an empty or nil map.
func setToSortedSlice(m map[string]bool) []string {
	var s []string
	for e := range m {
		s = append(s, e)
	}
	// Map iteration order is random; sort so that the result is deterministic.
	sort.Strings(s)
	return s
}