-
Notifications
You must be signed in to change notification settings - Fork 124
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Works around missing Module.dir for vendored Modules #178
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,6 +18,7 @@ import ( | |
"context" | ||
"fmt" | ||
"go/build" | ||
"path" | ||
"path/filepath" | ||
"sort" | ||
"strings" | ||
|
@@ -117,7 +118,33 @@ func Libraries(ctx context.Context, classifier Classifier, includeTests bool, ig | |
klog.Errorf("Package %s does not have module info. Non go modules projects are no longer supported. For feedback, refer to https://github.com/google/go-licenses/issues/128.", p.PkgPath) | ||
return false | ||
} | ||
licensePath, err := Find(pkgDir, p.Module.Dir, classifier) | ||
|
||
stopAt := p.Module.Dir | ||
if isVendorModule(newModule(p.Module), pkgDir) { | ||
splits := strings.SplitN(pkgDir, "/vendor/", 2) | ||
moduleSplits := strings.Split(p.Module.Path, "/") | ||
// There seems to be 3 common cases in terms of go module directory structure: | ||
// - "github" case follows github.com/<org>/<repo> | ||
// - "k8s.io" case follow k8s.io/<module> | ||
// - "go.starlark.net" (and probably lots others not exactly sure why/when this happens) case where go.starlark.net is the only folder in the structure | ||
// To account for the above, the stopAt is set to the the greatest depth up to 3 subfolders | ||
// Alternative: Since license files should only exists in the context of module folders within | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I like this alternative solution. The assumption that vendor folder and module dir folders do not have license files makes sense and is simpler than assuming a depth <= 3. What do you think? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think so too, we can always adjust it if/when running into a case of a license file being in vendor, which seems less likely than depending on the current assumptions holding true. Im going to change it in my build and run it thru our pipelines and just make sure nothing new pops up and Ill update this PR. |
||
// the vendor directory, and not repo (github.com) folders or the vendor folder itself, we could | ||
// set stopAt to vendor mainly just to ensure traversal never goes above vendor, finding the main module license | ||
switch { | ||
case len(moduleSplits) > 2: | ||
stopAt = path.Join(splits[0], "vendor", moduleSplits[0], moduleSplits[1], moduleSplits[2]) | ||
case len(moduleSplits) == 2: | ||
stopAt = path.Join(splits[0], "vendor", moduleSplits[0], moduleSplits[1]) | ||
case len(moduleSplits) == 1: | ||
stopAt = path.Join(splits[0], "vendor", moduleSplits[0]) | ||
default: | ||
klog.Errorf("module %s has an unexpected number of directories. Report to go-licenses developer if you see this.", p.Module.Path) | ||
return false | ||
} | ||
} | ||
|
||
licensePath, err := Find(pkgDir, stopAt, classifier) | ||
if err != nil { | ||
klog.Errorf("Failed to find license for %s: %v", p.PkgPath, err) | ||
} | ||
|
@@ -156,43 +183,25 @@ func Libraries(ctx context.Context, classifier Classifier, includeTests bool, ig | |
lib.module = newModule(pkg.Module) | ||
} | ||
} | ||
if lib.module != nil && lib.module.Path != "" && lib.module.Dir == "" { | ||
// A known cause is that the module is vendored, so some information is lost. | ||
if isVendorModule(lib.module, lib.LicensePath) { | ||
splits := strings.SplitN(lib.LicensePath, "/vendor/", 2) | ||
if len(splits) != 2 { | ||
klog.Warningf("module %s does not have dir and it's not vendored, cannot discover the license URL. Report to go-licenses developer if you see this.", lib.module.Path) | ||
} else { | ||
// This is vendored. Handle this known special case. | ||
|
||
// Extra note why we identify a vendored package like this. | ||
// | ||
// For a normal package: | ||
// * if it's not in a module, lib.module == nil | ||
// * if it's in a module, lib.module.Dir != "" | ||
// Only vendored modules will have lib.module != nil && lib.module.Path != "" && lib.module.Dir == "" as far as I know. | ||
// So the if condition above is already very strict for vendored packages. | ||
// On top of it, we checked the lib.LicensePath contains a vendor folder in it. | ||
// So it's rare to have a false positive for both conditions at the same time, although it may happen in theory. | ||
// | ||
// These assumptions may change in the future, | ||
// so we need to keep this updated with go tooling changes. | ||
parentModDir := splits[0] | ||
var parentPkg *packages.Package | ||
for _, rootPkg := range rootPkgs { | ||
if rootPkg.Module != nil && rootPkg.Module.Dir == parentModDir { | ||
parentPkg = rootPkg | ||
break | ||
} | ||
} | ||
if parentPkg == nil { | ||
klog.Warningf("cannot find parent package of vendored module %s", lib.module.Path) | ||
} else { | ||
// Vendored modules should be commited in the parent module, so it counts as part of the | ||
// parent module. | ||
lib.module = newModule(parentPkg.Module) | ||
parentModDir := splits[0] | ||
var parentPkg *packages.Package | ||
for _, rootPkg := range rootPkgs { | ||
if rootPkg.Module != nil && rootPkg.Module.Dir == parentModDir { | ||
parentPkg = rootPkg | ||
break | ||
} | ||
} | ||
if parentPkg == nil { | ||
klog.Warningf("cannot find parent package of vendored module %s", lib.module.Path) | ||
} else { | ||
// Vendored modules should be commited in the parent module, so it counts as part of the | ||
// parent module. | ||
lib.module = newModule(parentPkg.Module) | ||
} | ||
} | ||
|
||
libraries = append(libraries, lib) | ||
} | ||
// Sort libraries to produce a stable result for snapshot diffing. | ||
|
@@ -228,6 +237,33 @@ func commonAncestor(paths []string) string { | |
return min | ||
} | ||
|
||
func isVendorModule(module *Module, path string) bool { | ||
if module != nil && module.Path != "" && module.Dir == "" { | ||
// A known cause is that the module is vendored, so some information is lost. | ||
splits := strings.SplitN(path, "/vendor/", 2) | ||
if len(splits) != 2 { | ||
klog.Warningf("module %s does not have dir and it's not vendored, cannot discover the license URL. Report to go-licenses developer if you see this.", module.Path) | ||
} else { | ||
// This is vendored. Handle this known special case. | ||
|
||
// Extra note why we identify a vendored package like this. | ||
// | ||
// For a normal package: | ||
// * if it's not in a module, lib.module == nil | ||
// * if it's in a module, lib.module.Dir != "" | ||
// Only vendored modules will have lib.module != nil && lib.module.Path != "" && lib.module.Dir == "" as far as I know. | ||
// So the if condition above is already very strict for vendored packages. | ||
// On top of it, we checked the lib.LicensePath contains a vendor folder in it. | ||
// So it's rare to have a false positive for both conditions at the same time, although it may happen in theory. | ||
// | ||
// These assumptions may change in the future, | ||
// so we need to keep this updated with go tooling changes. | ||
return true | ||
} | ||
} | ||
return false | ||
} | ||
|
||
func (l *Library) String() string { | ||
return l.Name() | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am not overly happy/attached to this code, but tried to document what I saw when running these build thru our build pipelines. I think the main fix here is by stopping the traversal up at the vendor directory. We could remove all this and just use that instead of trying to be "smart" by making these assumptions. I would not guarantee these are assumptions are even correct. There could def be other cases where the folder structure is deeper or shallower for legit reason.
Looking for feedback here for sure.