package analyzer import ( "context" "errors" "io/fs" "os" "regexp" "sort" "strings" "sync" "github.com/samber/lo" "golang.org/x/exp/slices" "golang.org/x/sync/semaphore" "golang.org/x/xerrors" fos "github.com/aquasecurity/trivy/pkg/fanal/analyzer/os" "github.com/aquasecurity/trivy/pkg/fanal/types" "github.com/aquasecurity/trivy/pkg/log" "github.com/aquasecurity/trivy/pkg/misconf" xio "github.com/aquasecurity/trivy/pkg/x/io" ) var ( analyzers = make(map[Type]analyzer) postAnalyzers = make(map[Type]postAnalyzerInitialize) // ErrUnknownOS occurs when unknown OS is analyzed. ErrUnknownOS = xerrors.New("unknown OS") // ErrPkgAnalysis occurs when the analysis of packages is failed. ErrPkgAnalysis = xerrors.New("failed to analyze packages") // ErrNoPkgsDetected occurs when the required files for an OS package manager are not detected ErrNoPkgsDetected = xerrors.New("no packages detected") ) ////////////////////// // Analyzer options // ////////////////////// // AnalyzerOptions is used to initialize analyzers type AnalyzerOptions struct { Group Group Parallel int FilePatterns []string DisabledAnalyzers []Type MisconfScannerOption misconf.ScannerOption SecretScannerOption SecretScannerOption LicenseScannerOption LicenseScannerOption } type SecretScannerOption struct { ConfigPath string } type LicenseScannerOption struct { // Use license classifier to get better results though the classification is expensive. Full bool ClassifierConfidenceLevel float64 } //////////////// // Interfaces // //////////////// // Initializer represents analyzers that need to take parameters from users type Initializer interface { Init(AnalyzerOptions) error } type analyzer interface { Type() Type Version() int Analyze(ctx context.Context, input AnalysisInput) (*AnalysisResult, error) Required(filePath string, info os.FileInfo) bool } type PostAnalyzer interface { Type() Type Version() int PostAnalyze(ctx context.Context, input PostAnalysisInput) (*AnalysisResult, error) Required(filePath string, info os.FileInfo) bool } //////////////////// // Analyzer group // //////////////////// type Group string const GroupBuiltin Group = "builtin" func RegisterAnalyzer(analyzer analyzer) { if _, ok := analyzers[analyzer.Type()]; ok { log.Fatal("Analyzer is registered twice", log.String("type", string(analyzer.Type()))) } analyzers[analyzer.Type()] = analyzer } type postAnalyzerInitialize func(options AnalyzerOptions) (PostAnalyzer, error) func RegisterPostAnalyzer(t Type, initializer postAnalyzerInitialize) { if _, ok := postAnalyzers[t]; ok { log.Fatal("Analyzer is registered twice", log.String("type", string(t))) } postAnalyzers[t] = initializer } // DeregisterAnalyzer is mainly for testing func DeregisterAnalyzer(t Type) { delete(analyzers, t) } // CustomGroup returns a group name for custom analyzers // This is mainly intended to be used in Aqua products. type CustomGroup interface { Group() Group } type Opener func() (xio.ReadSeekCloserAt, error) type AnalyzerGroup struct { logger *log.Logger analyzers []analyzer postAnalyzers []PostAnalyzer filePatterns map[Type][]*regexp.Regexp } /////////////////////////// // Analyzer input/output // /////////////////////////// type AnalysisInput struct { Dir string FilePath string Info os.FileInfo Content xio.ReadSeekerAt Options AnalysisOptions } type PostAnalysisInput struct { FS fs.FS Options AnalysisOptions } type AnalysisOptions struct { Offline bool FileChecksum bool } type AnalysisResult struct { m sync.Mutex OS types.OS Repository *types.Repository PackageInfos []types.PackageInfo Applications []types.Application Misconfigurations []types.Misconfiguration Secrets []types.Secret Licenses []types.LicenseFile SystemInstalledFiles []string // A list of files installed by OS package manager // Digests contains SHA-256 digests of unpackaged files // used to search for SBOM attestation. Digests map[string]string // For Red Hat BuildInfo *types.BuildInfo // CustomResources hold analysis results from custom analyzers. // It is for extensibility and not used in OSS. CustomResources []types.CustomResource } func NewAnalysisResult() *AnalysisResult { result := new(AnalysisResult) return result } func (r *AnalysisResult) isEmpty() bool { return lo.IsEmpty(r.OS) && r.Repository == nil && len(r.PackageInfos) == 0 && len(r.Applications) == 0 && len(r.Misconfigurations) == 0 && len(r.Secrets) == 0 && len(r.Licenses) == 0 && len(r.SystemInstalledFiles) == 0 && r.BuildInfo == nil && len(r.Digests) == 0 && len(r.CustomResources) == 0 } func (r *AnalysisResult) Sort() { // OS packages sort.Slice(r.PackageInfos, func(i, j int) bool { return r.PackageInfos[i].FilePath < r.PackageInfos[j].FilePath }) for _, pi := range r.PackageInfos { sort.Sort(pi.Packages) } // Language-specific packages sort.Slice(r.Applications, func(i, j int) bool { if r.Applications[i].FilePath != r.Applications[j].FilePath { return r.Applications[i].FilePath < r.Applications[j].FilePath } return r.Applications[i].Type < r.Applications[j].Type }) for _, app := range r.Applications { sort.Sort(app.Packages) } // Custom resources sort.Slice(r.CustomResources, func(i, j int) bool { return r.CustomResources[i].FilePath < r.CustomResources[j].FilePath }) // Misconfigurations sort.Slice(r.Misconfigurations, func(i, j int) bool { return r.Misconfigurations[i].FilePath < r.Misconfigurations[j].FilePath }) // Secrets sort.Slice(r.Secrets, func(i, j int) bool { return r.Secrets[i].FilePath < r.Secrets[j].FilePath }) for _, sec := range r.Secrets { sort.Slice(sec.Findings, func(i, j int) bool { if sec.Findings[i].RuleID != sec.Findings[j].RuleID { return sec.Findings[i].RuleID < sec.Findings[j].RuleID } return sec.Findings[i].StartLine < sec.Findings[j].StartLine }) } // License files sort.Slice(r.Licenses, func(i, j int) bool { if r.Licenses[i].Type == r.Licenses[j].Type { if r.Licenses[i].FilePath == r.Licenses[j].FilePath { return r.Licenses[i].Layer.DiffID < r.Licenses[j].Layer.DiffID } else { return r.Licenses[i].FilePath < r.Licenses[j].FilePath } } return r.Licenses[i].Type < r.Licenses[j].Type }) } func (r *AnalysisResult) Merge(newResult *AnalysisResult) { if newResult == nil || newResult.isEmpty() { return } // this struct is accessed by multiple goroutines r.m.Lock() defer r.m.Unlock() r.OS.Merge(newResult.OS) if newResult.Repository != nil { r.Repository = newResult.Repository } if len(newResult.PackageInfos) > 0 { r.PackageInfos = append(r.PackageInfos, newResult.PackageInfos...) } if len(newResult.Applications) > 0 { r.Applications = append(r.Applications, newResult.Applications...) } // Merge SHA-256 digests of unpackaged files if newResult.Digests != nil { r.Digests = lo.Assign(r.Digests, newResult.Digests) } r.Misconfigurations = append(r.Misconfigurations, newResult.Misconfigurations...) r.Secrets = append(r.Secrets, newResult.Secrets...) r.Licenses = append(r.Licenses, newResult.Licenses...) r.SystemInstalledFiles = append(r.SystemInstalledFiles, newResult.SystemInstalledFiles...) if newResult.BuildInfo != nil { if r.BuildInfo == nil { r.BuildInfo = newResult.BuildInfo } else { // We don't need to merge build info here // because there is theoretically only one file about build info in each layer. if newResult.BuildInfo.Nvr != "" || newResult.BuildInfo.Arch != "" { r.BuildInfo.Nvr = newResult.BuildInfo.Nvr r.BuildInfo.Arch = newResult.BuildInfo.Arch } if len(newResult.BuildInfo.ContentSets) > 0 { r.BuildInfo.ContentSets = newResult.BuildInfo.ContentSets } } } r.CustomResources = append(r.CustomResources, newResult.CustomResources...) } func belongToGroup(groupName Group, analyzerType Type, disabledAnalyzers []Type, analyzer any) bool { if slices.Contains(disabledAnalyzers, analyzerType) { return false } analyzerGroupName := GroupBuiltin if cg, ok := analyzer.(CustomGroup); ok { analyzerGroupName = cg.Group() } if analyzerGroupName != groupName { return false } return true } const separator = ":" func NewAnalyzerGroup(opt AnalyzerOptions) (AnalyzerGroup, error) { groupName := opt.Group if groupName == "" { groupName = GroupBuiltin } group := AnalyzerGroup{ logger: log.WithPrefix("analyzer"), filePatterns: make(map[Type][]*regexp.Regexp), } for _, p := range opt.FilePatterns { // e.g. "dockerfile:my_dockerfile_*" s := strings.SplitN(p, separator, 2) if len(s) != 2 { return group, xerrors.Errorf("invalid file pattern (%s) expected format: \"fileType:regexPattern\" e.g. \"dockerfile:my_dockerfile_*\"", p) } fileType, pattern := s[0], s[1] r, err := regexp.Compile(pattern) if err != nil { return group, xerrors.Errorf("invalid file regexp (%s): %w", p, err) } if _, ok := group.filePatterns[Type(fileType)]; !ok { group.filePatterns[Type(fileType)] = []*regexp.Regexp{} } group.filePatterns[Type(fileType)] = append(group.filePatterns[Type(fileType)], r) } for analyzerType, a := range analyzers { if !belongToGroup(groupName, analyzerType, opt.DisabledAnalyzers, a) { continue } // Initialize only scanners that have Init() if ini, ok := a.(Initializer); ok { if err := ini.Init(opt); err != nil { return AnalyzerGroup{}, xerrors.Errorf("analyzer initialization error: %w", err) } } group.analyzers = append(group.analyzers, a) } for analyzerType, init := range postAnalyzers { a, err := init(opt) if err != nil { return AnalyzerGroup{}, xerrors.Errorf("post-analyzer init error: %w", err) } if !belongToGroup(groupName, analyzerType, opt.DisabledAnalyzers, a) { continue } group.postAnalyzers = append(group.postAnalyzers, a) } return group, nil } type Versions struct { Analyzers map[string]int PostAnalyzers map[string]int } // AnalyzerVersions returns analyzer version identifier used for cache keys. func (ag AnalyzerGroup) AnalyzerVersions() Versions { analyzerVersions := make(map[string]int) for _, a := range ag.analyzers { analyzerVersions[string(a.Type())] = a.Version() } postAnalyzerVersions := make(map[string]int) for _, a := range ag.postAnalyzers { postAnalyzerVersions[string(a.Type())] = a.Version() } return Versions{ Analyzers: analyzerVersions, PostAnalyzers: postAnalyzerVersions, } } // AnalyzeFile determines which files are required by the analyzers based on the file name and attributes, // and passes only those files to the analyzer for analysis. // This function may be called concurrently and must be thread-safe. func (ag AnalyzerGroup) AnalyzeFile(ctx context.Context, wg *sync.WaitGroup, limit *semaphore.Weighted, result *AnalysisResult, dir, filePath string, info os.FileInfo, opener Opener, disabled []Type, opts AnalysisOptions) error { if info.IsDir() { return nil } // filepath extracted from tar file doesn't have the prefix "/" cleanPath := strings.TrimLeft(filePath, "/") for _, a := range ag.analyzers { // Skip disabled analyzers if slices.Contains(disabled, a.Type()) { continue } if !ag.filePatternMatch(a.Type(), cleanPath) && !a.Required(cleanPath, info) { continue } rc, err := opener() if errors.Is(err, fs.ErrPermission) { ag.logger.Debug("Permission error", log.String("file_path", filePath)) break } else if err != nil { return xerrors.Errorf("unable to open %s: %w", filePath, err) } if err = limit.Acquire(ctx, 1); err != nil { return xerrors.Errorf("semaphore acquire: %w", err) } wg.Add(1) go func(a analyzer, rc xio.ReadSeekCloserAt) { defer limit.Release(1) defer wg.Done() defer rc.Close() ret, err := a.Analyze(ctx, AnalysisInput{ Dir: dir, FilePath: filePath, Info: info, Content: rc, Options: opts, }) if err != nil && !errors.Is(err, fos.AnalyzeOSError) { ag.logger.Debug("Analysis error", log.Err(err)) return } result.Merge(ret) }(a, rc) } return nil } // RequiredPostAnalyzers returns a list of analyzer types that require the given file. func (ag AnalyzerGroup) RequiredPostAnalyzers(filePath string, info os.FileInfo) []Type { if info.IsDir() { return nil } var postAnalyzerTypes []Type for _, a := range ag.postAnalyzers { if ag.filePatternMatch(a.Type(), filePath) || a.Required(filePath, info) { postAnalyzerTypes = append(postAnalyzerTypes, a.Type()) } } return postAnalyzerTypes } // PostAnalyze passes a virtual filesystem containing only required files // and passes it to the respective post-analyzer. // The obtained results are merged into the "result". // This function may be called concurrently and must be thread-safe. func (ag AnalyzerGroup) PostAnalyze(ctx context.Context, compositeFS *CompositeFS, result *AnalysisResult, opts AnalysisOptions) error { for _, a := range ag.postAnalyzers { fsys, ok := compositeFS.Get(a.Type()) if !ok { continue } skippedFiles := result.SystemInstalledFiles for _, app := range result.Applications { skippedFiles = append(skippedFiles, app.FilePath) for _, pkg := range app.Packages { // The analysis result could contain packages listed in SBOM. // The files of those packages don't have to be analyzed. // This is especially helpful for expensive post-analyzers such as the JAR analyzer. if pkg.FilePath != "" { skippedFiles = append(skippedFiles, pkg.FilePath) } } } filteredFS, err := fsys.Filter(skippedFiles) if err != nil { return xerrors.Errorf("unable to filter filesystem: %w", err) } res, err := a.PostAnalyze(ctx, PostAnalysisInput{ FS: filteredFS, Options: opts, }) if err != nil { return xerrors.Errorf("post analysis error: %w", err) } result.Merge(res) } return nil } // PostAnalyzerFS returns a composite filesystem that contains multiple filesystems for each post-analyzer func (ag AnalyzerGroup) PostAnalyzerFS() (*CompositeFS, error) { return NewCompositeFS(ag) } func (ag AnalyzerGroup) filePatternMatch(analyzerType Type, filePath string) bool { for _, pattern := range ag.filePatterns[analyzerType] { if pattern.MatchString(filePath) { return true } } return false }