Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dotnet pe support #505

Open
wants to merge 30 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
e4494d0
add: boilerplate
alessandro-Doyensec Feb 17, 2025
6009c7a
add: extractor to the list of available ones
alessandro-Doyensec Feb 17, 2025
bb30e56
add: first implementation using github.com/saferwall/pe
alessandro-Doyensec Feb 17, 2025
33b9a97
fix: copyright format
alessandro-Doyensec Feb 17, 2025
0510fbc
add: extension checking
alessandro-Doyensec Feb 19, 2025
e90c8b9
Merge remote-tracking branch 'origin/main' into dotnet-pe-support
alessandro-Doyensec Feb 19, 2025
f7f004b
add: simple tests
alessandro-Doyensec Feb 19, 2025
626948d
add: comments and capabilites
alessandro-Doyensec Feb 20, 2025
3136938
remove: default size as .DLL are probably bigger then 20MB
alessandro-Doyensec Feb 20, 2025
0113355
add: AfterFileExtracted
alessandro-Doyensec Feb 24, 2025
3e83ddd
add: cleanup after GetRealPath
alessandro-Doyensec Feb 24, 2025
b608c97
fix: remove capabilities since they're not necessary when using GetRe…
alessandro-Doyensec Feb 24, 2025
ecbd39a
add: docs
alessandro-Doyensec Feb 25, 2025
f328b83
add: early stop condition before parsing the file
alessandro-Doyensec Feb 26, 2025
6dac6bb
Merge remote-tracking branch 'origin/main' into dotnet-pe-support
alessandro-Doyensec Feb 27, 2025
42061e5
fix: Handle errors in the if branch and normal execution outside
alessandro-Doyensec Feb 28, 2025
a60edd9
fix: function ordering
alessandro-Doyensec Feb 28, 2025
ce60817
fix: remove peparser alias
alessandro-Doyensec Feb 28, 2025
1fac7a5
fix: aliasing leftovers
alessandro-Doyensec Feb 28, 2025
c74c6ff
add: comment to public vars
alessandro-Doyensec Mar 4, 2025
08b2123
add: Also update supported_inventory_types.md
alessandro-Doyensec Mar 4, 2025
2367e11
add: plugin.OSWindows here so this plugin only runs for Windows
alessandro-Doyensec Mar 4, 2025
8636aa9
edit: inline isPELikelyExtension
alessandro-Doyensec Mar 4, 2025
945734c
edit: propagate the read error up
alessandro-Doyensec Mar 4, 2025
221f4d7
add: tableContentToInventories function
alessandro-Doyensec Mar 4, 2025
e9bfa99
add: upper case extension test
alessandro-Doyensec Mar 4, 2025
6b1c3e8
add: comment on fileSizeBytes defaulting with 1K if not specified
alessandro-Doyensec Mar 4, 2025
1df7206
edit: remove public errors and improove testing readability
alessandro-Doyensec Mar 4, 2025
0ea3760
Merge remote-tracking branch 'origin/main' into dotnet-pe-support
alessandro-Doyensec Mar 10, 2025
448d9e9
Merge remote-tracking branch 'origin/main' into dotnet-pe-support
alessandro-Doyensec Mar 11, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
269 changes: 269 additions & 0 deletions extractor/filesystem/language/dotnet/dotnetpe/dotnetpe.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,269 @@
// Copyright 2025 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package dotnetpe extracts packages from .NET PE files.
package dotnetpe

import (
"context"
"encoding/binary"
"errors"
"fmt"
"os"
"path/filepath"
"strings"

"github.com/google/osv-scalibr/extractor"
"github.com/google/osv-scalibr/extractor/filesystem"
"github.com/google/osv-scalibr/log"
"github.com/google/osv-scalibr/plugin"
"github.com/google/osv-scalibr/purl"
"github.com/google/osv-scalibr/stats"
"github.com/saferwall/pe"
)

// Name is the unique identifier under which this extractor is registered.
const Name = "dotnet/pe"

// peExtensions lists the file extensions treated as Portable Executable (PE)
// candidates. The PE standard does not mandate specific extensions, so the
// list may be incomplete. The trailing empty string is deliberate: it lets
// files that have no extension at all be considered.
var peExtensions = []string{
	".acm",
	".ax",
	".cpl",
	".dll",
	".drv",
	".efi",
	".exe",
	".mui",
	".ocx",
	".scr",
	".sys",
	".tsp",
	".mun",
	".msstyles",
	"",
}

// ErrOpeningPEFile is wrapped into the errors returned when a file lacks the
// PE magic bytes or cannot be opened by the PE parser.
var ErrOpeningPEFile = errors.New("error opening PE file")

// ErrParsingPEFile is wrapped into the error returned when the PE parser
// fails to parse an opened file.
var ErrParsingPEFile = errors.New("error parsing PE file")

// Extractor extracts .NET dependencies from a PE file.
type Extractor struct {
// cfg is the configuration the extractor was created with (see New).
cfg Config
}

// Config is the configuration for the .NET PE extractor.
type Config struct {
// Stats is a stats collector for reporting metrics.
// It may be left nil, in which case no metrics are reported.
Stats stats.Collector
// MaxFileSizeBytes is the maximum file size this extractor will parse. If
// `FileRequired` gets a bigger file, it will return false.
// Use 0 to accept all file sizes.
MaxFileSizeBytes int64
}

// DefaultConfig returns the default configuration of the extractor, which is
// the zero Config: no stats collector and no file size limit.
func DefaultConfig() Config {
	var cfg Config
	return cfg
}

// New returns a .NET PE extractor that uses the given configuration.
//
// For most use cases, initialize with:
// ```
// e := New(DefaultConfig())
// ```
func New(cfg Config) *Extractor {
	e := new(Extractor)
	e.cfg = cfg
	return e
}

// NewDefault returns an extractor built from DefaultConfig.
func NewDefault() filesystem.Extractor {
	cfg := DefaultConfig()
	return New(cfg)
}

// Name returns the unique name of the extractor, i.e. the package-level Name
// constant.
func (e Extractor) Name() string {
	return Name
}

// Version returns the version of the extractor, currently 0.
func (e Extractor) Version() int {
	const version = 0
	return version
}

// Requirements returns the capabilities the extractor needs; it returns a
// freshly allocated, zero-valued plugin.Capabilities.
func (e Extractor) Requirements() *plugin.Capabilities {
	return new(plugin.Capabilities)
}

// FileRequired reports whether the file behind api should be handed to
// Extract.
//
// A file is accepted when its extension is one of peExtensions (compared
// case-insensitively) and its size does not exceed cfg.MaxFileSizeBytes; a
// limit of 0 disables the size check. The file content is not inspected here.
//
// Files that cannot be stat-ed are rejected. No size-limit result is recorded
// for them, because their size is unknown.
func (e Extractor) FileRequired(api filesystem.FileAPI) bool {
	path := api.Path()

	if !isPELikelyExtension(path) {
		return false
	}

	fileinfo, err := api.Stat()
	if err != nil {
		// A failed Stat says nothing about the file size, so it must not be
		// reported as FileRequiredResultSizeLimitExceeded.
		return false
	}

	if e.cfg.MaxFileSizeBytes > 0 && fileinfo.Size() > e.cfg.MaxFileSizeBytes {
		e.reportFileRequired(path, stats.FileRequiredResultSizeLimitExceeded)
		return false
	}

	e.reportFileRequired(path, stats.FileRequiredResultOK)
	return true
}

// isPELikelyExtension reports whether the extension of path equals, ignoring
// case, one of the entries in peExtensions.
func isPELikelyExtension(path string) bool {
	suffix := filepath.Ext(path)
	for i := 0; i < len(peExtensions); i++ {
		if strings.EqualFold(suffix, peExtensions[i]) {
			return true
		}
	}
	return false
}

// Extract parses the PE file described by input and returns the .NET package
// dependencies found in it. When a stats collector is configured, the outcome
// of the extraction is reported to it before returning.
func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) ([]*extractor.Inventory, error) {
	inventory, err := e.extractFromInput(input)

	collector := e.cfg.Stats
	if collector == nil {
		return inventory, err
	}

	// input.Info may be missing; the reported size then stays at zero.
	var size int64
	if info := input.Info; info != nil {
		size = info.Size()
	}
	collector.AfterFileExtracted(e.Name(), &stats.FileExtractedStats{
		Path:          input.Path,
		Result:        filesystem.ExtractorErrorToFileExtractedResult(err),
		FileSizeBytes: size,
	})
	return inventory, err
}

// extractFromInput parses the PE file behind input and returns the .NET
// assemblies found in it.
//
// Every row of the Assembly and AssemblyRef CLR metadata tables becomes one
// inventory entry: the row name with a ".dll" suffix, and the version formatted
// as major.minor.build.revision. If those tables yield nothing, the
// "InternalName" and "Assembly Version" version resources are used as a
// fallback; an entry is only produced when both are non-empty.
//
// The returned error wraps ErrOpeningPEFile when the magic bytes are missing
// or the parser cannot open the file, and ErrParsingPEFile when parsing fails.
// Errors from GetRealPath and ParseVersionResources are returned unchanged.
func (e Extractor) extractFromInput(input *filesystem.ScanInput) ([]*extractor.Inventory, error) {
	// check if the file has the needed magic bytes before doing the heavy parsing
	if !hasPEMagicBytes(input) {
		return nil, fmt.Errorf("the file header does not contain magic bytes %w", ErrOpeningPEFile)
	}

	// Retrieve the real path of the file
	absPath, err := input.GetRealPath()
	if err != nil {
		return nil, err
	}

	if input.Root == "" {
		// The file got copied to a temporary dir, remove it at the end.
		defer func() {
			// Remove the directory that contains the copy, not the file's
			// base name (which would be resolved relative to the working dir).
			dir := filepath.Dir(absPath)
			if err := os.RemoveAll(dir); err != nil {
				log.Errorf("os.RemoveAll(%q): %v", dir, err)
			}
		}()
	}

	// Open the PE file
	f, err := pe.New(absPath, &pe.Options{})
	if err != nil {
		return nil, errors.Join(ErrOpeningPEFile, err)
	}
	// Release the parser's hold on the file. Deferred calls run in reverse
	// order, so this executes before the temporary directory is removed.
	defer func() {
		if err := f.Close(); err != nil {
			log.Errorf("closing PE file %q: %v", absPath, err)
		}
	}()

	// Parse the PE file
	if err := f.Parse(); err != nil {
		return nil, errors.Join(ErrParsingPEFile, err)
	}

	// Initialize inventory slice to store the dependencies
	var ivs []*extractor.Inventory

	// Names in the metadata tables are offsets into the "#Strings" stream.
	stringsStream := f.CLR.MetadataStreams["#Strings"]

	// Iterate over the CLR Metadata Tables to extract assembly information
	for _, table := range f.CLR.MetadataTables {
		switch content := table.Content.(type) {
		case []pe.AssemblyTableRow:
			for _, row := range content {
				name := string(f.GetStringFromData(row.Name, stringsStream)) + ".dll"
				version := fmt.Sprintf("%d.%d.%d.%d", row.MajorVersion, row.MinorVersion, row.BuildNumber, row.RevisionNumber)
				ivs = append(ivs, &extractor.Inventory{
					Name:    name,
					Version: version,
				})
			}
		case []pe.AssemblyRefTableRow:
			for _, row := range content {
				name := string(f.GetStringFromData(row.Name, stringsStream)) + ".dll"
				version := fmt.Sprintf("%d.%d.%d.%d", row.MajorVersion, row.MinorVersion, row.BuildNumber, row.RevisionNumber)
				ivs = append(ivs, &extractor.Inventory{
					Name:    name,
					Version: version,
				})
			}
		}
	}

	// if at least an inventory was found inside the CLR.MetadataTables there is no need to check the VersionResources
	if len(ivs) > 0 {
		return ivs, nil
	}

	// If no inventory entries were found in CLR.MetadataTables check the VersionResources as a fallback
	versionResources, err := f.ParseVersionResources()
	if err != nil {
		return nil, err
	}

	name, version := versionResources["InternalName"], versionResources["Assembly Version"]
	if name != "" && version != "" {
		ivs = append(ivs, &extractor.Inventory{
			Name:    name,
			Version: version,
		})
	}

	return ivs, nil
}

// hasPEMagicBytes reports whether the first two bytes read from input.Reader,
// decoded as a little-endian uint16, equal pe.ImageDOSSignature or
// pe.ImageDOSZMSignature.
//
// When input.Info is available, files smaller than pe.TinyPESize are rejected
// without reading anything. A nil input.Info skips that shortcut instead of
// panicking. A read error (including a reader with fewer than two bytes)
// yields false. Note that a successful check consumes two bytes from
// input.Reader.
func hasPEMagicBytes(input *filesystem.ScanInput) bool {
	// check for the smallest PE size.
	if input.Info != nil && input.Info.Size() < pe.TinyPESize {
		return false
	}

	var magic uint16
	if err := binary.Read(input.Reader, binary.LittleEndian, &magic); err != nil {
		return false
	}

	// Validate if the magic bytes match any of the expected PE signatures
	return magic == pe.ImageDOSSignature || magic == pe.ImageDOSZMSignature
}

// reportFileRequired forwards the FileRequired outcome for path to the
// configured stats collector. It does nothing when no collector is set.
func (e Extractor) reportFileRequired(path string, result stats.FileRequiredResult) {
	if collector := e.cfg.Stats; collector != nil {
		collector.AfterFileRequired(e.Name(), &stats.FileRequiredStats{
			Path:   path,
			Result: result,
		})
	}
}

// ToPURL builds a NuGet-typed package URL from the name and version of an
// inventory entry created by this extractor.
func (e Extractor) ToPURL(i *extractor.Inventory) *purl.PackageURL {
	p := new(purl.PackageURL)
	p.Type = purl.TypeNuget
	p.Name = i.Name
	p.Version = i.Version
	return p
}

// Ecosystem implements filesystem.Extractor. It returns "NuGet" for every
// inventory entry.
func (e Extractor) Ecosystem(i *extractor.Inventory) string {
	const ecosystem = "NuGet"
	return ecosystem
}

// Compile-time check that Extractor satisfies the filesystem.Extractor interface.
var _ filesystem.Extractor = Extractor{}
Loading
Loading