diff --git a/extractor/filesystem/language/python/poetrylock/poetrylock.go b/extractor/filesystem/language/python/poetrylock/poetrylock.go index 2886f711f..ca0c202b9 100644 --- a/extractor/filesystem/language/python/poetrylock/poetrylock.go +++ b/extractor/filesystem/language/python/poetrylock/poetrylock.go @@ -16,10 +16,14 @@ package poetrylock import ( + "bufio" + "bytes" "context" "fmt" + "io" "path/filepath" "slices" + "strings" "github.com/BurntSushi/toml" "github.com/google/osv-scalibr/extractor" @@ -97,22 +101,30 @@ func resolveGroups(pkg poetryLockPackage) []string { // Extract extracts packages from poetry.lock files passed through the scan input. func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) { - var parsedLockfile *poetryLockFile - - _, err := toml.NewDecoder(input.Reader).Decode(&parsedLockfile) - + content, err := io.ReadAll(input.Reader) if err != nil { + return inventory.Inventory{}, fmt.Errorf("could not read file: %w", err) + } + + var parsedLockfile *poetryLockFile + if err := toml.Unmarshal(content, &parsedLockfile); err != nil { return inventory.Inventory{}, fmt.Errorf("could not extract: %w", err) } + packageNames := make([]string, 0, len(parsedLockfile.Packages)) + for _, p := range parsedLockfile.Packages { + packageNames = append(packageNames, p.Name) + } + lineNums := findPackageLineNumbers(content, packageNames) + packages := make([]*extractor.Package, 0, len(parsedLockfile.Packages)) - for _, lockPackage := range parsedLockfile.Packages { + for i, lockPackage := range parsedLockfile.Packages { pkgDetails := &extractor.Package{ Name: lockPackage.Name, Version: lockPackage.Version, PURLType: purl.TypePyPi, - Location: extractor.LocationFromPath(input.Path), + Location: extractor.LocationFromPathAndLine(input.Path, lineNums[i]), Metadata: &osv.DepGroupMetadata{ DepGroupVals: resolveGroups(lockPackage), }, @@ -128,4 +140,73 @@ func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (in return inventory.Inventory{Packages: packages}, nil } +// extractPackageName parses a TOML key-value line and returns the unquoted +// package name if the key is "name". Returns false if the line is not a valid name assignment. +// TODO(b/491518484): Put in common location for all Python extractors to use. +func extractPackageName(line string) (string, bool) { + if !strings.HasPrefix(line, "name") { + return "", false + } + k, _, ok := strings.Cut(line, "=") + if !ok || strings.TrimSpace(k) != "name" { + return "", false + } + var pkg poetryLockPackage + if err := toml.Unmarshal([]byte(line), &pkg); err != nil { + return "", false + } + return pkg.Name, true +} + +// findPackageLineNumbers returns the line numbers of the specified package names. +// If package line number is not found, the value will be 0. +func findPackageLineNumbers(content []byte, packageNames []string) []int { + lineNums := make([]int, len(packageNames)) + if len(packageNames) == 0 { + return lineNums + } + + scanner := bufio.NewScanner(bytes.NewReader(content)) + pkgIdx := 0 + inPackageBlock := false + lineNum := 0 + + for scanner.Scan() { + lineNum++ + line := strings.TrimSpace(scanner.Text()) + + if line == "[[package]]" { + inPackageBlock = true + continue + } + + if line == "[metadata]" { + break + } + + if inPackageBlock && strings.HasPrefix(line, "[") && !strings.HasPrefix(line, "[[package]]") { + inPackageBlock = false + continue + } + + if !inPackageBlock || pkgIdx >= len(packageNames) { + continue + } + + name, ok := extractPackageName(line) + if !ok || name != packageNames[pkgIdx] { + continue + } + + lineNums[pkgIdx] = lineNum + pkgIdx++ + inPackageBlock = false + + if pkgIdx == len(packageNames) { + break + } + } + return lineNums +} + var _ filesystem.Extractor = Extractor{} diff --git a/extractor/filesystem/language/python/poetrylock/poetrylock_test.go b/extractor/filesystem/language/python/poetrylock/poetrylock_test.go index e49603a2e..fdc52ecfd 100644 --- a/extractor/filesystem/language/python/poetrylock/poetrylock_test.go +++ b/extractor/filesystem/language/python/poetrylock/poetrylock_test.go @@ -108,7 +108,7 @@ func TestExtractor_Extract(t *testing.T) { Name: "numpy", Version: "1.23.3", PURLType: purl.TypePyPi, - Location: extractor.LocationFromPath("testdata/one-package.lock"), + Location: extractor.LocationFromPathAndLine("testdata/one-package.lock", 2), Metadata: &osv.DepGroupMetadata{ DepGroupVals: []string{}, }, @@ -125,7 +125,7 @@ func TestExtractor_Extract(t *testing.T) { Name: "proto-plus", Version: "1.22.0", PURLType: purl.TypePyPi, - Location: extractor.LocationFromPath("testdata/two-packages.lock"), + Location: extractor.LocationFromPathAndLine("testdata/two-packages.lock", 2), Metadata: &osv.DepGroupMetadata{ DepGroupVals: []string{}, }, @@ -134,7 +134,7 @@ func TestExtractor_Extract(t *testing.T) { Name: "protobuf", Version: "4.21.5", PURLType: purl.TypePyPi, - Location: extractor.LocationFromPath("testdata/two-packages.lock"), + Location: extractor.LocationFromPathAndLine("testdata/two-packages.lock", 16), Metadata: &osv.DepGroupMetadata{ DepGroupVals: []string{}, }, @@ -151,7 +151,7 @@ func TestExtractor_Extract(t *testing.T) { Name: "emoji", Version: "2.0.0", PURLType: purl.TypePyPi, - Location: extractor.LocationFromPath("testdata/one-package-with-metadata.lock"), + Location: extractor.LocationFromPathAndLine("testdata/one-package-with-metadata.lock", 2), Metadata: &osv.DepGroupMetadata{ DepGroupVals: []string{}, }, @@ -168,7 +168,7 @@ func TestExtractor_Extract(t *testing.T) { Name: "ike", Version: "0.2.0", PURLType: purl.TypePyPi, - Location: extractor.LocationFromPath("testdata/source-git.lock"), + Location: extractor.LocationFromPathAndLine("testdata/source-git.lock", 2), SourceCode: &extractor.SourceCodeIdentifier{ Commit: "cd66602cd29f61a2d2e7fb995fef1e61708c034d", }, @@ -188,7 +188,7 @@ func TestExtractor_Extract(t *testing.T) { Name: "appdirs", Version: "1.4.4", PURLType: purl.TypePyPi, - Location: extractor.LocationFromPath("testdata/source-legacy.lock"), + Location: extractor.LocationFromPathAndLine("testdata/source-legacy.lock", 2), Metadata: &osv.DepGroupMetadata{ DepGroupVals: []string{}, }, @@ -205,7 +205,7 @@ func TestExtractor_Extract(t *testing.T) { Name: "numpy", Version: "1.23.3", PURLType: purl.TypePyPi, - Location: extractor.LocationFromPath("testdata/optional-package.lock"), + Location: extractor.LocationFromPathAndLine("testdata/optional-package.lock", 2), Metadata: &osv.DepGroupMetadata{ DepGroupVals: []string{"optional"}, }, @@ -222,7 +222,7 @@ func TestExtractor_Extract(t *testing.T) { Name: "async-timeout", Version: "5.0.1", PURLType: purl.TypePyPi, - Location: extractor.LocationFromPath("testdata/multiple-packages.v2.lock"), + Location: extractor.LocationFromPathAndLine("testdata/multiple-packages.v2.lock", 4), Metadata: &osv.DepGroupMetadata{ DepGroupVals: []string{"optional"}, }, @@ -231,7 +231,7 @@ func TestExtractor_Extract(t *testing.T) { Name: "factory-boy", Version: "3.3.1", PURLType: purl.TypePyPi, - Location: extractor.LocationFromPath("testdata/multiple-packages.v2.lock"), + Location: extractor.LocationFromPathAndLine("testdata/multiple-packages.v2.lock", 17), Metadata: &osv.DepGroupMetadata{ DepGroupVals: []string{"dev"}, }, @@ -240,7 +240,7 @@ func TestExtractor_Extract(t *testing.T) { Name: "faker", Version: "33.3.0", PURLType: purl.TypePyPi, - Location: extractor.LocationFromPath("testdata/multiple-packages.v2.lock"), + Location: extractor.LocationFromPathAndLine("testdata/multiple-packages.v2.lock", 36), Metadata: &osv.DepGroupMetadata{ DepGroupVals: []string{"dev", "test"}, }, @@ -249,7 +249,7 @@ func TestExtractor_Extract(t *testing.T) { Name: "proto-plus", Version: "1.22.0", PURLType: purl.TypePyPi, - Location: extractor.LocationFromPath("testdata/multiple-packages.v2.lock"), + Location: extractor.LocationFromPathAndLine("testdata/multiple-packages.v2.lock", 52), Metadata: &osv.DepGroupMetadata{ DepGroupVals: []string{}, }, @@ -258,7 +258,7 @@ func TestExtractor_Extract(t *testing.T) { Name: "proto-plus", Version: "1.23.0", PURLType: purl.TypePyPi, - Location: extractor.LocationFromPath("testdata/multiple-packages.v2.lock"), + Location: extractor.LocationFromPathAndLine("testdata/multiple-packages.v2.lock", 71), Metadata: &osv.DepGroupMetadata{ DepGroupVals: []string{}, }, @@ -267,7 +267,7 @@ func TestExtractor_Extract(t *testing.T) { Name: "protobuf", Version: "4.25.5", PURLType: purl.TypePyPi, - Location: extractor.LocationFromPath("testdata/multiple-packages.v2.lock"), + Location: extractor.LocationFromPathAndLine("testdata/multiple-packages.v2.lock", 90), Metadata: &osv.DepGroupMetadata{ DepGroupVals: []string{}, }, @@ -276,7 +276,7 @@ func TestExtractor_Extract(t *testing.T) { Name: "python-dateutil", Version: "2.9.0.post0", PURLType: purl.TypePyPi, - Location: extractor.LocationFromPath("testdata/multiple-packages.v2.lock"), + Location: extractor.LocationFromPathAndLine("testdata/multiple-packages.v2.lock", 111), Metadata: &osv.DepGroupMetadata{ DepGroupVals: []string{"dev", "test"}, }, @@ -285,7 +285,7 @@ func TestExtractor_Extract(t *testing.T) { Name: "six", Version: "1.17.0", PURLType: purl.TypePyPi, - Location: extractor.LocationFromPath("testdata/multiple-packages.v2.lock"), + Location: extractor.LocationFromPathAndLine("testdata/multiple-packages.v2.lock", 146), Metadata: &osv.DepGroupMetadata{ DepGroupVals: []string{}, }, @@ -294,7 +294,7 @@ func TestExtractor_Extract(t *testing.T) { Name: "typing-extensions", Version: "4.12.2", PURLType: purl.TypePyPi, - Location: extractor.LocationFromPath("testdata/multiple-packages.v2.lock"), + Location: extractor.LocationFromPathAndLine("testdata/multiple-packages.v2.lock", 158), Metadata: &osv.DepGroupMetadata{ DepGroupVals: []string{"dev", "test"}, }, @@ -303,7 +303,7 @@ func TestExtractor_Extract(t *testing.T) { Name: "urllib3", Version: "2.3.0", PURLType: purl.TypePyPi, - Location: extractor.LocationFromPath("testdata/multiple-packages.v2.lock"), + Location: extractor.LocationFromPathAndLine("testdata/multiple-packages.v2.lock", 170), Metadata: &osv.DepGroupMetadata{ DepGroupVals: []string{"dev"}, }, @@ -312,13 +312,39 @@ func TestExtractor_Extract(t *testing.T) { Name: "redis", Version: "5.2.1", PURLType: purl.TypePyPi, - Location: extractor.LocationFromPath("testdata/multiple-packages.v2.lock"), + Location: extractor.LocationFromPathAndLine("testdata/multiple-packages.v2.lock", 126), Metadata: &osv.DepGroupMetadata{ DepGroupVals: []string{"optional"}, }, }, }, }, + { + Name: "names outside package block", + InputConfig: extracttest.ScanInputMockConfig{ + Path: "testdata/names-outside-package-block.lock", + }, + WantPackages: []*extractor.Package{ + { + Name: "first-pkg", + Version: "1.0.0", + PURLType: purl.TypePyPi, + Location: extractor.LocationFromPathAndLine("testdata/names-outside-package-block.lock", 5), + Metadata: &osv.DepGroupMetadata{ + DepGroupVals: []string{}, + }, + }, + { + Name: "second-pkg", + Version: "2.0.0", + PURLType: purl.TypePyPi, + Location: extractor.LocationFromPathAndLine("testdata/names-outside-package-block.lock", 18), + Metadata: &osv.DepGroupMetadata{ + DepGroupVals: []string{}, + }, + }, + }, + }, } for _, tt := range tests { diff --git a/extractor/filesystem/language/python/poetrylock/testdata/names-outside-package-block.lock b/extractor/filesystem/language/python/poetrylock/testdata/names-outside-package-block.lock new file mode 100644 index 000000000..4a2f2951a --- /dev/null +++ b/extractor/filesystem/language/python/poetrylock/testdata/names-outside-package-block.lock @@ -0,0 +1,24 @@ +version = 1 + +[[package]] +other-field = "value" +name = "first-pkg" +version = "1.0.0" +description = """ +This is a multiline description. +name = "second-pkg" +""" + +[package.tool.poetry] +name = "first-pkg-tool" + +[[package]] +# name = "fake-pkg" +name_other = "not-this" +name = "second-pkg" +version = "2.0.0" + +[metadata] +lock-version = "1.1" +python-versions = "^3.7" +content-hash = "1e6f29ae514d3dd64bd2a40ee33215bec7a7853fee7a469c9c9445f5e27bc3a3"