Skip to content

Commit

Permalink
fix null values in static analysis parsing results (#863)
Browse files Browse the repository at this point in the history
* move 'created' field to top level in static analysis schema

Signed-off-by: Max Fisher <maxfisher@google.com>

* remove trailing comma

Signed-off-by: Max Fisher <maxfisher@google.com>

* fix null values in results where parsing failed, remove unnecessary pointer indirection

Signed-off-by: Max Fisher <maxfisher@google.com>

* move basic data analysis to separate package, don't fail completely if 'file' command doesn't work (#865)

Signed-off-by: Max Fisher <maxfisher@google.com>

---------

Signed-off-by: Max Fisher <maxfisher@google.com>
  • Loading branch information
maxfisher-g authored Sep 7, 2023
1 parent 74979cf commit e88f265
Show file tree
Hide file tree
Showing 9 changed files with 251 additions and 212 deletions.
3 changes: 2 additions & 1 deletion internal/staticanalysis/analyze.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"strings"

"github.com/ossf/package-analysis/internal/log"
"github.com/ossf/package-analysis/internal/staticanalysis/basicdata"
"github.com/ossf/package-analysis/internal/staticanalysis/externalcmd"
"github.com/ossf/package-analysis/internal/staticanalysis/obfuscation"
"github.com/ossf/package-analysis/internal/staticanalysis/parsing"
Expand Down Expand Up @@ -78,7 +79,7 @@ func AnalyzePackageFiles(extractDir string, jsParserConfig parsing.ParserConfig,

if runTask[Basic] {
log.Info("run basic analysis")
basicData, err := GetBasicData(fileList, getPathInArchive)
basicData, err := basicdata.Analyze(fileList, getPathInArchive)
if err != nil {
log.Error("static analysis error", log.Label("task", string(Basic)), "error", err)
} else {
Expand Down
163 changes: 0 additions & 163 deletions internal/staticanalysis/basic_data.go

This file was deleted.

116 changes: 116 additions & 0 deletions internal/staticanalysis/basicdata/basic_data.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
package basicdata

import (
"fmt"
"os"
"strings"

"github.com/ossf/package-analysis/internal/log"
"github.com/ossf/package-analysis/internal/staticanalysis/linelengths"
"github.com/ossf/package-analysis/internal/utils"
"github.com/ossf/package-analysis/internal/utils/valuecounts"
)

// PackageData records basic information about the files in a package.
// It holds a list of FileData entries, one per analysed file; each entry
// identifies its file through FileData.Filename (the path within the archive).
type PackageData struct {
// Files contains the per-file results, in the order the files were analysed.
// Analyze always sets this to a non-nil slice, even when there are no files.
Files []FileData `json:"files"`
}

// FileData records various information about a file that can be determined
// without parsing it using a programming language parser.
// Fields whose value could not be determined are left at a sentinel or zero
// value (documented per field) rather than causing the whole entry to be dropped.
type FileData struct {
// Filename records the path to the file within the package archive
Filename string `json:"filename"`

// Description records the output of the `file` command run on that file.
// It is the empty string when file descriptions could not be obtained.
Description string `json:"description"`

// Size records the size of the file (as reported by the filesystem).
// Analyze sets it to -1 when the file could not be stat'ed.
Size int64 `json:"size"`

// SHA256 records the SHA256 hashsum of the file.
// It is the empty string when hashing the file failed.
SHA256 string `json:"sha256"`

// LineLengths records the counts of line lengths in the file,
// where a line is defined as all characters up to a newline.
// It is left at its zero value when the line lengths could not be read.
LineLengths valuecounts.ValueCounts `json:"line_lengths"`
}

// String renders the file data as human-readable text, one "label: value"
// entry per field. Each entry already ends in a newline and the entries are
// additionally joined with a newline, so consecutive entries are separated by
// a blank line and the result ends with a single trailing newline.
func (fd FileData) String() string {
	// Table of (format, value) pairs, in output order.
	entries := []struct {
		format string
		value  any
	}{
		{"filename: %v\n", fd.Filename},
		{"description: %v\n", fd.Description},
		{"size: %v\n", fd.Size},
		{"sha256: %v\n", fd.SHA256},
		{"line lengths: %v\n", fd.LineLengths},
	}

	rendered := make([]string, 0, len(entries))
	for _, entry := range entries {
		rendered = append(rendered, fmt.Sprintf(entry.format, entry.value))
	}
	return strings.Join(rendered, "\n")
}

/*
Analyze collects basic file information for the specified files. Errors are logged
rather than returned where possible, to maximise the amount of data collected.
pathInArchive should return the relative path in the package archive, given an absolute
path to a file in the package. The relative path is used for the result data.

For each path, one FileData entry is produced, in the same order as paths.
If a piece of information cannot be obtained for a file, the error is logged and
the corresponding field is left at its failure value: Description is "", Size is -1,
SHA256 is "" and LineLengths is the zero value.

If file descriptions cannot be obtained at all, or the number of descriptions does not
match the number of paths, every Description is left empty and analysis continues.

The returned PackageData is never nil and its Files slice is never nil.
The returned error is currently always nil.
*/
func Analyze(paths []string, pathInArchive func(absolutePath string) string) (*PackageData, error) {
	if len(paths) == 0 {
		return &PackageData{Files: []FileData{}}, nil
	}

	descriptions, err := describeFiles(paths)
	haveDescriptions := true
	switch {
	case err != nil:
		log.Error("failed to get file descriptions", "error", err)
		haveDescriptions = false
	case len(descriptions) != len(paths):
		// Only reported when describeFiles itself succeeded; after a failure the
		// count mismatch is a consequence of the error already logged above.
		log.Error(fmt.Sprintf("describeFiles() returned %d results, expecting %d", len(descriptions), len(paths)))
		haveDescriptions = false
	}

	result := PackageData{
		// Non-nil and pre-sized: exactly one entry is appended per path.
		Files: make([]FileData, 0, len(paths)),
	}

	for index, filePath := range paths {
		archivePath := pathInArchive(filePath)

		description := ""
		if haveDescriptions {
			// Safe to index: lengths were checked to be equal above.
			description = descriptions[index]
		}

		var fileSize int64
		if fileInfo, err := os.Stat(filePath); err != nil {
			fileSize = -1 // error value
			log.Error("Error during stat file", "path", archivePath, "error", err)
		} else {
			fileSize = fileInfo.Size()
		}

		var sha256Sum string
		if hash, err := utils.SHA256Hash(filePath); err != nil {
			log.Error("Error hashing file", "path", archivePath, "error", err)
		} else {
			sha256Sum = hash
		}

		var lineLengths valuecounts.ValueCounts
		if ll, err := linelengths.GetLineLengths(filePath, ""); err != nil {
			log.Error("Error counting line lengths", "path", archivePath, "error", err)
		} else {
			lineLengths = valuecounts.Count(ll)
		}

		result.Files = append(result.Files, FileData{
			Filename:    archivePath,
			Description: description,
			Size:        fileSize,
			SHA256:      sha256Sum,
			LineLengths: lineLengths,
		})
	}

	return &result, nil
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package staticanalysis
package basicdata

import (
"os"
Expand All @@ -23,14 +23,14 @@ var testFiles = []testFile{
{
filename: "test1.txt",
contents: []byte("hello test 1!\n"),
contentsHash: "sha256:bd96959573979235b87180b0b7513c7f1d5cbf046b263f366f2f10fe1b966494",
contentsHash: "bd96959573979235b87180b0b7513c7f1d5cbf046b263f366f2f10fe1b966494",
fileType: "ASCII text",
lineLengths: valuecounts.Count([]int{13}),
},
{
filename: "test2.txt",
contents: []byte("#! /bin/bash\necho 'Hello test 2'\n"),
contentsHash: "sha256:6179db3c673ceddcdbd384116ae4d301d64e65fc2686db9ba64945677a5a893c",
contentsHash: "6179db3c673ceddcdbd384116ae4d301d64e65fc2686db9ba64945677a5a893c",
fileType: "Bourne-Again shell script, ASCII text executable",
lineLengths: valuecounts.Count([]int{12, 19}),
},
Expand Down Expand Up @@ -75,18 +75,18 @@ func TestGetBasicData(t *testing.T) {
return strings.TrimPrefix(absolutePath, testDir+string(os.PathSeparator))
}

got, err := GetBasicData(paths, getArchivePath)
got, err := Analyze(paths, getArchivePath)
if (err != nil) != tt.wantErr {
t.Errorf("getFileDescriptions() error = %v, wantErr %v", err, tt.wantErr)
t.Errorf("describeFiles() error = %v, wantErr %v", err, tt.wantErr)
return
}

wantData := utils.Transform(tt.files, func(f testFile) BasicFileData {
return BasicFileData{
wantData := utils.Transform(tt.files, func(f testFile) FileData {
return FileData{
Filename: f.filename,
Description: f.fileType,
Size: int64(len(f.contents)),
Hash: f.contentsHash,
SHA256: f.contentsHash,
LineLengths: f.lineLengths,
}
})
Expand Down
Loading

0 comments on commit e88f265

Please sign in to comment.