Skip to content

Commit 35c5957

Browse files
authored
Fix panic when inferring imports (#575)
Also adds new tests and fixes other (less severe) issues when using the InferImportPaths.
1 parent a276f9d commit 35c5957

File tree

2 files changed

+232
-34
lines changed

2 files changed

+232
-34
lines changed

desc/protoparse/parser.go

+128-33
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424
"google.golang.org/protobuf/types/descriptorpb"
2525

2626
"github.com/jhump/protoreflect/desc"
27+
"github.com/jhump/protoreflect/desc/internal"
2728
"github.com/jhump/protoreflect/desc/protoparse/ast"
2829
)
2930

@@ -155,8 +156,15 @@ func (p Parser) ParseFiles(filenames ...string) ([]*desc.FileDescriptor, error)
155156
return nil, err
156157
}
157158
// then we can infer import paths
158-
// TODO: if this re-writes one of the names in filenames, lookups below will break
159-
results = fixupFilenames(results)
159+
var rewritten map[string]string
160+
results, rewritten = fixupFilenames(results)
161+
if len(rewritten) > 0 {
162+
for i := range filenames {
163+
if replace, ok := rewritten[filenames[i]]; ok {
164+
filenames[i] = replace
165+
}
166+
}
167+
}
160168
resolverFromResults := protocompile.ResolverFunc(func(path string) (protocompile.SearchResult, error) {
161169
res, ok := results[path]
162170
if !ok {
@@ -244,7 +252,15 @@ func (p Parser) ParseFilesButDoNotLink(filenames ...string) ([]*descriptorpb.Fil
244252
for _, res := range results {
245253
resultsMap[res.FileDescriptorProto().GetName()] = res
246254
}
247-
resultsMap = fixupFilenames(resultsMap)
255+
var rewritten map[string]string
256+
resultsMap, rewritten = fixupFilenames(resultsMap)
257+
if len(rewritten) > 0 {
258+
for i := range filenames {
259+
if replace, ok := rewritten[filenames[i]]; ok {
260+
filenames[i] = replace
261+
}
262+
}
263+
}
248264
for i := range filenames {
249265
results[i] = resultsMap[filenames[i]]
250266
}
@@ -362,48 +378,100 @@ func parseToProtos(res protocompile.Resolver, filenames []string, rep *reporter.
362378
func parseToProtosRecursive(res protocompile.Resolver, filenames []string, rep *reporter.Handler, srcPosAddr *SourcePos) (map[string]parser.Result, error) {
363379
results := make(map[string]parser.Result, len(filenames))
364380
for _, filename := range filenames {
365-
parseToProtoRecursive(res, filename, rep, srcPosAddr, results)
381+
if err := parseToProtoRecursive(res, filename, rep, srcPosAddr, results); err != nil {
382+
return results, err
383+
}
366384
}
367385
return results, rep.Error()
368386
}
369387

370-
func parseToProtoRecursive(res protocompile.Resolver, filename string, rep *reporter.Handler, srcPosAddr *SourcePos, results map[string]parser.Result) {
388+
func parseToProtoRecursive(res protocompile.Resolver, filename string, rep *reporter.Handler, srcPosAddr *SourcePos, results map[string]parser.Result) error {
371389
if _, ok := results[filename]; ok {
372390
// already processed this one
373-
return
391+
return nil
374392
}
375393
results[filename] = nil // placeholder entry
376394

377-
astRoot, parseResult, _ := parseToAST(res, filename, rep)
378-
if rep.ReporterError() != nil {
379-
return
395+
astRoot, parseResult, err := parseToAST(res, filename, rep)
396+
if err != nil {
397+
return err
380398
}
381399
if parseResult == nil {
382-
parseResult, _ = parser.ResultFromAST(astRoot, true, rep)
383-
if rep.ReporterError() != nil {
384-
return
400+
parseResult, err = parser.ResultFromAST(astRoot, true, rep)
401+
if err != nil {
402+
return err
385403
}
386404
}
387405
results[filename] = parseResult
388406

389-
for _, decl := range astRoot.Decls {
390-
imp, ok := decl.(*ast2.ImportNode)
391-
if !ok {
392-
continue
407+
if astRoot != nil {
408+
// We have an AST, so we use it to recursively examine imports.
409+
for _, decl := range astRoot.Decls {
410+
imp, ok := decl.(*ast2.ImportNode)
411+
if !ok {
412+
continue
413+
}
414+
err := func() error {
415+
orig := *srcPosAddr
416+
*srcPosAddr = astRoot.NodeInfo(imp.Name).Start()
417+
defer func() {
418+
*srcPosAddr = orig
419+
}()
420+
421+
return parseToProtoRecursive(res, imp.Name.AsString(), rep, srcPosAddr, results)
422+
}()
423+
if err != nil {
424+
return err
425+
}
393426
}
394-
func() {
427+
return nil
428+
}
429+
430+
// Without an AST, we must recursively examine the proto. This makes it harder
431+
// (but not necessarily impossible) to get the source location of the import.
432+
fd := parseResult.FileDescriptorProto()
433+
for i, dep := range fd.Dependency {
434+
path := []int32{internal.File_dependencyTag, int32(i)}
435+
err := func() error {
395436
orig := *srcPosAddr
396-
*srcPosAddr = astRoot.NodeInfo(imp.Name).Start()
437+
found := false
438+
for _, loc := range fd.GetSourceCodeInfo().GetLocation() {
439+
if pathsEqual(loc.Path, path) {
440+
*srcPosAddr = SourcePos{
441+
Filename: dep,
442+
Line: int(loc.Span[0]),
443+
Col: int(loc.Span[1]),
444+
}
445+
found = true
446+
break
447+
}
448+
}
449+
if !found {
450+
*srcPosAddr = *ast.UnknownPos(dep)
451+
}
397452
defer func() {
398453
*srcPosAddr = orig
399454
}()
400455

401-
parseToProtoRecursive(res, imp.Name.AsString(), rep, srcPosAddr, results)
456+
return parseToProtoRecursive(res, dep, rep, srcPosAddr, results)
402457
}()
403-
if rep.ReporterError() != nil {
404-
return
458+
if err != nil {
459+
return err
405460
}
406461
}
462+
return nil
463+
}
464+
465+
func pathsEqual(a, b []int32) bool {
466+
if len(a) != len(b) {
467+
return false
468+
}
469+
for i := range a {
470+
if a[i] != b[i] {
471+
return false
472+
}
473+
}
474+
return true
407475
}
408476

409477
func newReporter(errRep ErrorReporter, warnRep WarningReporter) reporter.Reporter {
@@ -487,11 +555,12 @@ func (p Parser) getResolver(filenames []string) (protocompile.Resolver, *SourceP
487555
}, &srcPos
488556
}
489557

490-
func fixupFilenames(protos map[string]parser.Result) map[string]parser.Result {
558+
func fixupFilenames(protos map[string]parser.Result) (revisedProtos map[string]parser.Result, rewrittenPaths map[string]string) {
491559
// In the event that the given filenames (keys in the supplied map) do not
492560
// match the actual paths used in 'import' statements in the files, we try
493561
// to revise names in the protos so that they will match and be linkable.
494-
revisedProtos := map[string]parser.Result{}
562+
revisedProtos = make(map[string]parser.Result, len(protos))
563+
rewrittenPaths = make(map[string]string, len(protos))
495564

496565
protoPaths := map[string]struct{}{}
497566
// TODO: this is O(n^2) but could likely be O(n) with a clever data structure (prefix tree that is indexed backwards?)
@@ -501,7 +570,7 @@ func fixupFilenames(protos map[string]parser.Result) map[string]parser.Result {
501570
candidatesAvailable[name] = struct{}{}
502571
for _, f := range protos {
503572
for _, imp := range f.FileDescriptorProto().Dependency {
504-
if strings.HasSuffix(name, imp) {
573+
if strings.HasSuffix(name, imp) || strings.HasSuffix(imp, name) {
505574
candidates := importCandidates[imp]
506575
if candidates == nil {
507576
candidates = map[string]struct{}{}
@@ -529,37 +598,62 @@ func fixupFilenames(protos map[string]parser.Result) map[string]parser.Result {
529598
if best == "" {
530599
best = c
531600
} else {
532-
// HACK: we can't actually tell which files is supposed to match
533-
// this import, so arbitrarily pick the "shorter" one (fewest
534-
// path elements) or, on a tie, the lexically earlier one
601+
// NB: We can't actually tell which file is supposed to match
602+
// this import. So we prefer the longest name. On a tie, we
603+
// choose the lexically earliest match.
535604
minLen := strings.Count(best, string(filepath.Separator))
536605
cLen := strings.Count(c, string(filepath.Separator))
537-
if cLen < minLen || (cLen == minLen && c < best) {
606+
if cLen > minLen || (cLen == minLen && c < best) {
538607
best = c
539608
}
540609
}
541610
}
542611
if best != "" {
543-
prefix := best[:len(best)-len(imp)]
544-
if len(prefix) > 0 {
612+
if len(best) > len(imp) {
613+
prefix := best[:len(best)-len(imp)]
545614
protoPaths[prefix] = struct{}{}
546615
}
547616
f := protos[best]
548617
f.FileDescriptorProto().Name = proto.String(imp)
549618
revisedProtos[imp] = f
619+
rewrittenPaths[best] = imp
550620
delete(candidatesAvailable, best)
621+
622+
// If other candidates are actually references to the same file, remove them.
623+
for c := range candidates {
624+
if _, ok := candidatesAvailable[c]; !ok {
625+
// already used this candidate and re-written its filename accordingly
626+
continue
627+
}
628+
possibleDup := protos[c]
629+
prevName := possibleDup.FileDescriptorProto().Name
630+
possibleDup.FileDescriptorProto().Name = proto.String(imp)
631+
if !proto.Equal(f.FileDescriptorProto(), protos[c].FileDescriptorProto()) {
632+
// not equal: restore name and look at next one
633+
possibleDup.FileDescriptorProto().Name = prevName
634+
continue
635+
}
636+
// This file used a different name but was actually the same file. So
637+
// we prune it from the set.
638+
rewrittenPaths[c] = imp
639+
delete(candidatesAvailable, c)
640+
if len(c) > len(imp) {
641+
prefix := c[:len(c)-len(imp)]
642+
protoPaths[prefix] = struct{}{}
643+
}
644+
}
551645
}
552646
}
553647

554648
if len(candidatesAvailable) == 0 {
555-
return revisedProtos
649+
return revisedProtos, rewrittenPaths
556650
}
557651

558652
if len(protoPaths) == 0 {
559653
for c := range candidatesAvailable {
560654
revisedProtos[c] = protos[c]
561655
}
562-
return revisedProtos
656+
return revisedProtos, rewrittenPaths
563657
}
564658

565659
// Any remaining candidates are entry-points (not imported by others), so
@@ -588,12 +682,13 @@ func fixupFilenames(protos map[string]parser.Result) map[string]parser.Result {
588682
f.FileDescriptorProto().Name = proto.String(imp)
589683
f.FileNode()
590684
revisedProtos[imp] = f
685+
rewrittenPaths[c] = imp
591686
} else {
592687
revisedProtos[c] = protos[c]
593688
}
594689
}
595690

596-
return revisedProtos
691+
return revisedProtos, rewrittenPaths
597692
}
598693

599694
func removeDynamicExtensions(fd protoreflect.FileDescriptor, alreadySeen map[string]struct{}) {

desc/protoparse/parser_test.go

+104-1
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ func TestSimpleParse(t *testing.T) {
154154
// We'll also check our fixup logic to make sure it correctly rewrites the
155155
// names of the files to match corresponding import statementes. This should
156156
// strip the "../../internal/testprotos/" prefix from each file.
157-
protos = fixupFilenames(protos)
157+
protos, _ = fixupFilenames(protos)
158158
var actual []string
159159
for n := range protos {
160160
actual = append(actual, n)
@@ -422,3 +422,106 @@ message Foo {
422422
comment := fds[0].GetMessageTypes()[0].GetFields()[0].GetSourceInfo().GetLeadingComments()
423423
testutil.Eq(t, " leading comments\n", comment)
424424
}
425+
426+
func TestParseInferImportPaths_SimpleNoOp(t *testing.T) {
427+
sources := map[string]string{
428+
"test.proto": `
429+
syntax = "proto3";
430+
import "google/protobuf/struct.proto";
431+
message Foo {
432+
string name = 1;
433+
repeated uint64 refs = 2;
434+
google.protobuf.Struct meta = 3;
435+
}`,
436+
}
437+
p := Parser{
438+
Accessor: FileContentsFromMap(sources),
439+
InferImportPaths: true,
440+
}
441+
fds, err := p.ParseFiles("test.proto")
442+
testutil.Ok(t, err)
443+
testutil.Eq(t, 1, len(fds))
444+
}
445+
446+
func TestParseInferImportPaths_FixesNestedPaths(t *testing.T) {
447+
sources := FileContentsFromMap(map[string]string{
448+
"/foo/bar/a.proto": `
449+
syntax = "proto3";
450+
import "baz/b.proto";
451+
message A {
452+
B b = 1;
453+
}`,
454+
"/foo/bar/baz/b.proto": `
455+
syntax = "proto3";
456+
import "baz/c.proto";
457+
message B {
458+
C c = 1;
459+
}`,
460+
"/foo/bar/baz/c.proto": `
461+
syntax = "proto3";
462+
message C {}`,
463+
"/foo/bar/baz/d.proto": `
464+
syntax = "proto3";
465+
import "a.proto";
466+
message D {
467+
A a = 1;
468+
}`,
469+
})
470+
471+
testCases := []struct {
472+
name string
473+
cwd string
474+
filenames []string
475+
expect []string
476+
}{
477+
{
478+
name: "outside hierarchy",
479+
cwd: "/buzz",
480+
filenames: []string{"../foo/bar/a.proto", "../foo/bar/baz/b.proto", "../foo/bar/baz/c.proto", "../foo/bar/baz/d.proto"},
481+
},
482+
{
483+
name: "inside hierarchy",
484+
cwd: "/foo",
485+
filenames: []string{"bar/a.proto", "bar/baz/b.proto", "bar/baz/c.proto", "bar/baz/d.proto"},
486+
},
487+
{
488+
name: "import path root (no-op)",
489+
cwd: "/foo/bar",
490+
filenames: []string{"a.proto", "baz/b.proto", "baz/c.proto", "baz/d.proto"},
491+
},
492+
{
493+
name: "inside leaf directory",
494+
cwd: "/foo/bar/baz",
495+
filenames: []string{"../a.proto", "b.proto", "c.proto", "d.proto"},
496+
// NB: Expected names differ from above cases because nothing imports d.proto.
497+
// So when inferring the root paths, the fact that d.proto is defined in
498+
// the baz sub-directory will not be discovered. That's okay since the parse
499+
// operation still succeeds.
500+
expect: []string{"a.proto", "baz/b.proto", "baz/c.proto", "d.proto"},
501+
},
502+
}
503+
for _, testCase := range testCases {
504+
t.Run(testCase.name, func(t *testing.T) {
505+
p := Parser{
506+
Accessor: sources,
507+
ImportPaths: []string{testCase.cwd, "/foo/bar"},
508+
InferImportPaths: true,
509+
}
510+
fds, err := p.ParseFiles(testCase.filenames...)
511+
testutil.Ok(t, err)
512+
testutil.Eq(t, 4, len(fds))
513+
var expectedNames []string
514+
if len(testCase.expect) == 0 {
515+
expectedNames = []string{"a.proto", "baz/b.proto", "baz/c.proto", "baz/d.proto"}
516+
} else {
517+
testutil.Eq(t, 4, len(testCase.expect))
518+
expectedNames = testCase.expect
519+
}
520+
// check that they have the expected name
521+
testutil.Eq(t, expectedNames[0], fds[0].GetName())
522+
testutil.Eq(t, expectedNames[1], fds[1].GetName())
523+
testutil.Eq(t, expectedNames[2], fds[2].GetName())
524+
testutil.Eq(t, expectedNames[3], fds[3].GetName())
525+
})
526+
}
527+
}

0 commit comments

Comments
 (0)