Skip to content

Commit 4a5f25f

Browse files
committed
Add basic stream level metadata to catalog entries
This PR makes a few big changes, mainly adding the concept of metadata to the catalog and its entries. We now create and emit a few default fields in each entry that let us specify how we will act for a given stream. Users can also edit these values (and add others), so we need to read that catalog file back in if one is specified. When reading it in, we parse it back into our catalog object and then use that to filter the streams we will act on. The options are a little weird here, but basically: a user can set "selected" (we cannot); if "selected" is not set, we check whether "selected-by-default" is set, as this is something we typically control and will have turned on. If neither is set, or the one that applies is false, we don't include the stream. If no catalog file is specified at all, we create our default one (same as running --discover) and use that immediately.
1 parent 2a88a45 commit 4a5f25f

File tree

5 files changed

+196
-48
lines changed

5 files changed

+196
-48
lines changed

cmd/tap-incident/cmd/app.go

+36-4
Original file line numberDiff line numberDiff line change
@@ -82,11 +82,27 @@ func Run(ctx context.Context) (err error) {
8282

8383
if *discoveryMode {
8484
err = tap.Discover(ctx, logger, ol)
85+
if err != nil {
86+
return err
87+
}
8588
} else {
86-
err = tap.Sync(ctx, logger, ol, cl)
87-
}
88-
if err != nil {
89-
return err
89+
// If we're syncing - check if we were given a catalog
90+
var (
91+
catalog *tap.Catalog
92+
err error
93+
)
94+
95+
if *catalogFile != "" {
96+
catalog, err = loadCatalogOrError(ctx, *catalogFile)
97+
if err != nil {
98+
return err
99+
}
100+
}
101+
102+
err = tap.Sync(ctx, logger, ol, cl, catalog)
103+
if err != nil {
104+
return err
105+
}
90106
}
91107

92108
return nil
@@ -113,6 +129,22 @@ func versionStanza() string {
113129
)
114130
}
115131

132+
func loadCatalogOrError(ctx context.Context, catalogFile string) (catalog *tap.Catalog, err error) {
133+
defer func() {
134+
if err == nil {
135+
return
136+
}
137+
OUT("Failed to load catalog file!\n")
138+
}()
139+
140+
catalog, err = tap.CatalogFileLoader(catalogFile).Load(ctx)
141+
if err != nil {
142+
return nil, errors.Wrap(err, "loading catalog")
143+
}
144+
145+
return catalog, nil
146+
}
147+
116148
func loadConfigOrError(ctx context.Context, configFile string) (cfg *config.Config, err error) {
117149
defer func() {
118150
if err == nil {

config/loader.go

-35
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,6 @@ package config
33
import (
44
"context"
55
"os"
6-
"time"
7-
8-
kitlog "github.com/go-kit/log"
96
)
107

118
type Loader interface {
@@ -29,35 +26,3 @@ func (l FileLoader) Load(context.Context) (*Config, error) {
2926

3027
return Parse(string(l), data)
3128
}
32-
33-
// NewCachedLoader caches a loader to avoid repeated lookups.
34-
func NewCachedLoader(logger kitlog.Logger, loader Loader, ttl time.Duration) Loader {
35-
return &cachedLoader{
36-
logger: logger,
37-
loader: loader,
38-
ttl: ttl,
39-
}
40-
}
41-
42-
type cachedLoader struct {
43-
logger kitlog.Logger
44-
loader Loader
45-
ttl time.Duration
46-
cfg *Config
47-
lastUpdated time.Time
48-
}
49-
50-
func (c *cachedLoader) Load(ctx context.Context) (cfg *Config, err error) {
51-
if c.cfg == nil || time.Since(c.lastUpdated) > c.ttl {
52-
c.logger.Log("event", "loading_cofig", "msg", "cache expired, loading config")
53-
cfg, err := c.loader.Load(ctx)
54-
if err != nil {
55-
return nil, err
56-
}
57-
58-
c.cfg = cfg
59-
c.lastUpdated = time.Now()
60-
}
61-
62-
return c.cfg, nil
63-
}

tap/catalog.go

+78-4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
11
package tap
22

3-
import "github.com/incident-io/singer-tap/model"
3+
import (
4+
"context"
5+
"encoding/json"
6+
"os"
7+
8+
"github.com/incident-io/singer-tap/model"
9+
"github.com/pkg/errors"
10+
)
411

512
// A catalog can contain several streams or "entries"
613
type CatalogEntry struct {
@@ -15,7 +22,7 @@ type CatalogEntry struct {
1522
Schema model.Schema `json:"schema"`
1623

1724
// Optional metadata for this stream
18-
// Metadata *[]Metadata `json:"metadata,omitempty"`
25+
Metadata *[]Metadata `json:"metadata,omitempty"`
1926
}
2027

2128
// Actual catalog that we export
@@ -24,14 +31,50 @@ type Catalog struct {
2431
Streams []CatalogEntry `json:"streams"`
2532
}
2633

27-
func NewCatalog(streams map[string]Stream) *Catalog {
34+
func (c *Catalog) GetEnabledStreams() []CatalogEntry {
35+
var enabledStreams []CatalogEntry
36+
37+
// Go through all streams registered in the catalog
38+
for _, entry := range c.Streams {
39+
// if there is no metadata then just include the stream
40+
if entry.Metadata == nil {
41+
enabledStreams = append(enabledStreams, entry)
42+
} else {
43+
for _, metadata := range *entry.Metadata {
44+
// Only check the top level metadata
45+
if len(metadata.Breadcrumb) > 0 {
46+
continue
47+
}
48+
49+
// Check if the metadata has the user input "selected" bool
50+
if metadata.Metadata.Selected != nil {
51+
// If so, check its set to true!
52+
if *metadata.Metadata.Selected {
53+
enabledStreams = append(enabledStreams, entry)
54+
}
55+
// otherwise check if WE have set to select this by default
56+
} else if metadata.Metadata.SelectedByDefault {
57+
enabledStreams = append(enabledStreams, entry)
58+
}
59+
}
60+
}
61+
}
62+
63+
return enabledStreams
64+
}
65+
66+
func NewDefaultCatalog(streams map[string]Stream) *Catalog {
2867
entries := []CatalogEntry{}
2968

3069
for name, stream := range streams {
70+
streamSchema := *stream.Output().Schema
71+
metadata := Metadata{}.DefaultMetadata(streamSchema)
72+
3173
catalogEntry := CatalogEntry{
3274
Stream: name,
3375
TapStreamID: name,
34-
Schema: *stream.Output().Schema,
76+
Schema: streamSchema,
77+
Metadata: &metadata,
3578
}
3679

3780
entries = append(entries, catalogEntry)
@@ -41,3 +84,34 @@ func NewCatalog(streams map[string]Stream) *Catalog {
4184
Streams: entries,
4285
}
4386
}
87+
88+
// CatalogLoader is anything that can produce a Catalog.
type CatalogLoader interface {
89+
// Load returns the loaded catalog, or an error if it could not be
// produced.
Load(context.Context) (*Catalog, error)
90+
}
91+
92+
// CatalogLoaderFunc adapts a plain function with the Load signature so that
// it satisfies CatalogLoader.
type CatalogLoaderFunc func(context.Context) (*Catalog, error)
93+
94+
// Load calls the underlying function with ctx and returns its result
// unchanged.
func (l CatalogLoaderFunc) Load(ctx context.Context) (*Catalog, error) {
95+
return l(ctx)
96+
}
97+
98+
// CatalogFileLoader loads Catalog from a filepath
99+
type CatalogFileLoader string
100+
101+
func (l CatalogFileLoader) Load(context.Context) (*Catalog, error) {
102+
data, err := os.ReadFile(string(l))
103+
if err != nil {
104+
return nil, err
105+
}
106+
107+
return ParseCatalogFile(string(l), data)
108+
}
109+
110+
func ParseCatalogFile(filename string, data []byte) (*Catalog, error) {
111+
var catalog Catalog
112+
if err := json.Unmarshal(data, &catalog); err != nil {
113+
return nil, errors.Wrap(err, "parsing json")
114+
}
115+
116+
return &catalog, nil
117+
}

tap/metadata.go

+68
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
package tap
2+
3+
import "github.com/incident-io/singer-tap/model"
4+
5+
// Metadata is a single metadata item of a catalog entry: a breadcrumb saying
// where the item applies, plus the fields set at that location.
type Metadata struct {
6+
// Breadcrumb points to where in the schema this metadata applies.
// An empty breadcrumb denotes the top (stream) level.
7+
Breadcrumb []string `json:"breadcrumb"`
8+
9+
// Metadata holds the fields set for this metadata object.
10+
Metadata MetadataFields `json:"metadata"`
11+
}
12+
13+
// MetadataFields is the set of metadata keys this tap reads from and writes
// to a catalog metadata item. The JSON tags are the key names used in the
// catalog file.
type MetadataFields struct {
	/****
	 * NON DISCOVERABLE FIELDS
	 * We don't control these - pull them in and use them
	 ****/

	// Selected: whether this node is selected by the user to be emitted.
	// Can be field level or whole stream. It is a pointer so that "not set"
	// (nil) can be told apart from an explicit false.
	Selected *bool `json:"selected,omitempty"`

	// ReplicationMethod: the replication method to use.
	// Ignored by our tap.
	ReplicationMethod *string `json:"replication-method,omitempty"`

	// ReplicationKey: the replication key for this node.
	// Used as a bookmark - ignored by our tap.
	ReplicationKey *string `json:"replication-key,omitempty"`

	// ViewKeyProperties: not sure how this is used.
	// Ignored by our tap.
	ViewKeyProperties *[]string `json:"view-key-properties,omitempty"`

	/****
	 * DISCOVERABLE FIELDS
	 * We can read and write these fields
	 ****/

	// Inclusion: whether we emit this field automatically.
	// Can be available (you choose), automatic (we choose), or unsupported
	// (we don't emit).
	Inclusion string `json:"inclusion"`

	// SelectedByDefault: if the user doesn't specify, should we emit this
	// field by default.
	// This really only applies to the available inclusion setting.
	SelectedByDefault bool `json:"selected-by-default"`

	// ForcedReplicationMethod: we will set this to FULL_TABLE for our tap.
	ForcedReplicationMethod string `json:"forced-replication-method"`
}
52+
53+
func (m Metadata) DefaultMetadata(schema model.Schema) []Metadata {
54+
// By default we always include a top level metadata with the same
55+
// settings
56+
var metadata = []Metadata{
57+
{
58+
Breadcrumb: []string{},
59+
Metadata: MetadataFields{
60+
Inclusion: "available", // always set to available at stream level
61+
SelectedByDefault: true, // lets assume people always want our data
62+
ForcedReplicationMethod: "FULL_TABLE", // HIGHWAY TO THE DATA ZONE
63+
},
64+
},
65+
}
66+
67+
return metadata
68+
}

tap/tap.go

+14-5
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,18 @@ import (
88
"github.com/incident-io/singer-tap/client"
99
)
1010

11-
func Sync(ctx context.Context, logger kitlog.Logger, ol *OutputLogger, cl *client.ClientWithResponses) error {
12-
for name, stream := range streams {
13-
logger := kitlog.With(logger, "stream", name)
11+
func Sync(ctx context.Context, logger kitlog.Logger, ol *OutputLogger, cl *client.ClientWithResponses, catalog *Catalog) error {
12+
// If we weren't given a catalog, create a default one and use that
13+
if catalog == nil {
14+
catalog = NewDefaultCatalog(streams)
15+
}
16+
17+
// We only want to sync enabled streams
18+
enabledStreams := catalog.GetEnabledStreams()
19+
20+
for _, catalogEntry := range enabledStreams {
21+
stream := streams[catalogEntry.Stream]
22+
logger := kitlog.With(logger, "stream", catalogEntry.Stream)
1423

1524
logger.Log("msg", "outputting schema")
1625
if err := ol.Log(stream.Output()); err != nil {
@@ -28,7 +37,7 @@ func Sync(ctx context.Context, logger kitlog.Logger, ol *OutputLogger, cl *clien
2837
for _, record := range records {
2938
op := &Output{
3039
Type: OutputTypeRecord,
31-
Stream: name,
40+
Stream: catalogEntry.Stream,
3241
Record: record,
3342
TimeExtracted: timeExtracted,
3443
}
@@ -42,7 +51,7 @@ func Sync(ctx context.Context, logger kitlog.Logger, ol *OutputLogger, cl *clien
4251
}
4352

4453
func Discover(ctx context.Context, logger kitlog.Logger, ol *OutputLogger) error {
45-
catalog := NewCatalog(streams)
54+
catalog := NewDefaultCatalog(streams)
4655

4756
if err := ol.CataLog(catalog); err != nil {
4857
return err

0 commit comments

Comments
 (0)