-
Notifications
You must be signed in to change notification settings - Fork 215
/
Copy pathfileconfig.go
263 lines (227 loc) · 8.99 KB
/
fileconfig.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: MIT
package logfile
import (
"errors"
"fmt"
"log"
"path/filepath"
"regexp"
"strings"
"time"
"golang.org/x/net/html/charset"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/ianaindex"
"github.com/aws/amazon-cloudwatch-agent/logs"
"github.com/aws/amazon-cloudwatch-agent/profiler"
)
// Fallback values that FileConfig.init applies when the matching setting is
// left at its zero value.
const (
	// defaultMaxEventSize is the default size limit of one log event in bytes (256 KiB).
	defaultMaxEventSize = 256 * 1024
	// defaultTruncateSuffix is the default marker appended to a truncated log line.
	defaultTruncateSuffix = "[Truncated...]"
)
// FileConfig describes the configuration of one log file (or file pattern) to be tailed.
// The fields carrying a toml tag are read from the configuration file; the
// untagged exported fields further down are derived from them by init.
type FileConfig struct {
// The path of the input log file.
// When LogGroupName is empty and PublishMultiLogs is false, init derives the
// log group name from this path.
FilePath string `toml:"file_path"`
// A regular expression used to filter out some files.
// init compiles it into BlacklistRegexP when it is non-empty.
Blacklist string `toml:"blacklist"`
// When true, init does not derive a default LogGroupName from FilePath.
// NOTE(review): any further effect of this flag is not visible in this file — confirm with the tailer code.
PublishMultiLogs bool `toml:"publish_multi_logs"`
// The name of the character encoding of the log file.
// "", "utf_8", "utf-8", "utf8" and "ascii" need no decoder; for any other
// value init must be able to resolve a decoder (stored in Enc) or it fails.
Encoding string `toml:"encoding"`
// The log group name for the input log file.
LogGroupName string `toml:"log_group_name"`
// The log stream name.
LogStreamName string `toml:"log_stream_name"`
// The log group class.
LogGroupClass string `toml:"log_group_class"`
// The regex that locates the timestamp in a log entry.
// timestampFromLogLine reads the timestamp text from the first capture group
// and treats a second capture group, if any, as the fractional second.
TimestampRegex string `toml:"timestamp_regex"`
// The Go time layouts used to parse the extracted timestamp text.
// They are tried in order until one parses successfully.
TimestampLayout []string `toml:"timestamp_layout"`
// The time zone used to parse the timestamp in the log entry.
// "UTC" selects time.UTC; every other value, including the empty string,
// selects time.Local (see init).
Timezone string `toml:"timezone"`
// When true, the text matched by TimestampRegex is removed from the log line
// returned by timestampFromLogLine.
TrimTimestamp bool `toml:"trim_timestamp"`
// The pattern that identifies the first line of a multiline log entry.
// If this config is empty, init substitutes the default pattern "^[\S]",
// i.e. a line that begins with a non-whitespace character.
// If this config is specified as "{timestamp_regex}", the compiled TimestampRegex is reused.
// If this config is specified as some other regex, that regex decides whether a line starts a multiline entry.
MultiLineStartPattern string `toml:"multi_line_start_pattern"`
// Automatically remove the file / symlink after uploading.
// This auto removal does not support the case where another log rotation mechanism is already in place.
AutoRemoval bool `toml:"auto_removal"`
// Indicates whether to tail the log file from the beginning or not.
// The default value for this field should be set to true in the configuration.
// Otherwise, some log entries may be skipped for new files created by timestamp-suffix rotation.
FromBeginning bool `toml:"from_beginning"`
// Indicates whether the input is a named pipe.
Pipe bool `toml:"pipe"`
// Indicates the logType for scroll.
// NOTE(review): the meaning of "scroll" is not visible in this file — confirm.
LogType string `toml:"log_type"`
// Log destination override.
Destination string `toml:"destination"`
// The maximum size of a single log event in bytes.
// init replaces 0 with defaultMaxEventSize.
MaxEventSize int `toml:"max_event_size"`
// The suffix added to a truncated log line to indicate its truncation.
// init replaces "" with defaultTruncateSuffix.
TruncateSuffix string `toml:"truncate_suffix"`
// The retention in days for the log group.
// init replaces 0 with -1.
RetentionInDays int `toml:"retention_in_days"`
// The filters configured for this file. init initialises each of them and
// fails on the first filter that reports an error.
Filters []*LogFilter `toml:"filters"`
// Customer specified service.name
ServiceName string `toml:"service_name"`
// Customer specified deployment.environment
Environment string `toml:"deployment_environment"`
// The *time.Location derived from Timezone by init.
TimezoneLoc *time.Location
// The compiled form of TimestampRegex; nil when TimestampRegex is empty.
TimestampRegexP *regexp.Regexp
// The compiled multiline start pattern derived from MultiLineStartPattern by init.
MultiLineStartPatternP *regexp.Regexp
// The compiled form of Blacklist; nil when Blacklist is empty.
BlacklistRegexP *regexp.Regexp
// The encoding object resolved from Encoding by init; it is not assigned for
// the UTF-8 / ASCII spellings listed on Encoding.
Enc encoding.Encoding
// NOTE(review): sampleCount is not referenced anywhere in this file — confirm its purpose with its users.
sampleCount int
}
// init validates the settings read from the configuration file and derives
// the runtime fields of the FileConfig from them: the decoder (Enc), the
// default log group name, the time zone location, the compiled regular
// expressions, the size/suffix/retention defaults and the initialised filters.
//
// It returns an error when the encoding is not supported, when one of the
// regular expressions does not compile (the compile error is wrapped and can
// be inspected with errors.Is / errors.As), or when a filter fails to
// initialise. On error the config may be left partially initialised.
func (config *FileConfig) init() error {
	var err error

	switch config.Encoding {
	case "", "utf_8", "utf-8", "utf8", "ascii":
		// These spellings need no decoder; Enc is left untouched.
	default:
		// Try the HTML charset table first, then the IANA index. The second
		// return values are discarded on purpose: a nil encoding is the only
		// failure signal used here.
		if config.Enc, _ = charset.Lookup(config.Encoding); config.Enc == nil {
			if config.Enc, _ = ianaindex.IANA.Encoding(config.Encoding); config.Enc == nil {
				msg := fmt.Sprintf("E! the encoding %s is not supported.", config.Encoding)
				// msg contains user input, so it must not be used as a format string.
				log.Print(msg)
				return errors.New(msg)
			}
		}
	}

	// If the log group name is not specified, use the part before the last dot
	// in the file path as the log group name.
	if config.LogGroupName == "" && !config.PublishMultiLogs {
		config.LogGroupName = logGroupName(config.FilePath)
	}

	// Only "UTC" selects UTC; every other value, including an unspecified
	// timezone, falls back to the local time zone.
	if config.Timezone == time.UTC.String() {
		config.TimezoneLoc = time.UTC
	} else {
		config.TimezoneLoc = time.Local
	}

	if config.TimestampRegex != "" {
		if config.TimestampRegexP, err = regexp.Compile(config.TimestampRegex); err != nil {
			return fmt.Errorf("timestamp_regex has issue, regexp: Compile( %v ): %w", config.TimestampRegex, err)
		}
	}

	// Default multiline start: a line that begins with a non-whitespace character.
	if config.MultiLineStartPattern == "" {
		config.MultiLineStartPattern = "^[\\S]"
	}
	if config.MultiLineStartPattern == "{timestamp_regex}" {
		// Reuse the timestamp regex. When timestamp_regex is empty this leaves
		// the pattern nil, and isMultilineStart then returns false for every line.
		config.MultiLineStartPatternP = config.TimestampRegexP
	} else {
		if config.MultiLineStartPatternP, err = regexp.Compile(config.MultiLineStartPattern); err != nil {
			return fmt.Errorf("multi_line_start_pattern has issue, regexp: Compile( %v ): %w", config.MultiLineStartPattern, err)
		}
	}

	if config.Blacklist != "" {
		if config.BlacklistRegexP, err = regexp.Compile(config.Blacklist); err != nil {
			return fmt.Errorf("blacklist regex has issue, regexp: Compile( %v ): %w", config.Blacklist, err)
		}
	}

	if config.MaxEventSize == 0 {
		config.MaxEventSize = defaultMaxEventSize
	}
	if config.TruncateSuffix == "" {
		config.TruncateSuffix = defaultTruncateSuffix
	}
	// An unset retention is stored as -1.
	if config.RetentionInDays == 0 {
		config.RetentionInDays = -1
	}

	for _, f := range config.Filters {
		if err = f.init(); err != nil {
			return err
		}
	}
	return nil
}
// timestampFromLogLine tries to extract and parse the timestamp of a log line.
//
// The first capture group of the timestamp regex supplies the timestamp text.
// If the regex has a second capture group located inside the first one, it is
// treated as the fractional second and rewritten to "." followed by exactly
// three digits (padded with zeros or cut off) before parsing. The text is then
// parsed with each configured layout in turn, in the configured time zone.
// A parsed timestamp whose year is 0 is moved into the current year, or into
// the previous year when that would place it more than 30 days in the future.
//
// It returns the parsed time together with the log line; when TrimTimestamp
// is set, the whole regex match is removed from the returned line.
// The zero time.Time and the unmodified line are returned when no timestamp
// regex or no layout is configured, when the regex does not match, when its
// first capture group did not take part in the match, or when no layout
// parses the extracted text.
func (config *FileConfig) timestampFromLogLine(logValue string) (time.Time, string) {
	// Without a regex or without any layout nothing can be parsed, so the
	// line must not be trimmed either.
	if config.TimestampRegexP == nil || len(config.TimestampLayout) == 0 {
		return time.Time{}, logValue
	}

	// index is [matchStart, matchEnd, group1Start, group1End, group2Start, group2End, ...].
	// A group that did not take part in the match is reported as -1, -1.
	index := config.TimestampRegexP.FindStringSubmatchIndex(logValue)
	if len(index) < 4 || index[2] < 0 {
		return time.Time{}, logValue
	}

	timestampContent := logValue[index[2]:index[3]]
	// Normalise the fractional second only when the second group took part in
	// the match and lies inside the first group; otherwise the offsets below
	// would fall outside timestampContent.
	if len(index) > 5 && index[4] >= index[2] && index[5] <= index[3] {
		start := index[4] - index[2]
		end := index[5] - index[2]
		// Append "000" so that the fractional second has at least 3 digits,
		// then keep exactly the first 3.
		fracSecond := (timestampContent[start:end] + "000")[:3]
		timestampContent = timestampContent[:start] + "." + fracSecond + timestampContent[end:]
	}

	var err error
	var timestamp time.Time
	for _, timestampLayout := range config.TimestampLayout {
		timestamp, err = time.ParseInLocation(timestampLayout, timestampContent, config.TimezoneLoc)
		if err == nil {
			break
		}
	}
	if err != nil {
		// Only the error of the last layout tried is reported.
		log.Printf("E! Error parsing timestampFromLogLine: %s", err)
		return time.Time{}, logValue
	}

	// A layout without a year parses to year 0; assume the current year.
	if timestamp.Year() == 0 {
		now := time.Now()
		timestamp = timestamp.AddDate(now.Year(), 0, 0)
		// If now is very early January and we are pushing logs from very late
		// December, there will be a very large number of hours different
		// between the dates. 30 * 24 hours will be sufficient.
		if timestamp.Sub(now) > 30*24*time.Hour {
			timestamp = timestamp.AddDate(-1, 0, 0)
		}
	}

	if config.TrimTimestamp {
		// Trim the entire timestamp portion (from start to end of the match).
		return timestamp, logValue[:index[0]] + logValue[index[1]:]
	}
	return timestamp, logValue
}
// isMultilineStart reports whether logValue is the first line of a multiline
// log entry, i.e. whether it matches the compiled multiline start pattern.
// It returns false when no pattern has been compiled.
func (config *FileConfig) isMultilineStart(logValue string) bool {
	pattern := config.MultiLineStartPatternP
	return pattern != nil && pattern.MatchString(logValue)
}
// ShouldPublish reports whether event passes every filter in filters.
//
// With no filters it returns true and records nothing. Otherwise it also adds
// a sample to the profiler statistic keyed by
// "logfile", logGroupName, logStreamName, "messages", "dropped":
// 1 when the event is rejected and 0 when it is accepted.
func ShouldPublish(logGroupName, logStreamName string, filters []*LogFilter, event logs.LogEvent) bool {
	if len(filters) == 0 {
		return true
	}

	publish := shouldPublishHelper(filters, event)

	var dropped float64
	if !publish {
		dropped = 1
	}
	statKey := []string{"logfile", logGroupName, logStreamName, "messages", "dropped"}
	profiler.Profiler.AddStats(statKey, dropped)

	return publish
}
// shouldPublishHelper returns true only when every filter accepts event.
// It stops at the first filter that rejects it; an empty filter list accepts.
func shouldPublishHelper(filters []*LogFilter, event logs.LogEvent) bool {
	for i := range filters {
		if filters[i].ShouldPublish(event) {
			continue
		}
		return false
	}
	return true
}
// logGroupName computes the default log group name for a file path when none
// is configured: the path with the extension of its final element removed, e.g.
//
//	"/tmp/TestLogFile.log.2017-07-11-14" -> "/tmp/TestLogFile.log"
//	"/tmp/TestLogFile.log"               -> "/tmp/TestLogFile"
//
// A path whose final element contains no dot is returned unchanged.
// Note: this is only the fallback behavior; it is always recommended to specify
// the log group name explicitly for each input file pattern.
func logGroupName(filePath string) string {
	if ext := filepath.Ext(filePath); ext != "" {
		return strings.TrimSuffix(filePath, ext)
	}
	return filePath
}