@@ -173,7 +173,7 @@ class HealthCheck {
173
173
this . services [ name ] = nService ;
174
174
175
175
this . services [ name ] . _sTimeoutHandler = setTimeout ( ( ) => {
176
- this . _runCheck ( this . services [ name ] ) ;
176
+ this . _run ( this . services [ name ] ) ;
177
177
} , ( nService . config . start_delay || 0 ) * 1000 ) ;
178
178
}
179
179
}
@@ -197,106 +197,127 @@ class HealthCheck {
197
197
return message || '' ;
198
198
}
199
199
200
- async _runCheck ( service ) {
201
- if ( service && service . enabled ) {
202
- const startTime = process . hrtime . bigint ( ) ;
203
- // const oldStatus = service.status.up;
200
+ async _runChecker ( service , startTime ) {
201
+ try {
202
+ var res = await service . checker . check ( ) ;
204
203
205
- try {
206
- var res = await service . checker . check ( ) ;
204
+ service . status . time =
205
+ Number ( process . hrtime . bigint ( ) - startTime ) / 1000000 ;
206
+ service . status . code = res . code ;
207
+ service . status . message = this . _mapMessages (
208
+ res . code ,
209
+ res . message ,
210
+ service
211
+ ) ;
212
+ service . status . up = 1 ;
213
+
214
+ if ( service . config . expected_status != service . status . code ) {
215
+ service . status . up = 0 ;
216
+ service . status . count . unhealthy_status ++ ;
217
+ log . info ( service . name , ' Unhealthy status: ' + service . status . code ) ;
218
+ }
207
219
220
+ if ( service . status . time > service . config . expected_response_time ) {
221
+ service . status . up = 0 ;
222
+ service . status . count . unhealthy_response_time ++ ;
223
+ log . info (
224
+ service . name ,
225
+ ' Unhealthy response time: ' + service . status . time . toFixed ( 2 ) + 'ms'
226
+ ) ;
227
+ }
228
+
229
+ if ( service . status . up > 0 ) {
230
+ service . status . count . healthy ++ ;
231
+ } else {
232
+ service . status . count . unhealthy ++ ;
233
+ }
234
+ } catch ( e ) {
235
+ if ( e . message . indexOf ( 'ETIMEDOUT' ) > - 1 ) {
208
236
service . status . time =
209
237
Number ( process . hrtime . bigint ( ) - startTime ) / 1000000 ;
210
- service . status . code = res . code ;
238
+
239
+ service . status . count . unhealthy ++ ;
240
+ service . status . up = 0 ;
241
+ service . status . code = 0 ;
242
+
211
243
service . status . message = this . _mapMessages (
212
- res . code ,
213
- res . message ,
244
+ service . status . code ,
245
+ 'Timedout' ,
214
246
service
215
247
) ;
216
- service . status . up = 1 ;
217
248
218
- if ( service . config . expected_status != service . status . code ) {
219
- service . status . up = 0 ;
220
- service . status . count . unhealthy_status ++ ;
221
- log . info ( service . name , ' Unhealthy status: ' + service . status . code ) ;
222
- }
249
+ log . info ( service . name , ' Unhealthy ETIMEDOUT!' ) ;
250
+ } else {
251
+ service . status . time =
252
+ Number ( process . hrtime . bigint ( ) - startTime ) / 1000000 ;
253
+ service . status . count . down ++ ;
254
+ service . status . up = - 1 ;
255
+ service . status . code = - 1 ;
256
+ service . status . message = this . _mapMessages (
257
+ service . status . code ,
258
+ e . message ,
259
+ service
260
+ ) ;
261
+ log . info ( service . name , ' Down! ' , e . message ) ;
262
+ }
223
263
224
- if ( service . status . time > service . config . expected_response_time ) {
225
- service . status . up = 0 ;
226
- service . status . count . unhealthy_response_time ++ ;
227
- log . info (
228
- service . name ,
229
- ' Unhealthy response time: ' + service . status . time . toFixed ( 2 ) + 'ms'
230
- ) ;
231
- }
264
+ log . debug ( service . name , e . message ) ;
265
+ }
232
266
233
- if ( service . status . up > 0 ) {
234
- service . status . count . healthy ++ ;
235
- } else {
236
- service . status . count . unhealthy ++ ;
237
- }
238
- } catch ( e ) {
239
- if ( e . message . indexOf ( 'ETIMEDOUT' ) > - 1 ) {
240
- service . status . up = 0 ;
241
- service . status . count . unhealthy ++ ;
242
- log . info ( service . name , ' Unhealthy ETIMEDOUT!' ) ;
243
- } else {
244
- service . status . time =
245
- Number ( process . hrtime . bigint ( ) - startTime ) / 1000000 ;
246
- service . status . count . down ++ ;
247
- service . status . up = - 1 ;
248
- service . status . code = 0 ;
249
- }
267
+ if ( service . status . last_status == null ) {
268
+ service . status . last_status = service . status . up ;
269
+ }
250
270
251
- log . debug ( service . name , e . message ) ;
271
+ if ( service . status . up > 0 ) {
272
+ if ( ! service . status . last_healthy ) {
273
+ service . status . last_healthy = process . hrtime . bigint ( ) ;
252
274
}
253
-
254
- if ( service . status . last_status == null ) {
255
- service . status . last_status = service . status . up ;
275
+ if ( service . status . last_status < 1 && service . status . last_healthy ) {
276
+ service . status . last_unhealthy_total_duration = (
277
+ Number ( process . hrtime . bigint ( ) - service . status . last_unhealthy ) /
278
+ 1000000000
279
+ ) . toFixed ( 3 ) ;
280
+ log . info (
281
+ service . name ,
282
+ `healthy again after ${ service . status . last_unhealthy_total_duration } second of down time!`
283
+ ) ;
284
+ service . status . last_healthy = process . hrtime . bigint ( ) ;
256
285
}
286
+ } else if ( ! service . status . last_unhealthy || ( service . status . last_status > 0 && service . status . last_unhealthy ) ) {
287
+ service . status . last_unhealthy = process . hrtime . bigint ( ) ;
288
+ }
289
+ }
290
+ async _run ( service ) {
291
+ if ( service && service . enabled ) {
292
+ const startTime = process . hrtime . bigint ( ) ;
257
293
258
- if ( service . status . up > 0 ) {
259
- if ( ! service . status . last_healthy ) {
260
- service . status . last_healthy = process . hrtime . bigint ( ) ;
261
- }
262
- if ( service . status . last_status < 1 && service . status . last_healthy ) {
263
- service . status . last_unhealthy_total_duration = (
264
- Number ( process . hrtime . bigint ( ) - service . status . last_unhealthy ) /
265
- 1000000000
266
- ) . toFixed ( 3 ) ;
267
- log . info (
268
- service . name ,
269
- `healthy again after ${ service . status . last_unhealthy_total_duration } second of down time!`
270
- ) ;
271
- service . status . last_healthy = process . hrtime . bigint ( ) ;
272
- }
273
- } else {
274
- if ( ! service . status . last_unhealthy ) {
275
- service . status . last_unhealthy = process . hrtime . bigint ( ) ;
276
- }
277
- if ( service . status . last_status > 0 && service . status . last_unhealthy ) {
278
- service . status . last_unhealthy = process . hrtime . bigint ( ) ;
279
- }
294
+ try {
295
+ await this . _runChecker ( service , startTime ) ;
296
+ this . stats . updateService ( service . name , service . status ) ;
297
+ await this . alerts . alert ( service ) ;
298
+ } catch ( e ) {
299
+ log . error ( e . message ) ;
280
300
}
281
301
282
- this . stats . updateService ( service . name , service . status ) ;
302
+ try {
303
+ service . status . last_status = service . status . up ;
304
+ const tout =
305
+ service . config . interval -
306
+ Number ( process . hrtime . bigint ( ) - startTime ) / 1000000 ;
283
307
284
- await this . alerts . alert ( service ) ;
285
- service . status . last_status = service . status . up ;
286
- const tout =
287
- service . config . interval -
288
- Number ( process . hrtime . bigint ( ) - startTime ) / 1000000 ;
308
+ if ( tout <= 0 ) {
309
+ log . debug ( service . name + ' tout: ' + ( tout > 0 ? tout : 0 ) ) ;
310
+ }
289
311
290
- if ( tout <= 0 ) {
291
- log . debug ( service . name + ' tout: ' + ( tout > 0 ? tout : 0 ) ) ;
312
+ this . services [ service . name ] . _sTimeoutHandler = setTimeout (
313
+ async ( ) => {
314
+ this . _run ( service ) ;
315
+ } ,
316
+ tout > 0 ? tout : 0
317
+ ) ;
318
+ } catch ( e ) {
319
+ log . fatal ( 'Could not run service: ' + ( service ? service . name : 'Unknown' + ' e:' + e . message ) ) ;
292
320
}
293
-
294
- this . services [ service . name ] . _sTimeoutHandler = setTimeout (
295
- async ( ) => {
296
- this . _runCheck ( service ) ;
297
- } ,
298
- tout > 0 ? tout : 0
299
- ) ;
300
321
}
301
322
}
302
323
}
0 commit comments