Skip to content

Commit

Permalink
Added metrics to self-healing logic (#841)
Browse files Browse the repository at this point in the history
  • Loading branch information
begmaroman authored Jun 22, 2022
1 parent 9ed581d commit fefd149
Show file tree
Hide file tree
Showing 14 changed files with 310 additions and 72 deletions.
12 changes: 8 additions & 4 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,20 @@ jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3

- name: Install Go
uses: actions/setup-go@v1
uses: actions/setup-go@v3
with:
go-version: 1.13
go-version: 1.18.x

- name: "Build binaries"
run: make build

- name: "Run tests"
run: make tests

- name: Upload coverage to Codecov
uses: codecov/codecov-action@v1
uses: codecov/codecov-action@v2
with:
file: ./cover.out
3 changes: 2 additions & 1 deletion .github/workflows/dockerimage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3

- name: Build the Docker image
env:
DOCKERHUB: ${{ secrets.DOCKERHUB }}
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,14 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v2
uses: actions/checkout@v3
with:
fetch-depth: 0

- name: Set up Go
uses: actions/setup-go@master
with:
go-version: 1.17.x
go-version: 1.18.x

- name: Prepare
id: prepare
Expand Down
3 changes: 1 addition & 2 deletions bridge/cmd/purge.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ var purgeCmd = &cobra.Command{
}

func purgeQueue() {
var logger = helper.Logger.With("module", "bridge/cmd/")
dialer := helper.GetConfig().AmqpURL

// amqp dialer
Expand All @@ -33,7 +32,7 @@ func purgeQueue() {
panic(err)
}

if _, err := channel.QueuePurge(queue.QueueName, false); err != nil {
if _, err = channel.QueuePurge(queue.QueueName, false); err != nil {
logger.Error("purgeQueue | QueuePurge", "Error", err)
}
}
Expand Down
17 changes: 10 additions & 7 deletions bridge/cmd/reset.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,18 @@ import (
var resetCmd = &cobra.Command{
Use: "unsafe-reset-all",
Short: "Reset bridge server data",
Run: func(cmd *cobra.Command, args []string) {
RunE: func(cmd *cobra.Command, args []string) error {
dbLocation := viper.GetString(bridgeDBFlag)
if dir, err := ioutil.ReadDir(dbLocation); err != nil {
// fmt.Println(err)
} else {
for _, d := range dir {
os.RemoveAll(path.Join([]string{dbLocation, d.Name()}...))
}
dir, err := ioutil.ReadDir(dbLocation)
if err != nil {
return err
}

for _, d := range dir {
os.RemoveAll(path.Join([]string{dbLocation, d.Name()}...))
}

return nil
},
}

Expand Down
42 changes: 38 additions & 4 deletions bridge/cmd/root.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
package cmd

import (
"context"
"fmt"
"net/http"
"os"
"path/filepath"
"time"

"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/spf13/cobra"
"github.com/spf13/viper"

Expand All @@ -18,20 +22,35 @@ const (
logsTypeFlag = "logs-type"
)

var (
logger = helper.Logger.With("module", "bridge/cmd/")

metricsServer http.Server
)

// rootCmd represents the base command when called without any subcommands
var rootCmd = &cobra.Command{
Use: "heimdall-bridge",
Short: "Heimdall bridge deamon",
PersistentPreRun: func(cmd *cobra.Command, args []string) {
if cmd.Use != version.Cmd.Use {
// initialize tendermint viper config
InitTendermintViperConfig(cmd)
initTendermintViperConfig(cmd)

// init metrics server
initMetrics()
}
},
PostRunE: func(cmd *cobra.Command, args []string) error {
ctx, cancel := context.WithTimeout(context.Background(), time.Second*10)
defer cancel()

return metricsServer.Shutdown(ctx)
},
}

// InitTendermintViperConfig sets global viper configuration needed to heimdall
func InitTendermintViperConfig(cmd *cobra.Command) {
// initTendermintViperConfig sets global viper configuration needed to heimdall
func initTendermintViperConfig(cmd *cobra.Command) {
tendermintNode, _ := cmd.Flags().GetString(helper.NodeFlag)
homeValue, _ := cmd.Flags().GetString(helper.HomeFlag)
withHeimdallConfigValue, _ := cmd.Flags().GetString(helper.WithHeimdallConfigFlag)
Expand All @@ -56,6 +75,22 @@ func InitTendermintViperConfig(cmd *cobra.Command) {
helper.InitHeimdallConfig("")
}

// initMetrics initializes metrics server with the default handler
func initMetrics() {
metricsServer = http.Server{
Addr: ":2112",
}

http.Handle("/metrics", promhttp.Handler())

go func() {
if err := metricsServer.ListenAndServe(); err != nil {
logger.Error("failed to start metrics server", "error", err)
os.Exit(1)
}
}()
}

// Execute adds all child commands to the root command and sets flags appropriately.
// This is called by main.main(). It only needs to happen once to the rootCmd.
func Execute() {
Expand Down Expand Up @@ -93,7 +128,6 @@ func init() {
"Use json logger",
)

var logger = helper.Logger.With("module", "bridge/cmd/")
// bind all flags with viper
if err := viper.BindPFlags(rootCmd.Flags()); err != nil {
logger.Error("init | BindPFlag | rootCmd.Flags", "Error", err)
Expand Down
8 changes: 3 additions & 5 deletions bridge/cmd/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import (

"github.com/cosmos/cosmos-sdk/client"
"github.com/spf13/cobra"

"github.com/spf13/viper"
"github.com/tendermint/tendermint/libs/common"
httpClient "github.com/tendermint/tendermint/rpc/client"
Expand All @@ -32,14 +31,13 @@ const (

// GetStartCmd returns the start command to start bridge
func GetStartCmd() *cobra.Command {
var logger = helper.Logger.With("module", "bridge/cmd/")
startCmd := &cobra.Command{
Use: "start",
Short: "Start bridge server",
Run: func(cmd *cobra.Command, args []string) {

// create codec
cdc := app.MakeCodec()

// queue connector & http client
_queueConnector := queue.NewQueueConnector(helper.GetConfig().AmqpURL)
_queueConnector.StartWorker()
Expand Down Expand Up @@ -106,12 +104,12 @@ func GetStartCmd() *cobra.Command {
time.Sleep(waitDuration)
}

// strt all processes
// Start all processes
for _, service := range services {
go func(serv common.Service) {
defer wg.Done()
// TODO handle error while starting service
if err := serv.Start(); err != nil {
if err = serv.Start(); err != nil {
logger.Error("GetStartCmd | serv.Start", "Error", err)
}
<-serv.Quit()
Expand Down
45 changes: 40 additions & 5 deletions bridge/setu/listener/rootchain_selfheal.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,30 +2,50 @@ package listener

import (
"context"
"fmt"
"sync"
"time"

"github.com/maticnetwork/bor/core/types"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"

"github.com/maticnetwork/bor/core/types"
"github.com/maticnetwork/heimdall/bridge/setu/util"
"github.com/maticnetwork/heimdall/contracts/stakinginfo"
"github.com/maticnetwork/heimdall/contracts/statesender"
"github.com/maticnetwork/heimdall/helper"
)

var (
stateSyncedCounter = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: "self_healing",
Subsystem: helper.NetworkName,
Name: "StateSynced",
Help: "The total number of missing StateSynced events",
}, []string{"id", "contract_address", "block_number", "tx_hash"})

stakeUpdateCounter = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: "self_healing",
Subsystem: helper.NetworkName,
Name: "StakeUpdate",
Help: "The total number of missing StakeUpdate events",
}, []string{"id", "nonce", "contract_address", "block_number", "tx_hash"})
)

// startSelfHealing starts self-healing processes for all required events
func (rl *RootChainListener) startSelfHealing(ctx context.Context) {
stakeUpdateTicker := time.NewTicker(helper.GetConfig().SHStakeUpdateInterval)
stateSyncedTicker := time.NewTicker(helper.GetConfig().SHStateSyncedInterval)

rl.Logger.Info("Started self-healing")
for {
select {
case <-stakeUpdateTicker.C:
rl.processStakeUpdate(ctx)
case <-stateSyncedTicker.C:
rl.processStateSync(ctx)
rl.processStateSynced(ctx)
case <-ctx.Done():
rl.Logger.Info("Stopping stake update worker")
rl.Logger.Info("Stopping self-healing")
stakeUpdateTicker.Stop()
stateSyncedTicker.Stop()
return
Expand Down Expand Up @@ -76,6 +96,14 @@ func (rl *RootChainListener) processStakeUpdate(ctx context.Context) {
return
}

stakeUpdateCounter.WithLabelValues(
stakeUpdate.ValidatorId.String(),
stakeUpdate.Nonce.String(),
stakeUpdate.Raw.Address.String(),
fmt.Sprintf("%d", stakeUpdate.Raw.BlockNumber),
stakeUpdate.Raw.TxHash.String(),
).Add(1)

if _, err = rl.processEvent(ctx, stakeUpdate.Raw); err != nil {
rl.Logger.Error("Error processing stake update for validator", "error", err, "id", id)
}
Expand All @@ -84,8 +112,8 @@ func (rl *RootChainListener) processStakeUpdate(ctx context.Context) {
wg.Wait()
}

// processStateSync checks if chains are in sync, otherwise syncs them by broadcasting missing events
func (rl *RootChainListener) processStateSync(ctx context.Context) {
// processStateSynced checks if chains are in sync, otherwise syncs them by broadcasting missing events
func (rl *RootChainListener) processStateSynced(ctx context.Context) {
latestPolygonStateId, err := rl.getCurrentStateID(ctx)
if err != nil {
rl.Logger.Error("Unable to fetch latest state id from state receiver contract", "error", err)
Expand Down Expand Up @@ -119,6 +147,13 @@ func (rl *RootChainListener) processStateSync(ctx context.Context) {
continue
}

stateSyncedCounter.WithLabelValues(
stateSynced.Id.String(),
stateSynced.Raw.Address.String(),
fmt.Sprintf("%d", stateSynced.Raw.BlockNumber),
stateSynced.Raw.TxHash.String(),
).Add(1)

var ignore bool
if ignore, err = rl.processEvent(ctx, stateSynced.Raw); err != nil {
rl.Logger.Error("Unable to update state id on heimdall", "error", err)
Expand Down
6 changes: 3 additions & 3 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ require (
github.com/pborman/uuid v1.2.0
github.com/peterh/liner v1.2.0 // indirect
github.com/pkg/errors v0.9.1
github.com/prometheus/client_golang v1.12.1
github.com/prometheus/tsdb v0.10.0 // indirect
github.com/prysmaticlabs/prysm v0.0.0-20190507024903-1be950f90cad
github.com/rakyll/statik v0.1.6
Expand All @@ -48,11 +49,10 @@ require (
github.com/tendermint/tendermint v0.32.7
github.com/tendermint/tm-db v0.2.0
github.com/tyler-smith/go-bip39 v1.0.2 // indirect
golang.org/x/crypto v0.0.0-20200429183012-4b2356b1ed79 // indirect
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e
golang.org/x/sync v0.0.0-20201207232520-09787c993a3a
golang.org/x/sys v0.0.0-20220615213510-4f61da869c0c // indirect
gopkg.in/olebedev/go-duktape.v3 v3.0.0-20200619000410-60c24ae608a6 // indirect
gopkg.in/yaml.v2 v2.2.8
gopkg.in/yaml.v2 v2.4.0
)

replace github.com/tendermint/tendermint => github.com/maticnetwork/tendermint v0.26.0-dev0.0.20220311091200-3ea059b213ed
Expand Down
Loading

0 comments on commit fefd149

Please sign in to comment.