Skip to content

Commit

Permalink
Diagnostic client
Browse files Browse the repository at this point in the history
- the client allows talking to the diagnostic server and
decoding the internal values of the overlay and service discovery

- the tool also allows remediation in case of orphan entries

Signed-off-by: Flavio Crisciani <flavio.crisciani@docker.com>
  • Loading branch information
Flavio Crisciani committed Dec 14, 2017
1 parent 0e28450 commit 84493e3
Show file tree
Hide file tree
Showing 5 changed files with 204 additions and 0 deletions.
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ build-local:
@mkdir -p "bin"
go build -tags experimental -o "bin/dnet" ./cmd/dnet
go build -o "bin/docker-proxy" ./cmd/proxy
GOOS=linux go build -o "./cmd/diagnostic/diagnosticClient" ./cmd/diagnostic

clean:
@echo "🐳 $@"
Expand Down
8 changes: 8 additions & 0 deletions cmd/diagnostic/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Image for running the diagnostic client next to a Docker-in-Docker daemon.
FROM docker:17.12-rc-dind

# curl is installed in the image; presumably for querying the diagnostic
# HTTP endpoints by hand (not used by the build steps below).
RUN apk add --no-cache curl

WORKDIR /tool

# daemon.json is installed as the Docker daemon configuration file.
COPY daemon.json /etc/docker/daemon.json
# diagnosticClient must already exist in the build context when the image is built.
COPY diagnosticClient /tool/diagnosticClient
4 changes: 4 additions & 0 deletions cmd/diagnostic/daemon.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"debug": true,
"network-diagnostic-port": 2000
}
Binary file added cmd/diagnostic/diagnosticClient
Binary file not shown.
191 changes: 191 additions & 0 deletions cmd/diagnostic/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
package main

import (
"bufio"
"encoding/base64"
"encoding/json"
"flag"
"fmt"
"io"
"io/ioutil"
"net/http"
"os"
"strings"

"github.com/docker/libnetwork"
"github.com/docker/libnetwork/diagnose"
"github.com/docker/libnetwork/drivers/overlay"
"github.com/sirupsen/logrus"
)

// URL templates for the diagnostic server endpoints. Every template takes the
// server address (%s) and port (%d) first, followed by the endpoint-specific
// query values in the order they appear in the string.
const (
// readyPath is the endpoint queried first to check that the server answers.
readyPath = "http://%s:%d/ready"
// joinNetwork takes the network ID (nid).
joinNetwork = "http://%s:%d/joinnetwork?nid=%s"
// leaveNetwork takes the network ID (nid).
leaveNetwork = "http://%s:%d/leavenetwork?nid=%s"
// clusterPeers takes no extra value; the "json" parameter presumably selects
// a JSON formatted reply (the callers decode the reply with encoding/json).
clusterPeers = "http://%s:%d/clusterpeers?json"
// networkPeers takes the network ID (nid).
networkPeers = "http://%s:%d/networkpeers?nid=%s&json"
// dumpTable takes the network ID (nid) and the table name (tname).
dumpTable = "http://%s:%d/gettable?nid=%s&tname=%s&json"
// deleteEntry takes the network ID (nid), the table name (tname) and the key
// of the entry to delete.
deleteEntry = "http://%s:%d/deleteentry?nid=%s&tname=%s&key=%s&json"
)

// httpIsOk drains the given response body and verifies that the payload
// contains the string "OK". A read failure or a payload without "OK" is
// reported through logrus.Fatalf; otherwise the body is closed before
// returning.
func httpIsOk(body io.ReadCloser) {
	payload, readErr := ioutil.ReadAll(body)
	if readErr != nil {
		logrus.Fatalf("Failed the body parse %s", readErr)
	}

	ready := strings.Contains(string(payload), "OK")
	if !ready {
		logrus.Fatalf("Server not ready %s", payload)
	}

	body.Close()
}

// main is the entry point of the diagnostic client.
//
// It checks that the diagnostic server is ready, fetches the cluster peers
// and, when a network is given with -net, joins that network and fetches its
// peers. If a table is selected with -t ("sd" or "overlay") the table is
// dumped and its entries are checked against the peers; with -r the orphan
// entries can be deleted. The network joined at the beginning is left again
// before returning.
func main() {
	ipPtr := flag.String("ip", "127.0.0.1", "ip address")
	portPtr := flag.Int("port", 2000, "port")
	networkPtr := flag.String("net", "", "target network")
	tablePtr := flag.String("t", "", "table to process <sd/overlay>")
	remediatePtr := flag.Bool("r", false, "perform remediation deleting orphan entries")
	verbosePtr := flag.Bool("v", false, "verbose output")

	flag.Parse()

	if *verbosePtr {
		logrus.SetLevel(logrus.DebugLevel)
	}

	logrus.Infof("Connecting to %s:%d checking ready", *ipPtr, *portPtr)
	resp, err := http.Get(fmt.Sprintf(readyPath, *ipPtr, *portPtr))
	if err != nil {
		logrus.WithError(err).Fatalf("The connection failed")
	}
	httpIsOk(resp.Body)

	// An empty network name makes fetchNodePeers query the cluster-wide peers.
	clusterPeers := fetchNodePeers(*ipPtr, *portPtr, "")
	var networkPeers map[string]string
	var joinedNetwork bool
	if *networkPtr != "" {
		logrus.Infof("Joining the network:%s", *networkPtr)
		resp, err = http.Get(fmt.Sprintf(joinNetwork, *ipPtr, *portPtr, *networkPtr))
		if err != nil {
			logrus.WithError(err).Fatalf("Failed joining the network")
		}
		httpIsOk(resp.Body)
		networkPeers = fetchNodePeers(*ipPtr, *portPtr, *networkPtr)
		joinedNetwork = true
	}

	// fetchTable declares the network peers before the cluster peers; the
	// maps must be passed in that order, otherwise the orphan detection (and
	// the remediation) is computed against the wrong membership list.
	switch *tablePtr {
	case "sd":
		fetchTable(*ipPtr, *portPtr, *networkPtr, "endpoint_table", networkPeers, clusterPeers, *remediatePtr)
	case "overlay":
		fetchTable(*ipPtr, *portPtr, *networkPtr, "overlay_peer_table", networkPeers, clusterPeers, *remediatePtr)
	}

	if joinedNetwork {
		resp, err = http.Get(fmt.Sprintf(leaveNetwork, *ipPtr, *portPtr, *networkPtr))
		if err != nil {
			logrus.WithError(err).Fatalf("Failed leaving the network")
		}
		httpIsOk(resp.Body)
	}
}

// fetchNodePeers asks the diagnostic server at ip:port for its peers and
// returns them as a map from peer name to peer IP.
//
// With an empty network the cluster peers endpoint is queried, otherwise the
// peers of the given network are requested. Any HTTP, read or JSON decoding
// failure is reported through logrus Fatalf.
func fetchNodePeers(ip string, port int, network string) map[string]string {
	logrus.Infof("Fetch peers %s", network)

	// Default to the cluster-wide endpoint and switch to the per-network one
	// only when a network was requested.
	endpoint := fmt.Sprintf(clusterPeers, ip, port)
	if network != "" {
		endpoint = fmt.Sprintf(networkPeers, ip, port, network)
	}

	resp, err := http.Get(endpoint)
	if err != nil {
		logrus.WithError(err).Fatalf("Failed fetching path")
	}
	defer resp.Body.Close()

	raw, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		logrus.WithError(err).Fatalf("Failed the body parse")
	}

	// Details is pre-populated with the concrete result type that the type
	// assertion below expects.
	reply := diagnose.HTTPResult{Details: &diagnose.TablePeersResult{}}
	if err = json.Unmarshal(raw, &reply); err != nil {
		logrus.WithError(err).Fatalf("Failed the json unmarshalling")
	}

	logrus.Debugf("Parsing JSON response")
	peers := reply.Details.(*diagnose.TablePeersResult)
	byName := make(map[string]string, peers.Length)
	for _, peer := range peers.Elements {
		logrus.Debugf("name:%s ip:%s", peer.Name, peer.IP)
		byName[peer.Name] = peer.IP
	}
	return byName
}

// fetchTable dumps the table tableName of the given network from the
// diagnostic server at ip:port and checks the owner of every entry.
//
// Entries whose owner is not a key of networkPeers are reported and collected
// as orphans; entries whose owner is not a key of clusterPeers are reported
// only. When remediate is true and at least one orphan was found, the user is
// asked on stdin to confirm with "Yes" before the orphan keys are deleted
// through the deleteentry endpoint.
//
// tableName also selects how the entry values are decoded for the debug
// output: "endpoint_table" uses libnetwork.EndpointRecord and
// "overlay_peer_table" uses overlay.PeerRecord.
func fetchTable(ip string, port int, network, tableName string, networkPeers, clusterPeers map[string]string, remediate bool) {
	logrus.Infof("Fetch %s table and check owners", tableName)
	resp, err := http.Get(fmt.Sprintf(dumpTable, ip, port, network, tableName))
	if err != nil {
		logrus.WithError(err).Fatalf("Failed fetching endpoint table")
	}
	defer resp.Body.Close()
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		logrus.WithError(err).Fatalf("Failed the body parse")
	}

	output := diagnose.HTTPResult{Details: &diagnose.TableEndpointsResult{}}
	err = json.Unmarshal(body, &output)
	if err != nil {
		logrus.WithError(err).Fatalf("Failed the json unmarshalling")
	}

	logrus.Debug("Parsing data structures")
	var orphanKeys []string
	table := output.Details.(*diagnose.TableEndpointsResult)
	for _, v := range table.Elements {
		decoded, err := base64.StdEncoding.DecodeString(v.Value)
		if err != nil {
			logrus.WithError(err).Errorf("Failed decoding entry")
			continue
		}
		// The decoded value is only used for the debug output. A record that
		// cannot be unmarshalled is reported, but its owner is still checked
		// below because that check does not depend on the value.
		switch tableName {
		case "endpoint_table":
			var elem libnetwork.EndpointRecord
			if err := elem.Unmarshal(decoded); err != nil {
				logrus.WithError(err).Errorf("Failed unmarshalling entry with key:%s", v.Key)
			} else {
				logrus.Debugf("key:%s value:%+v owner:%s", v.Key, elem, v.Owner)
			}
		case "overlay_peer_table":
			var elem overlay.PeerRecord
			if err := elem.Unmarshal(decoded); err != nil {
				logrus.WithError(err).Errorf("Failed unmarshalling entry with key:%s", v.Key)
			} else {
				logrus.Debugf("key:%s value:%+v owner:%s", v.Key, elem, v.Owner)
			}
		}

		if _, ok := networkPeers[v.Owner]; !ok {
			logrus.Warnf("The element with key:%s does not belong to any node on this network", v.Key)
			orphanKeys = append(orphanKeys, v.Key)
		}
		if _, ok := clusterPeers[v.Owner]; !ok {
			logrus.Warnf("The element with key:%s does not belong to any node on this cluster", v.Key)
		}
	}

	if len(orphanKeys) == 0 || !remediate {
		return
	}

	logrus.Warnf("The following keys:%v results as orphan, do you want to proceed with the deletion (this operation is irreversible)? [Yes/No]", orphanKeys)
	reader := bufio.NewReader(os.Stdin)
	text, err := reader.ReadString('\n')
	// io.EOF only means that the answer was not newline terminated; any other
	// error leaves the answer unknown, so the irreversible deletion is skipped.
	if err != nil && err != io.EOF {
		logrus.WithError(err).Errorf("Failed reading the confirmation, deletion skipped")
		return
	}
	// Strip the line terminator, tolerating CRLF input as well.
	if strings.TrimRight(text, "\r\n") != "Yes" {
		logrus.Infof("Deletion skipped")
		return
	}

	for _, k := range orphanKeys {
		delResp, err := http.Get(fmt.Sprintf(deleteEntry, ip, port, network, tableName, k))
		if err != nil {
			logrus.WithError(err).Errorf("Failed deleting entry k:%s", k)
			// Stop at the first failure, the remaining keys are left untouched.
			break
		}
		delResp.Body.Close()
	}
}

0 comments on commit 84493e3

Please sign in to comment.