Skip to content

Commit 00c1dba

Browse files
committed
fix multi-cluster contexts with talos
1 parent 94a17da commit 00c1dba

20 files changed

+248
-213
lines changed

.envrc

+2-7
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,8 @@
11
#shellcheck disable=SC2148,SC2155
2-
export ANSIBLE_CONFIG=$(expand_path ./ansible.cfg)
3-
export ANSIBLE_HOST_KEY_CHECKING="False"
2+
export KUBECONFIG="$(expand_path ./kubeconfig)"
43
export SOPS_AGE_KEY_FILE=$(expand_path ~/.config/sops/age/keys.txt)
4+
55
export LOCAL_ANSIBLE_PYTHON_INTERPRETER=$(which python)
6-
# Venv
76
PATH_add "$(expand_path ./.venv/bin)"
87
export VIRTUAL_ENV="$(expand_path ./.venv)"
98
export PYTHONDONTWRITEBYTECODE="1"
10-
# Talos
11-
export TALOSCONFIG="$(expand_path ./kubernetes/bootstrap/talos/clusterconfig/talosconfig)"
12-
# Bin
13-
PATH_add "$(expand_path ./.bin)"

.sops.yaml

+3-5
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,10 @@
11
---
22
creation_rules:
3-
- # IMPORTANT: Keep this rule first
4-
path_regex: talos/.+\.secret(\.sops)?\.ya?ml
5-
input_type: yaml
6-
encrypted_regex: ^(token|crt|key|id|secret|secretboxEncryptionSecret|ca)$
3+
- # IMPORTANT: This rule MUST be above the others
4+
path_regex: talos/.*\.sops\.ya?ml
75
key_groups:
86
- age:
9-
- age148wprsnqjq8jughvywnzmvs8gffhrkendpr7g60q8u4rdsj4jvuqk7ltrs
7+
- "age148wprsnqjq8jughvywnzmvs8gffhrkendpr7g60q8u4rdsj4jvuqk7ltrs"
108
- path_regex: kubernetes/.*\.sops\.ya?ml
119
encrypted_regex: "^(data|stringData)$"
1210
key_groups:

.taskfiles/kubernetes.yaml

+7-6
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,14 @@ version: "3"
44

55
vars:
66
KUBECONFORM_SCRIPT: "{{.SCRIPTS_DIR}}/kubeconform.sh"
7+
KUBECTL_CMD: "kubectl --context {{.cluster}}"
78

89
tasks:
910
resources:
1011
desc: Gather common resources in your cluster, useful when asking for support
1112
cmds:
1213
- for: { var: resource }
13-
cmd: kubectl --context {{.cluster}} get {{.ITEM}} {{.CLI_ARGS | default "-A"}}
14+
cmd: '{{.KUBECTL_CMD}} get {{.ITEM}} {{.CLI_ARGS | default "-A"}}'
1415
vars:
1516
resource: >-
1617
nodes
@@ -40,8 +41,8 @@ tasks:
4041
top:
4142
desc: List top metrics
4243
cmds:
43-
- kubectl --context {{.cluster}} top node
44-
- kubectl --context {{.cluster}} top pod -A
44+
- "{{.KUBECTL_CMD}} top node"
45+
- "{{.KUBECTL_CMD}} top pod -A"
4546
requires:
4647
vars: ["cluster"]
4748

@@ -53,7 +54,7 @@ tasks:
5354
ns: Namespace to browse PersistentVolumeClaims in (default: default)
5455
claim: PersistentVolumeClaim to browse (required)
5556
interactive: true
56-
cmd: kubectl browse-pvc --context {{.cluster}} --namespace {{.ns}} {{.claim}}
57+
cmd: "{{.KUBECTL_CMD}} browse-pvc --namespace {{.ns}} {{.claim}}"
5758
vars:
5859
ns: '{{.ns | default "default"}}'
5960
requires:
@@ -65,7 +66,7 @@ tasks:
6566
Args:
6667
cluster: Cluster to run command against (required)
6768
node: Node to drain (required)
68-
cmd: kubectl --context {{.cluster}} drain {{.node}} --ignore-daemonsets --delete-local-data --force
69+
cmd: "{{.KUBECTL_CMD}} drain {{.node}} --ignore-daemonsets --delete-local-data --force"
6970
requires:
7071
vars: ["cluster", "node"]
7172

@@ -76,6 +77,6 @@ tasks:
7677
cluster: Cluster to run command against (required)
7778
cmds:
7879
- for: ["Evicted", "Failed", "Succeeded"]
79-
cmd: kubectl --context {{.cluster}} delete pods --field-selector status.phase={{.ITEM}} -A --ignore-not-found=true
80+
cmd: "{{.KUBECTL_CMD}} delete pods --field-selector status.phase={{.ITEM}} -A --ignore-not-found=true"
8081
requires:
8182
vars: ["cluster"]

.taskfiles/talos.yaml

+53-65
Original file line numberDiff line numberDiff line change
@@ -2,126 +2,114 @@
22
# yaml-language-server: $schema=https://taskfile.dev/schema.json
33
version: "3"
44

5-
x-vars: &vars
6-
TALOS_VERSION:
7-
sh: yq 'select(document_index == 1).spec.postBuild.substitute.TALOS_VERSION' {{.KUBERNETES_DIR}}/{{.cluster}}/apps/system-upgrade/system-upgrade-controller/ks.yaml
8-
TALOS_SCHEMATIC_ID:
9-
sh: yq 'select(document_index == 1).spec.postBuild.substitute.TALOS_SCHEMATIC_ID' {{.KUBERNETES_DIR}}/{{.cluster}}/apps/system-upgrade/system-upgrade-controller/ks.yaml
10-
KUBERNETES_VERSION:
11-
sh: yq 'select(document_index == 1).spec.postBuild.substitute.KUBERNETES_VERSION' {{.KUBERNETES_DIR}}/{{.cluster}}/apps/system-upgrade/system-upgrade-controller/ks.yaml
12-
CONTROLLER:
13-
sh: talosctl --context {{.cluster}} config info --output json | jq --raw-output '.endpoints[]' | shuf -n 1
5+
vars:
6+
BOOTSTRAP_TALOS_DIR: "{{.KUBERNETES_DIR}}/{{.cluster}}/bootstrap/talos"
7+
TALHELPER_CLUSTER_DIR: "{{.BOOTSTRAP_TALOS_DIR}}/clusterconfig"
8+
TALHELPER_SECRET_FILE: "{{.BOOTSTRAP_TALOS_DIR}}/talsecret.sops.yaml"
9+
TALHELPER_CONFIG_FILE: "{{.BOOTSTRAP_TALOS_DIR}}/talconfig.yaml"
10+
HELMFILE_FILE: "{{.KUBERNETES_DIR}}/{{.cluster}}/bootstrap/helmfile.yaml"
11+
TALOSCONFIG_FILE: "{{.TALHELPER_CLUSTER_DIR}}/talosconfig"
12+
13+
env:
14+
TALOSCONFIG: "{{.TALOSCONFIG_FILE}}"
1415

1516
tasks:
1617
bootstrap:
1718
desc: Bootstrap the Talos cluster
18-
dir: "{{.KUBERNETES_DIR}}/{{.cluster}}/bootstrap/talos"
19+
dir: "/{{.BOOTSTRAP_TALOS_DIR}}"
1920
cmds:
2021
- |
21-
if [ ! -f "{{.KUBERNETES_DIR}}/{{.cluster}}/bootstrap/talos/talsecret.sops.yaml" ]; then
22-
talhelper gensecret > {{.KUBERNETES_DIR}}/{{.cluster}}/bootstrap/talos/talsecret.sops.yaml
23-
sops --encrypt --in-place {{.KUBERNETES_DIR}}/{{.cluster}}/bootstrap/talos/talsecret.sops.yaml
22+
if [ ! -f "{{.TALHELPER_SECRET_FILE}}" ]; then
23+
talhelper gensecret > {{.TALHELPER_SECRET_FILE}}
24+
sops --encrypt --in-place {{.TALHELPER_SECRET_FILE}}
2425
fi
25-
- talhelper genconfig --config-file {{.KUBERNETES_DIR}}/{{.cluster}}/bootstrap/talos/talconfig.yaml --secret-file {{.KUBERNETES_DIR}}/{{.cluster}}/bootstrap/talos/talsecret.sops.yaml --out-dir {{.KUBERNETES_DIR}}/{{.cluster}}/bootstrap/talos/clusterconfig
26-
- talhelper gencommand apply --config-file {{.KUBERNETES_DIR}}/{{.cluster}}/bootstrap/talos/talconfig.yaml --out-dir {{.KUBERNETES_DIR}}/{{.cluster}}/bootstrap/talos/clusterconfig --extra-flags="--insecure" | bash
27-
- until talhelper gencommand bootstrap --config-file {{.KUBERNETES_DIR}}/{{.cluster}}/bootstrap/talos/talconfig.yaml --out-dir {{.KUBERNETES_DIR}}/{{.cluster}}/bootstrap/talos/clusterconfig | bash; do sleep 10; done
26+
- talhelper genconfig --config-file {{.TALHELPER_CONFIG_FILE}} --secret-file {{.TALHELPER_SECRET_FILE}} --out-dir {{.TALHELPER_CLUSTER_DIR}}
27+
- talhelper gencommand apply --config-file {{.TALHELPER_CONFIG_FILE}} --out-dir {{.TALHELPER_CLUSTER_DIR}} --extra-flags="--insecure" | bash
28+
- until talhelper gencommand bootstrap --config-file {{.TALHELPER_CONFIG_FILE}} --out-dir {{.TALHELPER_CLUSTER_DIR}} | bash; do sleep 10; done
2829
- task: fetch-kubeconfig
2930
- task: install-helm-apps
30-
- talosctl --context {{.cluster}} health --server=false
31+
- task: health
3132
requires:
3233
vars: ["cluster"]
3334
preconditions:
3435
- msg: Missing talhelper config file
35-
sh: test -f {{.KUBERNETES_DIR}}/{{.cluster}}/bootstrap/talos/talconfig.yaml
36+
sh: test -f {{.TALHELPER_CONFIG_FILE}}
3637
- msg: Missing Sops config file
3738
sh: test -f {{.SOPS_CONFIG_FILE}}
3839
- msg: Missing Sops Age key file
3940
sh: test -f {{.AGE_FILE}}
4041

41-
fetch-kubeconfig:
42-
desc: Fetch kubeconfig
43-
dir: "{{.KUBERNETES_DIR}}/{{.cluster}}/bootstrap/talos"
44-
cmd: |
45-
until talhelper gencommand kubeconfig --config-file {{.KUBERNETES_DIR}}/{{.cluster}}/bootstrap/talos/talconfig.yaml \
46-
--out-dir {{.KUBERNETES_DIR}}/{{.cluster}}/bootstrap/talos/clusterconfig \
47-
--extra-flags="{{.ROOT_DIR}} --force --force-context-name {{.cluster}}" \
48-
| bash; do sleep 10; done
42+
health:
43+
desc: Get Talos cluster health
44+
dir: "/{{.BOOTSTRAP_TALOS_DIR}}"
45+
cmd: "talosctl health --server=false"
4946
requires:
5047
vars: ["cluster"]
5148
preconditions:
52-
- msg: Missing talhelper config file
53-
sh: test -f {{.KUBERNETES_DIR}}/{{.cluster}}/bootstrap/talos/talconfig.yaml
49+
- msg: Missing talosconfig
50+
sh: test -f {{.TALOSCONFIG_FILE}}
5451

55-
apply-config:
56-
desc: Apply Talos configuration to a node
57-
cmd: |
58-
sops --decrypt {{.KUBERNETES_DIR}}/{{.cluster}}/bootstrap/talos/assets/{{.hostname}}.secret.sops.yaml | \
59-
envsubst | \
60-
talosctl --context {{.cluster}} apply-config --mode={{.mode}} --nodes {{.hostname}} --file /dev/stdin
61-
env: *vars
62-
vars:
63-
mode: '{{.mode | default "no-reboot"}}'
52+
fetch-kubeconfig:
53+
desc: Fetch kubeconfig
54+
dir: "/{{.BOOTSTRAP_TALOS_DIR}}"
55+
cmd: until talhelper gencommand kubeconfig --config-file {{.TALHELPER_CONFIG_FILE}} --out-dir {{.TALHELPER_CLUSTER_DIR}} --extra-flags="{{.ROOT_DIR}} --force" | bash; do sleep 10; done
6456
requires:
65-
vars: ["cluster", "hostname"]
57+
vars: ["cluster"]
6658
preconditions:
67-
- test -f {{.KUBERNETES_DIR}}/{{.cluster}}/talosconfig
68-
- test -f {{.KUBERNETES_DIR}}/{{.cluster}}/bootstrap/talos/assets/{{.hostname}}.secret.sops.yaml
69-
- talosctl --context {{.cluster}} --nodes {{.hostname}} get machineconfig >/dev/null 2>&1
70-
59+
- msg: Missing talhelper config file
60+
sh: test -f {{.TALHELPER_CONFIG_FILE}}
7161

7262
install-helm-apps:
7363
desc: Bootstrap core apps needed for Talos
74-
dir: "{{.KUBERNETES_DIR}}/bootstrap/talos"
64+
dir: "/{{.BOOTSTRAP_TALOS_DIR}}"
7565
cmds:
76-
- until kubectl --kube-context {{.cluster}} wait --for=condition=Ready=False nodes --all --timeout=600s; do sleep 10; done
77-
- helmfile --kube-context {{.cluster}} --file {{.KUBERNETES_DIR}}/{{.cluster}}/bootstrap/helmfile.yaml apply --skip-diff-on-install --suppress-diff
78-
- until kubectl --kube-context {{.cluster}} wait --for=condition=Ready nodes --all --timeout=600s; do sleep 10; done
66+
- until kubectl --kubeconfig {{.KUBECONFIG_FILE}} wait --for=condition=Ready=False nodes --all --timeout=600s; do sleep 10; done
67+
- helmfile --kubeconfig {{.KUBECONFIG_FILE}} --file {{.HELMFILE_FILE}} apply --skip-diff-on-install --suppress-diff
68+
- until kubectl --kubeconfig {{.KUBECONFIG_FILE}} wait --for=condition=Ready nodes --all --timeout=600s; do sleep 10; done
7969
requires:
8070
vars: ["cluster"]
8171
preconditions:
82-
- msg: Missing talosconfig
83-
sh: test -f {{.KUBERNETES_DIR}}/{{.cluster}}/talosconfig
84-
- msg: Unable to retrieve Talos config
85-
sh: talosctl --context {{.cluster}} config info >/dev/null 2>&1
72+
- msg: Missing kubeconfig
73+
sh: test -f {{.KUBECONFIG_FILE}}
8674
- msg: Missing helmfile
87-
sh: test -f {{.KUBERNETES_DIR}}/{{.cluster}}/bootstrap/helmfile.yaml
75+
sh: test -f {{.HELMFILE_FILE}}
8876

8977
upgrade:
9078
desc: Upgrade Talos on a node
91-
dir: "{{.KUBERNETES_DIR}}/bootstrap/talos"
79+
dir: "/{{.BOOTSTRAP_TALOS_DIR}}"
9280
cmds:
93-
- talosctl --context {{.cluster}} --nodes {{.node}} upgrade --image {{.image}} --wait=true --timeout=10m --preserve=true --reboot-mode={{.mode}}
94-
- talosctl --context {{.cluster}} --nodes {{.node}} health --wait-timeout=10m --server=false
81+
- "talosctl --nodes {{.node}} upgrade --image {{.image}} --wait=true --timeout=10m --preserve=true --reboot-mode={{.mode}}"
82+
- "talosctl --nodes {{.node}} health --wait-timeout=10m --server=false"
9583
vars:
9684
mode: '{{.mode | default "default"}}'
9785
requires:
9886
vars: ["cluster", "node", "image"]
9987
preconditions:
10088
- msg: Missing talosconfig
101-
sh: test -f {{.KUBERNETES_DIR}}/{{.cluster}}/talosconfig
89+
sh: test -f {{.TALOSCONFIG_FILE}}
10290
- msg: Unable to retrieve Talos config
103-
sh: talosctl --context {{.cluster}} config info >/dev/null 2>&1
91+
sh: "talosctl config info >/dev/null 2>&1"
10492
- msg: Node not found
105-
sh: talosctl --context {{.cluster}} --nodes {{.node}} get machineconfig >/dev/null 2>&1
93+
sh: "talosctl --nodes {{.node}} get machineconfig >/dev/null 2>&1"
10694

10795
upgrade-k8s:
10896
desc: Upgrade Kubernetes across the cluster
109-
dir: "{{.KUBERNETES_DIR}}/{{.cluster}}/bootstrap/talos"
110-
cmd: talosctl --context {{.cluster}} --nodes {{.controller}} upgrade-k8s --to {{.to}}
97+
dir: "/{{.BOOTSTRAP_TALOS_DIR}}"
98+
cmd: "talosctl --nodes {{.controller}} upgrade-k8s --to {{.to}}"
11199
requires:
112100
vars: ["cluster", "controller", "to"]
113101
preconditions:
114102
- msg: Missing talosconfig
115-
sh: test -f {{.KUBERNETES_DIR}}/{{.cluster}}/talosconfig
103+
sh: test -f {{.TALOSCONFIG_FILE}}
116104
- msg: Unable to retrieve Talos config
117-
sh: talosctl --context {{.cluster}} config info >/dev/null 2>&1
105+
sh: "talosctl config info >/dev/null 2>&1"
118106
- msg: Node not found
119-
sh: talosctl --context {{.cluster}} --nodes {{.controller}} get machineconfig >/dev/null 2>&1
107+
sh: "talosctl --nodes {{.controller}} get machineconfig >/dev/null 2>&1"
120108

121-
nuke:
109+
destroy:
122110
desc: Resets nodes back to maintenance mode
123-
dir: "{{.KUBERNETES_DIR}}/{{.cluster}}/bootstrap/talos"
111+
dir: "/{{.BOOTSTRAP_TALOS_DIR}}"
124112
prompt: This will destroy your cluster and reset the nodes back to maintenance mode... continue?
125-
cmd: talhelper gencommand reset --config-file {{.KUBERNETES_DIR}}/{{.cluster}}/bootstrap/talos/talconfig.yaml --out-dir {{.KUBERNETES_DIR}}/{{.cluster}}/bootstrap/talos/clusterconfig --extra-flags="--reboot {{- if eq .CLI_FORCE false }} --system-labels-to-wipe STATE --system-labels-to-wipe EPHEMERAL{{ end }} --graceful=false --wait=false" | bash
113+
cmd: talhelper gencommand reset --config-file {{.TALHELPER_CONFIG_FILE}} --out-dir {{.TALHELPER_CLUSTER_DIR}} --extra-flags="--reboot {{- if eq .CLI_FORCE false }} --system-labels-to-wipe STATE --system-labels-to-wipe EPHEMERAL{{ end }} --graceful=false --wait=false" | bash
126114
requires:
127115
vars: ["cluster"]

README.md

+21-1
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ task talos:bootstrap
4242
Install flux.
4343

4444
```sh
45-
task flux:{verify,github-deploy-key,bootstrap}
45+
task flux:{verify,bootstrap}
4646
```
4747

4848
Verify the installation.
@@ -55,6 +55,26 @@ kubectl -n flux-system get pods -o wide
5555
task kubernetes:resources
5656
```
5757

58+
#### GitHub Webhook
59+
60+
Set up a webhook to reconcile Flux when changes are pushed to GitHub. Note: this only works with Let's Encrypt Production certificates.
61+
62+
Get the webhook path:
63+
64+
```sh
65+
kubectl -n flux-system get receiver github-receiver -o jsonpath='{.status.webhookPath}'
66+
```
67+
68+
Append to self-hosted domain:
69+
70+
```text
71+
https://flux-webhook.${DOMAIN}/hook/12ebd1e363c641dc3c2e430ecf3cee2b3c7a5ac9e1234506f6f5f3ce1230e123
72+
```
73+
74+
Generate a webhook token with `openssl rand -hex 16` and add it to the secret: `kubernetes/<cluster>/apps/flux-system/webhooks/app/github/secret.sops.yaml`.
75+
76+
Add the webhook to the repository's "Settings/Webhooks" > "Add webhook" button. Add the URL and token.
77+
5878
### Deployments
5979

6080
Most helm deployments in this repo utilize this useful [`app-template` chart](https://github.com/bjw-s/helm-charts).
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
homelab-k-0.yaml
2+
homelab-k-1.yaml
3+
homelab-k-2.yaml
4+
homelab-k-3.yaml
5+
homelab-k-4.yaml
6+
homelab-k-5.yaml
7+
homelab-k-6.yaml
8+
talosconfig
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
machine:
2+
features:
3+
kubernetesTalosAPIAccess:
4+
enabled: true
5+
allowedRoles:
6+
- os:admin
7+
allowedKubernetesNamespaces:
8+
- system-upgrade
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
cluster:
2+
allowSchedulingOnControlPlanes: true
3+
controllerManager:
4+
extraArgs:
5+
bind-address: 0.0.0.0
6+
coreDNS:
7+
disabled: true
8+
proxy:
9+
disabled: true
10+
scheduler:
11+
extraArgs:
12+
bind-address: 0.0.0.0
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
- op: remove
2+
path: /cluster/apiServer/admissionControl
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
cluster:
2+
etcd:
3+
extraArgs:
4+
listen-metrics-urls: http://0.0.0.0:2381
5+
advertisedSubnets:
6+
- 10.1.2.0/24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
cluster:
2+
discovery:
3+
registries:
4+
kubernetes:
5+
disabled: false
6+
service:
7+
disabled: false
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
machine:
2+
files:
3+
- op: create
4+
path: /etc/cri/conf.d/20-customization.part
5+
content: |-
6+
[plugins."io.containerd.grpc.v1.cri"]
7+
enable_unprivileged_ports = true
8+
enable_unprivileged_icmp = true
9+
[plugins."io.containerd.grpc.v1.cri".containerd]
10+
discard_unpacked_layers = false
11+
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
12+
discard_unpacked_layers = false
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
machine:
2+
network:
3+
disableSearchDomain: true
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
machine:
2+
features:
3+
hostDNS:
4+
enabled: true
5+
resolveMemberNames: true
6+
forwardKubeDNSToHost: false
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
machine:
2+
kubelet:
3+
extraArgs:
4+
rotate-server-certificates: true
5+
nodeIP:
6+
validSubnets:
7+
- 10.1.2.0/24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
machine:
2+
kubelet:
3+
extraMounts:
4+
- destination: /var/openebs/local
5+
type: bind
6+
source: /var/openebs/local
7+
options:
8+
- bind
9+
- rshared
10+
- rw

0 commit comments

Comments
 (0)