Skip to content

Commit cd1202c

Browse files
authored
ovn: fix corrupted database file on start (#3112)
1 parent 02f8c63 commit cd1202c

File tree

2 files changed

+44
-39
lines changed

2 files changed

+44
-39
lines changed

dist/images/Dockerfile.base

+3 -1
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,9 @@ RUN cd /usr/src/ && \
2323
# Add jitter parameter patch for netem qos
2424
curl -s https://github.com/kubeovn/ovs/commit/2eaaf89fbf3ee2172719ed10d045fd79900edc8e.patch | git apply && \
2525
# fix memory leak in qos
26-
curl -s https://github.com/kubeovn/ovs/commit/6a4dd2f4b9311a227cc26fef7c398ae9b241311b.patch | git apply
26+
curl -s https://github.com/kubeovn/ovs/commit/6a4dd2f4b9311a227cc26fef7c398ae9b241311b.patch | git apply && \
27+
# ovsdb-tool: add command fix-cluster
28+
curl -s https://github.com/kubeovn/ovs/commit/f52c239f5ded40b503e4d217f916b46ca413da4c.patch | git apply
2729

2830
RUN cd /usr/src/ && git clone -b branch-22.12 --depth=1 https://github.com/ovn-org/ovn.git && \
2931
cd ovn && \

dist/images/start-db.sh

+41 -38
Original file line number | Diff line number | Diff line change
@@ -141,54 +141,57 @@ function ovn_db_pre_start() {
141141

142142
local db_file="/etc/ovn/ovn${1}_db.db"
143143
if [ -e "$db_file" ]; then
144-
if ovsdb-tool db-is-clustered "$db_file"; then
144+
if ovsdb-tool db-is-clustered "$db_file"; then
145145
local msg=$(ovsdb-tool check-cluster "$db_file" 2>&1) || true
146146
if echo $msg | grep -q 'has not joined the cluster'; then
147147
local birth_time=$(stat --format=%W $db_file)
148148
local now=$(date +%s)
149149
if [ $(($now - $birth_time)) -ge 120 ]; then
150-
echo "ovn db file $db_file exists for more than 120s, removing it..."
150+
echo "ovn db file $db_file exists for more than 120s, remove it."
151151
rm -f "$db_file" || return 1
152152
fi
153153
return
154154
fi
155155

156-
if ! ovsdb-tool check-cluster "$db_file"; then
157-
echo "detected database corruption for file $db_file, rebuild it."
158-
local sid=$(ovsdb-tool db-sid "$db_file")
159-
if ! echo -n "$sid" | grep -qE '^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$'; then
160-
echo "failed to get sid from $1 db file $db_file"
161-
return 1
162-
fi
163-
echo "get local server id $sid"
164-
165-
eval port="\$${db_eval}_CLUSTER_PORT"
166-
local local_addr="$(gen_conn_addr $DB_CLUSTER_ADDR $port)"
167-
echo "local address: $local_addr"
168-
169-
local remote_addr=()
170-
local node_ips=$(echo -n "${NODE_IPS}" | sed 's/,/ /g')
171-
for node_ip in ${node_ips[*]}; do
172-
if [ ! "$node_ip" = "$DB_CLUSTER_ADDR" ]; then
173-
remote_addr=(${remote_addr[*]} "$(gen_conn_addr $node_ip $port)")
174-
fi
175-
done
176-
echo "remote addresses: ${remote_addr[*]}"
177-
178-
local db_new="$db_file.init-$(date +%s)-$(random_str)"
179-
echo "generating new database file $db_new"
180-
if [ ${#remote_addr[*]} -ne 0 ]; then
181-
ovsdb-tool --sid $sid join-cluster "$db_new" $db $local_addr ${remote_addr[*]} || return 1
182-
183-
local db_bak="$db_file.backup-$(date +%s)-$(random_str)"
184-
echo "backup $db_file to $db_bak"
185-
mv "$db_file" "$db_bak" || return 1
186-
187-
echo "use new database file $db_new"
188-
mv "$db_new" "$db_file"
189-
fi
190-
fi
191-
fi
156+
if ! ovsdb-tool check-cluster "$db_file"; then
157+
local db_bak="$db_file.backup-$(date +%s)-$(random_str)"
158+
echo "backup $db_file to $db_bak"
159+
cp "$db_file" "$db_bak" || return 1
160+
161+
echo "detected database corruption for file $db_file, try to fix it."
162+
if ! ovsdb-tool fix-cluster "$db_file"; then
163+
echo "failed to fix database file $db_file, rebuild it."
164+
local sid=$(ovsdb-tool db-sid "$db_file")
165+
if ! echo -n "$sid" | grep -qE '^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$'; then
166+
echo "failed to get sid from db file $db_file"
167+
return 1
168+
fi
169+
echo "get local server id $sid"
170+
171+
eval port="\$${db_eval}_CLUSTER_PORT"
172+
local local_addr="$(gen_conn_addr $DB_CLUSTER_ADDR $port)"
173+
echo "local address: $local_addr"
174+
175+
local remote_addr=()
176+
local node_ips=$(echo -n "${NODE_IPS}" | sed 's/,/ /g')
177+
for node_ip in ${node_ips[*]}; do
178+
if [ ! "$node_ip" = "$DB_CLUSTER_ADDR" ]; then
179+
remote_addr=(${remote_addr[*]} "$(gen_conn_addr $node_ip $port)")
180+
fi
181+
done
182+
echo "remote addresses: ${remote_addr[*]}"
183+
184+
local db_new="$db_file.init-$(date +%s)-$(random_str)"
185+
echo "generating new database file $db_new"
186+
if [ ${#remote_addr[*]} -ne 0 ]; then
187+
ovsdb-tool --sid $sid join-cluster "$db_new" $db $local_addr ${remote_addr[*]} || return 1
188+
189+
echo "use new database file $db_new"
190+
mv "$db_new" "$db_file"
191+
fi
192+
fi
193+
fi
194+
fi
192195
fi
193196

194197
# create local config

0 commit comments

Comments
 (0)