Skip to content

Commit

Permalink
Fix jepsen ssh (#131)
Browse files Browse the repository at this point in the history
* jepsen fix

* activate jepsen tests on push and committing pr

* move sshd_config to jepsen_common

* avoid nil pointer dereference panic

* timeout

* add cron

* change time limit

* Remove jepsen from push-pull workflow
  • Loading branch information
noname0443 authored Sep 19, 2024
1 parent df75a5e commit 3050bab
Show file tree
Hide file tree
Showing 7 changed files with 45 additions and 20 deletions.
4 changes: 2 additions & 2 deletions internal/app/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -2195,7 +2195,7 @@ func (app *App) getClusterStateFromDB() map[string]*NodeState {
getter := func(host string) (*NodeState, error) {
return app.getNodeState(host), nil
}
clusterState, _ := getNodeStatesInParallel(hosts, getter)
clusterState, _ := getNodeStatesInParallel(hosts, getter, app.logger)
return clusterState
}

Expand All @@ -2209,7 +2209,7 @@ func (app *App) getClusterStateFromDcs() (map[string]*NodeState, error) {
}
return nodeState, nil
}
return getNodeStatesInParallel(hosts, getter)
return getNodeStatesInParallel(hosts, getter, app.logger)
}

func (app *App) waitForCatchUp(node *mysql.Node, gtidset gtids.GTIDSet, timeout time.Duration, sleep time.Duration) (bool, error) {
Expand Down
9 changes: 7 additions & 2 deletions internal/app/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ func getDubiousHAHosts(clusterState map[string]*NodeState) []string {
return dubious
}

func getNodeStatesInParallel(hosts []string, getter func(string) (*NodeState, error)) (map[string]*NodeState, error) {
func getNodeStatesInParallel(hosts []string, getter func(string) (*NodeState, error), logger *log.Logger) (map[string]*NodeState, error) {
type result struct {
name string
state *NodeState
Expand Down Expand Up @@ -225,7 +225,12 @@ func getNodeStatesInParallel(hosts []string, getter func(string) (*NodeState, er
continue
}
masterHost := clusterState[host].SlaveState.MasterHost
clusterState[host].MasterState = clusterState[masterHost].MasterState

if clusterState[masterHost] != nil {
clusterState[host].MasterState = clusterState[masterHost].MasterState
} else {
logger.Error("Can not get master state")
}
}
return clusterState, nil
}
Expand Down
4 changes: 2 additions & 2 deletions tests/images/jepsen_common/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
FROM mysync-test-base:latest
RUN (yes | ssh-keygen -t rsa -N '' -f /root/test_ssh_key -C jepsen || true) && \
RUN (yes | ssh-keygen -m PEM -t rsa -b 2048 -N '' -f /root/test_ssh_key -C root@mysync_jepsen_1 || true) && \
eval `ssh-agent -s` && cp /root/test_ssh_key.pub /root/.ssh/authorized_keys && \
cp /root/test_ssh_key.pub /root/.ssh/id_rsa.pub && \
cp /root/test_ssh_key /root/.ssh/id_rsa && ssh-add -k /root/.ssh/id_rsa

COPY ./ssh_config /etc/ssh/ssh_config
COPY ./sshd_config /etc/ssh/sshd_config
10 changes: 10 additions & 0 deletions tests/images/jepsen_common/sshd_config
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
PubkeyAuthentication yes
ChallengeResponseAuthentication no
UsePAM yes
X11Forwarding yes
PrintMotd no
AcceptEnv LANG LC_*
Subsystem sftp /usr/lib/openssh/sftp-server
PubkeyAcceptedAlgorithms ssh-rsa
PermitRootLogin yes
MaxAuthTries 1000000
8 changes: 4 additions & 4 deletions tests/images/jepsen_main/jepsen/src/jepsen/mysync.clj
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@
:name "mysync"
:os os/noop
:db (db)
:ssh {:private-key-path "/root/.ssh/id_rsa"}
:ssh {:private-key-path "/root/.ssh/id_rsa" :strict-host-key-checking :no}
:net net/iptables
:client (mysql-client nil)
:nemesis (nemesis/compose {{:start-halves :start} (nemesis/partition-random-halves)
Expand All @@ -266,16 +266,16 @@
(fn [] (map gen/once
[{:type :info, :f (rand-nth nemesis-starts)}
{:type :info, :f (rand-nth nemesis-starts)}
{:type :sleep, :value 60}
{:type :sleep, :value 120}
{:type :info, :f :stop}
{:type :sleep, :value 60}])))
{:type :sleep, :value 120}])))
(gen/time-limit 7200))
(->> r
(gen/stagger 1)
(gen/nemesis
(fn [] (map gen/once
[{:type :info, :f :stop}
{:type :sleep, :value 60}])))
{:type :sleep, :value 120}])))
(gen/time-limit 600)))
:checker mysync-set
:remote control/ssh})
20 changes: 10 additions & 10 deletions tests/images/jepsen_main/save_logs.sh
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
#!/bin/bash

for i in 1 2 3
do
mkdir -p tests/logs/mysql${i}
mkdir -p tests/logs/zookeeper${i}
for i in 1 2 3; do
mkdir -p tests/logs/mysql${i}
mkdir -p tests/logs/zookeeper${i}

for logfile in /var/log/mysync.log /var/log/mysql/error.log /var/log/mysql/query.log /var/log/resetup.log /var/log/supervisor.log
do
logname=$(echo "${logfile}" | rev | cut -d/ -f1 | rev)
docker exec mysync_mysql${i}_1 cat "${logfile}" > "tests/logs/mysql${i}/${logname}"
done
queries=$(docker exec mysync_mysql${i}_1 bash -c 'ls /var/log/mysql/ -d /var/log/mysql/* | sed 1d')

docker exec mysync_zoo${i}_1 cat /var/log/zookeeper/zookeeper--server-mysync_zookeeper${i}_1.log > tests/logs/zookeeper${i}/zk.log 2>&1
for logfile in /var/log/mysync.log /var/log/mysql/error.log $queries /var/log/resetup.log /var/log/supervisor.log; do
logname=$(echo "${logfile}" | rev | cut -d/ -f1 | rev)
docker exec mysync_mysql${i}_1 cat "${logfile}" >"tests/logs/mysql${i}/${logname}"
done

docker exec mysync_zoo${i}_1 cat /var/log/zookeeper/zookeeper--server-mysync_zookeeper${i}_1.log >tests/logs/zookeeper${i}/zk.log 2>&1
done

tail -n 18 tests/logs/jepsen.log
Expand Down
10 changes: 10 additions & 0 deletions tests/images/jepsen_sshd_config
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
PubkeyAuthentication yes
ChallengeResponseAuthentication no
UsePAM yes
X11Forwarding yes
PrintMotd no
AcceptEnv LANG LC_*
Subsystem sftp /usr/lib/openssh/sftp-server
PubkeyAcceptedAlgorithms ssh-rsa
PermitRootLogin yes
MaxAuthTries 1000000

0 comments on commit 3050bab

Please sign in to comment.