== sanity-lnet test 211: Remote NI recovery checks ======= 09:45:12 (1773668712) /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl lnet unconfigure /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl lnet configure /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl net add --net tcp --if ens2 default via 192.168.204.254 dev ens2 192.168.204.0/24 dev ens2 proto kernel scope link src 192.168.204.59 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl net add --net tcp1 --if ens2 default via 192.168.204.254 dev ens2 192.168.204.0/24 dev ens2 proto kernel scope link src 192.168.204.59 default via 192.168.204.254 dev ens2 192.168.204.0/24 dev ens2 proto kernel scope link src 192.168.204.59 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl discover 192.168.204.59@tcp discover: - primary nid: 192.168.204.59@tcp Multi-Rail: true peer_ni: - nid: 192.168.204.59@tcp - nid: 192.168.204.59@tcp1 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set recovery_limit 10 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl fault drop add -s *@tcp -d *@tcp -m GET -r 1 -e remote_error /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl fault drop add -s *@tcp1 -d *@tcp1 -m GET -r 1 -e remote_error /home/green/git/lustre-release/lustre/utils/lctl net_drop add -s *@tcp -d *@tcp -r 1 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl fault drop add -s *@tcp1 -d *@tcp1 -r 1 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl peer set --nid 192.168.204.59@tcp --health 0 -p recovery queue should have 192.168.204.59@tcp /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl debug recovery -p Peer NI recovery: nid-0: 192.168.204.59@tcp -p recovery queue should be empty Waiting 20s for '0' Updated after 16s: want '0' got '0' /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl debug recovery -p Check ping counts: - nid: 192.168.204.59@tcp ping_count: 0 - nid: 192.168.204.59@tcp1 ping_count: 0 /home/green/git/lustre-release/lustre/utils/lctl net_drop del -a /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set recovery_limit 0 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl peer set --nid 192.168.204.59@tcp --health 500 -p recovery queue should have 192.168.204.59@tcp /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl debug recovery -p Peer NI recovery: nid-0: 192.168.204.59@tcp -p recovery queue should be empty Waiting 20s for '0' Updated after 6s: want '0' got '0' /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl debug recovery -p /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl lnet unconfigure /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl lnet configure /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl net add --net tcp --if ens2 default via 192.168.204.254 dev ens2 192.168.204.0/24 dev ens2 proto kernel scope link src 192.168.204.59 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl net add --net tcp1 --if ens2 default via 192.168.204.254 dev ens2 192.168.204.0/24 dev ens2 proto kernel scope link src 192.168.204.59 default via 192.168.204.254 dev ens2 192.168.204.0/24 dev ens2 proto kernel scope link src 192.168.204.59 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl discover 192.168.204.59@tcp discover: - primary nid: 192.168.204.59@tcp Multi-Rail: true peer_ni: - nid: 192.168.204.59@tcp - nid: 192.168.204.59@tcp1 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set recovery_limit 0 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set max_recovery_ping_interval 4 /home/green/git/lustre-release/lustre/utils/lctl net_drop add -s *@tcp -d *@tcp -m GET -r 1 -e remote_error /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl fault drop add -s *@tcp1 -d *@tcp1 -m GET -r 1 -e remote_error /home/green/git/lustre-release/lustre/utils/lctl net_drop add -s *@tcp -d *@tcp -r 1 /home/green/git/lustre-release/lustre/utils/lctl net_drop add -s *@tcp1 -d *@tcp1 -r 1 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl peer set --nid 192.168.204.59@tcp --health 0 Check ping counts: Waiting 4s for '1' - nid: 192.168.204.59@tcp ping_count: 1 - nid: 192.168.204.59@tcp1 ping_count: 0 -p recovery queue should have 192.168.204.59@tcp /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl debug recovery -p Peer NI recovery: nid-0: 192.168.204.59@tcp Check ping counts: Waiting 14s for '4' Updated after 10s: want '4' got '4' - nid: 192.168.204.59@tcp ping_count: 4 - nid: 192.168.204.59@tcp1 ping_count: 0 -p recovery queue should have 192.168.204.59@tcp /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl debug recovery -p Peer NI recovery: nid-0: 192.168.204.59@tcp /home/green/git/lustre-release/lustre/utils/lctl net_drop del -a /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set max_recovery_ping_interval 900 pdsh@oleg459-client: oleg459-server: ssh exited with exit code 2 pdsh@oleg459-client: oleg459-server: ssh exited with exit code 2