== sanity-lnet test 210: Local NI recovery checks ======== 09:44:45 (1773668685) Loading LNet and configuring DLC /home/green/git/lustre-release/lustre/tests/test-framework.sh: line 1040: echo: write error: Device or resource busy Loading modules from /home/green/git/lustre-release/lustre detected 4 online CPUs by sysfs MODOPTS_LIBCFS= Force libcfs to create 2 CPU partitions /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl lnet configure /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl net add --net tcp --if ens2 default via 192.168.204.254 dev ens2 192.168.204.0/24 dev ens2 proto kernel scope link src 192.168.204.59 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl net add --net tcp1 --if ens2 default via 192.168.204.254 dev ens2 192.168.204.0/24 dev ens2 proto kernel scope link src 192.168.204.59 default via 192.168.204.254 dev ens2 192.168.204.0/24 dev ens2 proto kernel scope link src 192.168.204.59 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl discover 192.168.204.59@tcp discover: - primary nid: 192.168.204.59@tcp Multi-Rail: true peer_ni: - nid: 192.168.204.59@tcp - nid: 192.168.204.59@tcp1 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set recovery_limit 10 debug=+net /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl fault drop add -s *@tcp -d *@tcp -m GET -r 1 -e local_error /home/green/git/lustre-release/lustre/utils/lctl net_drop add -s *@tcp1 -d *@tcp1 -m GET -r 1 -e local_error /home/green/git/lustre-release/lustre/utils/lctl net_drop add -s *@tcp -d *@tcp -r 1 /home/green/git/lustre-release/lustre/utils/lctl net_drop add -s *@tcp1 -d *@tcp1 -r 1 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl net set --health 0 --nid 192.168.204.59@tcp Check ping counts: Waiting 10s for '2' Updated after 3s: want '2' got '2' ping_count: 0 - nid: 192.168.204.59@tcp ping_count: 2 - nid: 192.168.204.59@tcp1 ping_count: 0 -l recovery queue should have 192.168.204.59@tcp /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl debug recovery -l Local NI recovery: nid-0: 192.168.204.59@tcp Check ping counts: Waiting 10s for '3' Updated after 4s: want '3' got '3' ping_count: 0 - nid: 192.168.204.59@tcp ping_count: 3 - nid: 192.168.204.59@tcp1 ping_count: 0 -l recovery queue should have 192.168.204.59@tcp /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl debug recovery -l Local NI recovery: nid-0: 192.168.204.59@tcp /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl fault drop del -a /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl lnet unconfigure /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl lnet configure /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl net add --net tcp --if ens2 default via 192.168.204.254 dev ens2 192.168.204.0/24 dev ens2 proto kernel scope link src 192.168.204.59 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl net add --net tcp1 --if ens2 default via 192.168.204.254 dev ens2 192.168.204.0/24 dev ens2 proto kernel scope link src 192.168.204.59 default via 192.168.204.254 dev ens2 192.168.204.0/24 dev ens2 proto kernel scope link src 192.168.204.59 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl discover 192.168.204.59@tcp discover: - primary nid: 192.168.204.59@tcp Multi-Rail: true peer_ni: - nid: 192.168.204.59@tcp - nid: 192.168.204.59@tcp1 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set recovery_limit 0 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set max_recovery_ping_interval 4 debug=+net /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl fault drop add -s *@tcp -d *@tcp -m GET -r 1 -e local_error /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl fault drop add -s *@tcp1 -d *@tcp1 -m GET -r 1 -e local_error /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl fault drop add -s *@tcp -d *@tcp -r 1 /home/green/git/lustre-release/lustre/utils/lctl net_drop add -s *@tcp1 -d *@tcp1 -r 1 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl net set --health 0 --nid 192.168.204.59@tcp Check ping counts: Waiting 10s for '2' Updated after 3s: want '2' got '2' ping_count: 0 - nid: 192.168.204.59@tcp ping_count: 2 - nid: 192.168.204.59@tcp1 ping_count: 0 -l recovery queue should have 192.168.204.59@tcp /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl debug recovery -l Local NI recovery: nid-0: 192.168.204.59@tcp Check ping counts: Waiting 10s for '4' Updated after 9s: want '4' got '4' ping_count: 0 - nid: 192.168.204.59@tcp ping_count: 4 - nid: 192.168.204.59@tcp1 ping_count: 0 -l recovery queue should have 192.168.204.59@tcp /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl debug recovery -l Local NI recovery: nid-0: 192.168.204.59@tcp /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl fault drop del -a /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set max_recovery_ping_interval 900 pdsh@oleg459-client: oleg459-server: ssh exited with exit code 2 pdsh@oleg459-client: oleg459-server: ssh exited with exit code 2