== sanity-lnet test 207: Check health and resends for multi-rail remote errors ========================================================== 09:42:43 (1773668563) Cleaning up LNet LNET unconfigure error 22: Invalid argument unloading modules via unload_modules_local on: 'oleg459-server' oleg459-server: oleg459-server.virtnet: executing unload_modules_local oleg459-server: oleg459-server: LNET unconfigure error 22: Invalid argument modules unloaded. /home/green/git/lustre-release/lustre/tests/test-framework.sh: line 1040: echo: write error: Device or resource busy Loading modules from /home/green/git/lustre-release/lustre detected 4 online CPUs by sysfs MODOPTS_LIBCFS= Force libcfs to create 2 CPU partitions ../libcfs/libcfs/libcfs options: 'cpu_npartitions=2' /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl lnet configure -a oleg459-server: Writer error: failed to resolve Netlink family id oleg459-server: opening /dev/lnet failed: No such file or directory oleg459-server: hint: the kernel modules may not be loaded oleg459-server: IOC_LIBCFS_GET_NI error 2: No such file or directory pdsh@oleg459-client: oleg459-server: ssh exited with exit code 1 oleg459-server: oleg459-server.virtnet: executing load_lnet oleg459-server: /home/green/git/lustre-release/lustre/tests/test-framework.sh: line 1040: echo: write error: Device or resource busy oleg459-server: Loading modules from /home/green/git/lustre-release/lustre oleg459-server: detected 4 online CPUs by sysfs oleg459-server: MODOPTS_LIBCFS= oleg459-server: Force libcfs to create 2 CPU partitions /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl discover 192.168.204.159@tcp discover: - primary nid: 192.168.204.159@tcp Multi-Rail: true peer_ni: - nid: 192.168.204.159@tcp oleg459-server: oleg459-server.virtnet: executing lnet_if_list oleg459-server: oleg459-server.virtnet: executing /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl net add --net tcp1 --if ens2 oleg459-server: default via 192.168.204.254 dev ens2 192.168.204.0/24 dev ens2 proto kernel scope link src 192.168.204.159 oleg459-server: default via 192.168.204.254 dev ens2 192.168.204.0/24 dev ens2 proto kernel scope link src 192.168.204.159 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl lnet configure /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl net add --net tcp1 --if ens2 default via 192.168.204.254 dev ens2 192.168.204.0/24 dev ens2 proto kernel scope link src 192.168.204.59 default via 192.168.204.254 dev ens2 192.168.204.0/24 dev ens2 proto kernel scope link src 192.168.204.59 net: - net type: lo local NI(s): - nid: 0@lo status: up - net type: tcp local NI(s): - nid: 192.168.204.59@tcp status: up interfaces: 0: ens2 - net type: tcp1 local NI(s): - nid: 192.168.204.59@tcp1 status: up interfaces: 0: ens2 - primary nid: 192.168.204.159@tcp - nid: 192.168.204.159@tcp health stats: health value: 1000 - nid: 192.168.204.159@tcp1 health stats: health value: 1000 debug=+net Simulate remote_dropped /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl fault drop add -s 192.168.204.59@tcp -d 192.168.204.159@tcp -m GET -e remote_dropped -r 1 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl fault drop add -s 192.168.204.59@tcp -d 192.168.204.159@tcp1 -m GET -e remote_dropped -r 1 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl fault drop add -s 192.168.204.59@tcp -d 192.168.204.59@tcp -m GET -e remote_dropped -r 1 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl fault drop add -s 192.168.204.59@tcp -d 192.168.204.59@tcp1 -m GET -e remote_dropped -r 1 /home/green/git/lustre-release/lustre/utils/lctl net_drop add -s 192.168.204.59@tcp1 -d 192.168.204.159@tcp -m GET -e remote_dropped -r 1 /home/green/git/lustre-release/lustre/utils/lctl net_drop add -s 192.168.204.59@tcp1 -d 192.168.204.159@tcp1 -m GET -e remote_dropped -r 1 /home/green/git/lustre-release/lustre/utils/lctl net_drop add -s 192.168.204.59@tcp1 -d 192.168.204.59@tcp -m GET -e remote_dropped -r 1 /home/green/git/lustre-release/lustre/utils/lctl net_drop add -s 192.168.204.59@tcp1 -d 192.168.204.59@tcp1 -m GET -e remote_dropped -r 1 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl ping 192.168.204.159@tcp manage: - ping: errno: -5 descr: ! 'failed to ping 192.168.204.159@tcp: Input/output error' /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set numa_range 0 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set max_interfaces 200 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set discovery 1 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set drop_asym_route 0 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set retry_count 2 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set transaction_timeout 150 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set health_sensitivity 100 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set recovery_interval 1 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set router_sensitivity 100 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set response_tracking 3 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set recovery_limit 0 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set max_recovery_ping_interval 900 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl peer set --health 1000 --all /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl net set --health 1000 --all /home/green/git/lustre-release/lustre/utils/lctl net_drop del -a Check that 2 resends took place Check for 1 failed resend Check for 0 successful resends Check that local NI health is unchanged Check that remote NI health has been changed /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl peer set --health 1000 --all Simulate remote_error /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl fault drop add -s 192.168.204.59@tcp -d 192.168.204.159@tcp -m GET -e remote_error -r 1 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl fault drop add -s 192.168.204.59@tcp -d 192.168.204.159@tcp1 -m GET -e remote_error -r 1 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl fault drop add -s 192.168.204.59@tcp -d 192.168.204.59@tcp -m GET -e remote_error -r 1 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl fault drop add -s 192.168.204.59@tcp -d 192.168.204.59@tcp1 -m GET -e remote_error -r 1 /home/green/git/lustre-release/lustre/utils/lctl net_drop add -s 192.168.204.59@tcp1 -d 192.168.204.159@tcp -m GET -e remote_error -r 1 /home/green/git/lustre-release/lustre/utils/lctl net_drop add -s 192.168.204.59@tcp1 -d 192.168.204.159@tcp1 -m GET -e remote_error -r 1 /home/green/git/lustre-release/lustre/utils/lctl net_drop add -s 192.168.204.59@tcp1 -d 192.168.204.59@tcp -m GET -e remote_error -r 1 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl fault drop add -s 192.168.204.59@tcp1 -d 192.168.204.59@tcp1 -m GET -e remote_error -r 1 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl ping 192.168.204.159@tcp manage: - ping: errno: -5 descr: ! 'failed to ping 192.168.204.159@tcp: Input/output error' /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set numa_range 0 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set max_interfaces 200 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set discovery 1 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set drop_asym_route 0 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set retry_count 2 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set transaction_timeout 150 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set health_sensitivity 100 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set recovery_interval 1 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set router_sensitivity 100 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set response_tracking 3 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set recovery_limit 0 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set max_recovery_ping_interval 900 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl peer set --health 1000 --all /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl net set --health 1000 --all /home/green/git/lustre-release/lustre/utils/lctl net_drop del -a Check that no resends took place Check that local NI health is unchanged Check that remote NI health has been changed /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl peer set --health 1000 --all Simulate remote_timeout /home/green/git/lustre-release/lustre/utils/lctl net_drop add -s 192.168.204.59@tcp -d 192.168.204.159@tcp -m GET -e remote_timeout -r 1 /home/green/git/lustre-release/lustre/utils/lctl net_drop add -s 192.168.204.59@tcp -d 192.168.204.159@tcp1 -m GET -e remote_timeout -r 1 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl fault drop add -s 192.168.204.59@tcp -d 192.168.204.59@tcp -m GET -e remote_timeout -r 1 /home/green/git/lustre-release/lustre/utils/lctl net_drop add -s 192.168.204.59@tcp -d 192.168.204.59@tcp1 -m GET -e remote_timeout -r 1 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl fault drop add -s 192.168.204.59@tcp1 -d 192.168.204.159@tcp -m GET -e remote_timeout -r 1 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl fault drop add -s 192.168.204.59@tcp1 -d 192.168.204.159@tcp1 -m GET -e remote_timeout -r 1 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl fault drop add -s 192.168.204.59@tcp1 -d 192.168.204.59@tcp -m GET -e remote_timeout -r 1 /home/green/git/lustre-release/lustre/utils/lctl net_drop add -s 192.168.204.59@tcp1 -d 192.168.204.59@tcp1 -m GET -e remote_timeout -r 1 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl ping 192.168.204.159@tcp manage: - ping: errno: -5 descr: ! 'failed to ping 192.168.204.159@tcp: Input/output error' /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set numa_range 0 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set max_interfaces 200 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set discovery 1 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set drop_asym_route 0 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set retry_count 2 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set transaction_timeout 150 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set health_sensitivity 100 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set recovery_interval 1 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set router_sensitivity 100 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set response_tracking 3 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set recovery_limit 0 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl set max_recovery_ping_interval 900 /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl peer set --health 1000 --all /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl net set --health 1000 --all /home/green/git/lustre-release/lustre/utils/lctl net_drop del -a Check that no resends took place Check that local NI health is unchanged Check that remote NI health has been changed /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl peer set --health 1000 --all oleg459-server: oleg459-server.virtnet: executing /home/green/git/lustre-release/lustre/../lnet/utils/lnetctl net del --net tcp1 --if ens2 Writer error: failed to resolve Netlink family id unloading modules via unload_modules_local on: 'oleg459-server' oleg459-server: oleg459-server.virtnet: executing unload_modules_local oleg459-server: modules unloaded. oleg459-server: oleg459-server.virtnet: executing unload_modules_local oleg459-server: oleg459-server: LNET unconfigure error 22: Invalid argument pdsh@oleg459-client: oleg459-server: ssh exited with exit code 2 pdsh@oleg459-client: oleg459-client: ssh exited with exit code 2 pdsh@oleg459-client: oleg459-client: ssh exited with exit code 2 pdsh@oleg459-client: oleg459-server: ssh exited with exit code 2