iperf 1.7.0 hanging on Linux / AMD64
Hello,
I'm sorry if this has already been discussed; I subscribed just now to
report/ask about the following problem.
I have two Opteron nodes running an AMD64 build of RedHat Enterprise
Linux AS v 3.
When running iperf between these, there is no output after the initial
'Client connecting' message on the client and the 'connected' message
on the server:
CLIENT
[root@optimist-1 root]# ./iperf -c optimist-3
------------------------------------------------------------
Client connecting to optimist-3, TCP port 5001
TCP window size: 16.0 KByte (default)
------------------------------------------------------------
SERVER
[root@optimist-3 root]# ./iperf -s
------------------------------------------------------------
Server listening on TCP port 5001
TCP window size: 85.3 KByte (default)
------------------------------------------------------------
[ 4] local 172.16.5.12 port 5001 connected with 172.16.5.76 port 32777
With strace -f, I believe I see that somehow the sendto/read goes awry:
CLIENT
[pid 2316] socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP) = 4
[pid 2316] getpid() = 2315
[pid 2316] bind(4, {sa_family=AF_INET, sin_port=htons(795),
sin_addr=inet_addr("0.0.0.0")}, 16) = 0
[pid 2316] ioctl(4, FIONBIO, [1]) = 0
[pid 2316] setsockopt(4, SOL_IP, IP_RECVERR, [4294967297], 4) = 0
[pid 2316] futex(0x2a95e275a0, FUTEX_WAKE, 2147483647) = 0
[pid 2316] fcntl(4, F_SETFD, FD_CLOEXEC) = 0
[pid 2316] close(3) = 0
[pid 2316] sendto(4,
"\32\255\373z\0\0\0\0\0\0\0\2\0\1\206\244\0\0\0\2\0\0\0"..., 88, 0,
{sa_family=AF_INET, sin_port=htons(686),
sin_addr=inet_addr("172.16.1.2")}, 16) = 88
[pid 2316] poll([{fd=4, events=POLLIN, revents=POLLIN}], 1, 5000) = 1
[pid 2316] recvfrom(4,
"\32\255\373z\0\0\0\1\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8800, 0,
{sa_family=AF_INET, sin_port=htons(686),
sin_addr=inet_addr("172.16.1.2")}, [16]) = 88
[pid 2316] close(4) = 0
[pid 2316] socket(PF_INET, SOCK_STREAM, IPPROTO_IP) = 3
[pid 2316] connect(3, {sa_family=AF_INET, sin_port=htons(5001),
sin_addr=inet_addr("172.16.5.12")}, 16) = 0
[pid 2316] fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 0),
...}) = 0
[pid 2316] mmap(NULL, 4096, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2a9556c000
[pid 2316] write(1, "--------------------------------"...,
61------------------------------------------------------------
) = 61
[pid 2316] write(1, "Client connecting to optimist-3,"..., 47Client
connecting to optimist-3, TCP port 5001
) = 47
[pid 2316] getsockopt(3, SOL_SOCKET, SO_SNDBUF, "\0@\0\0",
[4656711363182198788]) = 0
[pid 2316] write(1, "TCP window size: 16.0 KByte (def"..., 38TCP window
size: 16.0 KByte (default)
) = 38
[pid 2316] write(1, "--------------------------------"...,
61------------------------------------------------------------
) = 61
[pid 2316] sendto(3,
"\0\0\0\0\0\0\0\1\0\0\23\211\0\0\0\0\0\0\0\0\377\377\374"..., 24, 0,
NULL, 0) = 24
[pid 2316] futex(0x511d38, FUTEX_WAIT, 0, NULL
SERVER (after client has connected, as above)
[pid 2300] mmap(NULL, 10485760, PROT_READ|PROT_WRITE|PROT_EXEC,
MAP_PRIVATE|MAP_ANONYMOUS|0x40, -1, 0 <unfinished ...>
[pid 2301] --- SIGSTOP (Stopped (signal)) @ 0 (0) ---
[pid 2300] <... mmap resumed> ) = 0x41400000
[pid 2299] <... futex resumed> ) = -1 EINTR (Interrupted system call)
[pid 2301] --- SIGSTOP (Stopped (signal)) @ 0 (0) ---
[pid 2299] --- SIGSTOP (Stopped (signal)) @ 0 (0) ---
[pid 2299] --- SIGSTOP (Stopped (signal)) @ 0 (0) ---
[pid 2299] futex(0x511b18, FUTEX_WAIT, 0, NULL <unfinished ...>
[pid 2300] mprotect(0x41400000, 4096, PROT_NONE <unfinished ...>
[pid 2301] getpeername(4, <unfinished ...>
[pid 2300] <... mprotect resumed> ) = 0
[pid 2301] <... getpeername resumed> {sa_family=AF_INET,
sin_port=htons(32778), sin_addr=inet_addr("172.16.5.76")},
[756604745988177936]) = 0
[pid 2300] clone( <unfinished ...>
[pid 2301] getsockname(4, Process 2302 attached
<unfinished ...>
[pid 2300] <... clone resumed> child_stack=0x41dff8b0,
flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID|CLONE_DETACHED,
parent_tidptr=0x41dffa00, tls=0x41dff970, child_tidptr=0x41dffa00) = 2302
[pid 2301] <... getsockname resumed> {sa_family=AF_INET,
sin_port=htons(5001), sin_addr=inet_addr("172.16.5.12")},
[9877238416343564304]) = 0
[pid 2300] accept(3, <unfinished ...>
[pid 2302] --- SIGSTOP (Stopped (signal)) @ 0 (0) ---
[pid 2301] getpeername(4, <unfinished ...>
[pid 2299] <... futex resumed> ) = -1 EINTR (Interrupted system call)
[pid 2302] --- SIGSTOP (Stopped (signal)) @ 0 (0) ---
[pid 2299] --- SIGSTOP (Stopped (signal)) @ 0 (0) ---
[pid 2299] --- SIGSTOP (Stopped (signal)) @ 0 (0) ---
[pid 2299] --- SIGSTOP (Stopped (signal)) @ 0 (0) ---
[pid 2299] futex(0x511b18, FUTEX_WAIT, 0, NULL <unfinished ...>
[pid 2301] <... getpeername resumed> {sa_family=AF_INET,
sin_port=htons(32778), sin_addr=inet_addr("172.16.5.76")},
[756604745988177936]) = 0
[pid 2300] <... accept resumed> 0x409ff5e0, [4746787410178736256]) = -1
EINTR (Interrupted system call)
[pid 2302] futex(0x513f50, FUTEX_WAIT, 0, NULL <unfinished ...>
[pid 2301] write(1, "[ 4] local 172.16.5.12 port 500"..., 72
<unfinished ...>
[pid 2300] accept(3, [ 4] local 172.16.5.12 port 5001 connected with
172.16.5.76 port 32778
<unfinished ...>
[pid 2301] <... write resumed> ) = 72
[pid 2301] rt_sigaction(SIGPIPE, {SIG_IGN}, {SIG_IGN}, 8) = 0
[pid 2301] read(4,
Any thoughts/suggestions?
Yours,
-S
--
Simen Thoresen, Wulfkit Support, Dolphin ICS
http://valinor.dolphinics.no/~simentt/cluster