Kyle Hailey
2011-05-17 19:07:45 UTC
Is there a way to identify a delayed ACK in the TCP layer dtrace probes?
If I do
time dd if=/dev/zero of=foo bs=8k count=1
where the output file is on an NFS mount and dtrace the TCP send, receives
and look at the delta of the times, the last acknowledgement happens after
the dd has finished.
I'm imagining the the last ACK is a delayed acknowledgement.
I want to be able to measure the round trip speeds, but these slow ACKs
throw off the measurements . I want to be able to filter them out if
possible.
Here is an example
time dd if=/dev/zero of=foo bs=8k count=1
1+0 records in
1+0 records out
8192 bytes (8.2 kB) copied, 0.00193471 s, 4.2 MB/s
real 0m0.014s
user 0m0.001s
sys 0m0.006s
sudo nfslat.d 1 192.86.100.186
starting up ...
sbytes rbytes delta send recd flags
unackd unackd us bytes bytes
240 0 439 / 68 ACK|PUSH|
244 0 175 244 \ ACK|PUSH|
244 0 308 / 312 ACK|PUSH|
252 0 467 252 \ ACK|PUSH|
252 0 320 / 76 ACK|PUSH|
244 0 1717 244 \ ACK|PUSH|
244 0 268 / 312 ACK|PUSH|
168 0 208 168 \ ACK|PUSH|
168 0 250 / 388 ACK|PUSH|
244 0 199 244 \ ACK|PUSH|
244 0 260 / 312 ACK|PUSH|
176 0 149 176 \ ACK|PUSH|
176 0 250 / 248 ACK|PUSH|
280 0 159 280 \ ACK|PUSH|
280 0 606 / 532 ACK|PUSH|
176 0 173 176 \ ACK|PUSH|
176 0 245 / 84 ACK|PUSH|
0 84 380 1448 \ ACK|
0 84 58 1448 \ ACK|
0 84 36 1448 \ ACK|
0 84 34 1448 \ ACK|
0 84 36 1448 \ ACK|
0 84 34 1140 \ ACK|PUSH|
8380 0 360 / 84 ACK|PUSH|
168 0 144 168 \ ACK|PUSH|
168 0 471 / 76 ACK|PUSH|
192 0 116 192 \ ACK|PUSH|
192 0 275 / 256 ACK|PUSH|
244 0 5125 244 \ ACK|PUSH|
244 0 382 / 68 ACK|PUSH|
0 0 50698 0 \ ACK| 0
So the total dd time was 14ms where as the last ACK alone was 50ms.
Code
#!/usr/sbin/dtrace -s
#pragma D option quiet
#pragma D option defaultargs
inline int TICKS=$1;
inline string ADDR=$$2;
dtrace:::BEGIN
{
TIMER = ( TICKS != NULL ) ? TICKS : 1 ;
ticks = TIMER;
TITLE = 10;
title = 0;
walltime=timestamp;
printf("starting up ...\n");
}
tcp:::send, tcp:::receive
/ title == 0 &&
( args[2]->ip_daddr == ADDR || ADDR == NULL ) /
{
printf("%6s %6s %6s %8s %8s %8s %8s %8s %8s %8s %8s %8s \n",
"sbytes",
"rbytes",
"delta" ,
"send" ,
"recd" ,
"ssz" ,
"sscal" ,
"rsz",
"rscal",
"congw",
"conthr",
"retran"
);
}
tcp:::send
/ ( args[2]->ip_daddr == ADDR || ADDR == NULL ) /
{
nfs[args[1]->cs_cid]=1; /* this is an NFS thread */
delta= timestamp-walltime;
walltime=timestamp;
flags="";
flags= strjoin((( args[4]->tcp_flags & TH_FIN ) ? "FIN|" : ""),flags);
flags= strjoin((( args[4]->tcp_flags & TH_SYN ) ? "SYN|" : ""),flags);
flags= strjoin((( args[4]->tcp_flags & TH_RST ) ? "RST|" : ""),flags);
flags= strjoin((( args[4]->tcp_flags & TH_PUSH ) ? "PUSH|" : ""),flags);
flags= strjoin((( args[4]->tcp_flags & TH_ACK ) ? "ACK|" : ""),flags);
flags= strjoin((( args[4]->tcp_flags & TH_URG ) ? "URG|" : ""),flags);
flags= strjoin((( args[4]->tcp_flags & TH_ECE ) ? "ECE|" : ""),flags);
flags= strjoin((( args[4]->tcp_flags & TH_CWR ) ? "CWR|" : ""),flags);
flags= strjoin((( args[4]->tcp_flags == 0 ) ? "null " : ""),flags);
printf("%6d %6d %6d %8d \ %-8s %8d %8d %8d %8d %8d %12d %s %d \n",
/*
printf("%6d %6d %6d %8d \ %8s %8d %8d %8d %8d %8d %12d %12d %12d %8d
%8d %s %d \n",
args[4]->tcp_seq %1000000,
args[4]->tcp_ack %1000000,
*/
args[3]->tcps_snxt - args[3]->tcps_suna ,
args[3]->tcps_rnxt - args[3]->tcps_rack,
delta/1000,
args[2]->ip_plength - args[4]->tcp_offset,
"",
args[3]->tcps_swnd,
args[3]->tcps_snd_ws,
args[3]->tcps_rwnd,
args[3]->tcps_rcv_ws,
args[3]->tcps_cwnd,
args[3]->tcps_cwnd_ssthresh,
/*
args[3]->tcps_sack_fack,
args[3]->tcps_sack_snxt,
args[3]->tcps_rto,
args[3]->tcps_mss,
*/
flags,
args[3]->tcps_retransmit
);
flags=0;
title--;
}
tcp:::receive
/ ( args[2]->ip_saddr == ADDR || ADDR == NULL ) && nfs[args[1]->cs_cid] /
{
delta=timestamp-walltime;
walltime=timestamp;
flags="";
flags= strjoin((( args[4]->tcp_flags & TH_FIN ) ? "FIN|" : ""),flags);
flags= strjoin((( args[4]->tcp_flags & TH_SYN ) ? "SYN|" : ""),flags);
flags= strjoin((( args[4]->tcp_flags & TH_RST ) ? "RST|" : ""),flags);
flags= strjoin((( args[4]->tcp_flags & TH_PUSH ) ? "PUSH|" :
""),flags);
flags= strjoin((( args[4]->tcp_flags & TH_ACK ) ? "ACK|" : ""),flags);
flags= strjoin((( args[4]->tcp_flags & TH_URG ) ? "URG|" : ""),flags);
flags= strjoin((( args[4]->tcp_flags & TH_ECE ) ? "ECE|" : ""),flags);
flags= strjoin((( args[4]->tcp_flags & TH_CWR ) ? "CWR|" : ""),flags);
flags= strjoin((( args[4]->tcp_flags == 0 ) ? "null " : ""),flags);
printf("%6d %6d %6d %8s / %-8d %8d %8d %8d %8d %8d %12d %s %d \n",
/*
printf("%6d %6d %6d %8s / %-8d %8d %8d %8d %8d %8d %12d %12d %12d
%8d %8d %s %d \n",
args[4]->tcp_ack %1000000,
args[4]->tcp_seq %1000000,
*/
args[3]->tcps_snxt - args[3]->tcps_suna ,
args[3]->tcps_rnxt - args[3]->tcps_rack,
delta/1000,
"",
args[2]->ip_plength - args[4]->tcp_offset,
args[3]->tcps_swnd,
args[3]->tcps_snd_ws,
args[3]->tcps_rwnd,
args[3]->tcps_rcv_ws,
args[3]->tcps_cwnd,
args[3]->tcps_cwnd_ssthresh,
flags,
/*
args[3]->tcps_sack_fack,
args[3]->tcps_sack_snxt,
args[3]->tcps_rto,
args[3]->tcps_mss,
*/
args[3]->tcps_retransmit
);
flags=0;
title--;
}
If I do
time dd if=/dev/zero of=foo bs=8k count=1
where the output file is on an NFS mount and dtrace the TCP send, receives
and look at the delta of the times, the last acknowledgement happens after
the dd has finished.
I'm imagining the the last ACK is a delayed acknowledgement.
I want to be able to measure the round trip speeds, but these slow ACKs
throw off the measurements . I want to be able to filter them out if
possible.
Here is an example
time dd if=/dev/zero of=foo bs=8k count=1
1+0 records in
1+0 records out
8192 bytes (8.2 kB) copied, 0.00193471 s, 4.2 MB/s
real 0m0.014s
user 0m0.001s
sys 0m0.006s
sudo nfslat.d 1 192.86.100.186
starting up ...
sbytes rbytes delta send recd flags
unackd unackd us bytes bytes
240 0 439 / 68 ACK|PUSH|
244 0 175 244 \ ACK|PUSH|
244 0 308 / 312 ACK|PUSH|
252 0 467 252 \ ACK|PUSH|
252 0 320 / 76 ACK|PUSH|
244 0 1717 244 \ ACK|PUSH|
244 0 268 / 312 ACK|PUSH|
168 0 208 168 \ ACK|PUSH|
168 0 250 / 388 ACK|PUSH|
244 0 199 244 \ ACK|PUSH|
244 0 260 / 312 ACK|PUSH|
176 0 149 176 \ ACK|PUSH|
176 0 250 / 248 ACK|PUSH|
280 0 159 280 \ ACK|PUSH|
280 0 606 / 532 ACK|PUSH|
176 0 173 176 \ ACK|PUSH|
176 0 245 / 84 ACK|PUSH|
0 84 380 1448 \ ACK|
0 84 58 1448 \ ACK|
0 84 36 1448 \ ACK|
0 84 34 1448 \ ACK|
0 84 36 1448 \ ACK|
0 84 34 1140 \ ACK|PUSH|
8380 0 360 / 84 ACK|PUSH|
168 0 144 168 \ ACK|PUSH|
168 0 471 / 76 ACK|PUSH|
192 0 116 192 \ ACK|PUSH|
192 0 275 / 256 ACK|PUSH|
244 0 5125 244 \ ACK|PUSH|
244 0 382 / 68 ACK|PUSH|
0 0 50698 0 \ ACK| 0
So the total dd time was 14ms where as the last ACK alone was 50ms.
Code
#!/usr/sbin/dtrace -s
#pragma D option quiet
#pragma D option defaultargs
inline int TICKS=$1;
inline string ADDR=$$2;
dtrace:::BEGIN
{
TIMER = ( TICKS != NULL ) ? TICKS : 1 ;
ticks = TIMER;
TITLE = 10;
title = 0;
walltime=timestamp;
printf("starting up ...\n");
}
tcp:::send, tcp:::receive
/ title == 0 &&
( args[2]->ip_daddr == ADDR || ADDR == NULL ) /
{
printf("%6s %6s %6s %8s %8s %8s %8s %8s %8s %8s %8s %8s \n",
"sbytes",
"rbytes",
"delta" ,
"send" ,
"recd" ,
"ssz" ,
"sscal" ,
"rsz",
"rscal",
"congw",
"conthr",
"retran"
);
}
tcp:::send
/ ( args[2]->ip_daddr == ADDR || ADDR == NULL ) /
{
nfs[args[1]->cs_cid]=1; /* this is an NFS thread */
delta= timestamp-walltime;
walltime=timestamp;
flags="";
flags= strjoin((( args[4]->tcp_flags & TH_FIN ) ? "FIN|" : ""),flags);
flags= strjoin((( args[4]->tcp_flags & TH_SYN ) ? "SYN|" : ""),flags);
flags= strjoin((( args[4]->tcp_flags & TH_RST ) ? "RST|" : ""),flags);
flags= strjoin((( args[4]->tcp_flags & TH_PUSH ) ? "PUSH|" : ""),flags);
flags= strjoin((( args[4]->tcp_flags & TH_ACK ) ? "ACK|" : ""),flags);
flags= strjoin((( args[4]->tcp_flags & TH_URG ) ? "URG|" : ""),flags);
flags= strjoin((( args[4]->tcp_flags & TH_ECE ) ? "ECE|" : ""),flags);
flags= strjoin((( args[4]->tcp_flags & TH_CWR ) ? "CWR|" : ""),flags);
flags= strjoin((( args[4]->tcp_flags == 0 ) ? "null " : ""),flags);
printf("%6d %6d %6d %8d \ %-8s %8d %8d %8d %8d %8d %12d %s %d \n",
/*
printf("%6d %6d %6d %8d \ %8s %8d %8d %8d %8d %8d %12d %12d %12d %8d
%8d %s %d \n",
args[4]->tcp_seq %1000000,
args[4]->tcp_ack %1000000,
*/
args[3]->tcps_snxt - args[3]->tcps_suna ,
args[3]->tcps_rnxt - args[3]->tcps_rack,
delta/1000,
args[2]->ip_plength - args[4]->tcp_offset,
"",
args[3]->tcps_swnd,
args[3]->tcps_snd_ws,
args[3]->tcps_rwnd,
args[3]->tcps_rcv_ws,
args[3]->tcps_cwnd,
args[3]->tcps_cwnd_ssthresh,
/*
args[3]->tcps_sack_fack,
args[3]->tcps_sack_snxt,
args[3]->tcps_rto,
args[3]->tcps_mss,
*/
flags,
args[3]->tcps_retransmit
);
flags=0;
title--;
}
tcp:::receive
/ ( args[2]->ip_saddr == ADDR || ADDR == NULL ) && nfs[args[1]->cs_cid] /
{
delta=timestamp-walltime;
walltime=timestamp;
flags="";
flags= strjoin((( args[4]->tcp_flags & TH_FIN ) ? "FIN|" : ""),flags);
flags= strjoin((( args[4]->tcp_flags & TH_SYN ) ? "SYN|" : ""),flags);
flags= strjoin((( args[4]->tcp_flags & TH_RST ) ? "RST|" : ""),flags);
flags= strjoin((( args[4]->tcp_flags & TH_PUSH ) ? "PUSH|" :
""),flags);
flags= strjoin((( args[4]->tcp_flags & TH_ACK ) ? "ACK|" : ""),flags);
flags= strjoin((( args[4]->tcp_flags & TH_URG ) ? "URG|" : ""),flags);
flags= strjoin((( args[4]->tcp_flags & TH_ECE ) ? "ECE|" : ""),flags);
flags= strjoin((( args[4]->tcp_flags & TH_CWR ) ? "CWR|" : ""),flags);
flags= strjoin((( args[4]->tcp_flags == 0 ) ? "null " : ""),flags);
printf("%6d %6d %6d %8s / %-8d %8d %8d %8d %8d %8d %12d %s %d \n",
/*
printf("%6d %6d %6d %8s / %-8d %8d %8d %8d %8d %8d %12d %12d %12d
%8d %8d %s %d \n",
args[4]->tcp_ack %1000000,
args[4]->tcp_seq %1000000,
*/
args[3]->tcps_snxt - args[3]->tcps_suna ,
args[3]->tcps_rnxt - args[3]->tcps_rack,
delta/1000,
"",
args[2]->ip_plength - args[4]->tcp_offset,
args[3]->tcps_swnd,
args[3]->tcps_snd_ws,
args[3]->tcps_rwnd,
args[3]->tcps_rcv_ws,
args[3]->tcps_cwnd,
args[3]->tcps_cwnd_ssthresh,
flags,
/*
args[3]->tcps_sack_fack,
args[3]->tcps_sack_snxt,
args[3]->tcps_rto,
args[3]->tcps_mss,
*/
args[3]->tcps_retransmit
);
flags=0;
title--;
}
--
- Kyle
O: +1.415.341.3430
F: +1.650.494.1676
275 Middlefield Road, Suite 50
Menlo Park, CA 94025
http://www.delphix.com
- Kyle
O: +1.415.341.3430
F: +1.650.494.1676
275 Middlefield Road, Suite 50
Menlo Park, CA 94025
http://www.delphix.com