1*9cd928feSAlan Maguire/*
2*9cd928feSAlan Maguire * CDDL HEADER START
3*9cd928feSAlan Maguire *
4*9cd928feSAlan Maguire * The contents of this file are subject to the terms of the
5*9cd928feSAlan Maguire * Common Development and Distribution License (the "License").
6*9cd928feSAlan Maguire * You may not use this file except in compliance with the License.
7*9cd928feSAlan Maguire *
8*9cd928feSAlan Maguire * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*9cd928feSAlan Maguire * or http://www.opensolaris.org/os/licensing.
10*9cd928feSAlan Maguire * See the License for the specific language governing permissions
11*9cd928feSAlan Maguire * and limitations under the License.
12*9cd928feSAlan Maguire *
13*9cd928feSAlan Maguire * When distributing Covered Code, include this CDDL HEADER in each
14*9cd928feSAlan Maguire * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*9cd928feSAlan Maguire * If applicable, add the following below this CDDL HEADER, with the
16*9cd928feSAlan Maguire * fields enclosed by brackets "[]" replaced with your own identifying
17*9cd928feSAlan Maguire * information: Portions Copyright [yyyy] [name of copyright owner]
18*9cd928feSAlan Maguire *
19*9cd928feSAlan Maguire * CDDL HEADER END
20*9cd928feSAlan Maguire */
21*9cd928feSAlan Maguire/*
22*9cd928feSAlan Maguire * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23*9cd928feSAlan Maguire */
24*9cd928feSAlan Maguire
25*9cd928feSAlan Maguire#pragma D depends_on module unix
26*9cd928feSAlan Maguire#pragma D depends_on provider tcp
27*9cd928feSAlan Maguire
/*
 * TCP header flag bit values, exported to D consumers under the same names
 * the kernel headers use.  Each @NAME@ token is a placeholder rather than
 * valid D; it is presumably replaced with the numeric value when this
 * library is generated (NOTE(review): confirm against the build rules).
 * Every constant is bound to D version 1.6.3.
 */
inline int TH_FIN = @TH_FIN@;
#pragma D binding "1.6.3" TH_FIN
inline int TH_SYN = @TH_SYN@;
#pragma D binding "1.6.3" TH_SYN
inline int TH_RST = @TH_RST@;
#pragma D binding "1.6.3" TH_RST
inline int TH_PUSH = @TH_PUSH@;
#pragma D binding "1.6.3" TH_PUSH
inline int TH_ACK = @TH_ACK@;
#pragma D binding "1.6.3" TH_ACK
inline int TH_URG = @TH_URG@;
#pragma D binding "1.6.3" TH_URG
inline int TH_ECE = @TH_ECE@;
#pragma D binding "1.6.3" TH_ECE
inline int TH_CWR = @TH_CWR@;
#pragma D binding "1.6.3" TH_CWR
44*9cd928feSAlan Maguire
/*
 * TCP connection state values, one per TCPS_* placeholder.  They are typed
 * int32_t to match tcpsinfo_t.tcps_state and tcplsinfo_t.tcps_state, and
 * some of them are negative (see the note on the tcplsinfo_t translator).
 * Each @TCPS_...@ token is presumably substituted with the kernel's value
 * when this library is generated (NOTE(review): confirm against the build).
 */
inline int32_t TCP_STATE_CLOSED = @TCPS_CLOSED@;
#pragma D binding "1.6.3" TCP_STATE_CLOSED
inline int32_t TCP_STATE_IDLE = @TCPS_IDLE@;
#pragma D binding "1.6.3" TCP_STATE_IDLE
inline int32_t TCP_STATE_BOUND = @TCPS_BOUND@;
#pragma D binding "1.6.3" TCP_STATE_BOUND
inline int32_t TCP_STATE_LISTEN = @TCPS_LISTEN@;
#pragma D binding "1.6.3" TCP_STATE_LISTEN
inline int32_t TCP_STATE_SYN_SENT = @TCPS_SYN_SENT@;
#pragma D binding "1.6.3" TCP_STATE_SYN_SENT
inline int32_t TCP_STATE_SYN_RECEIVED = @TCPS_SYN_RCVD@;
#pragma D binding "1.6.3" TCP_STATE_SYN_RECEIVED
inline int32_t TCP_STATE_ESTABLISHED = @TCPS_ESTABLISHED@;
#pragma D binding "1.6.3" TCP_STATE_ESTABLISHED
inline int32_t TCP_STATE_CLOSE_WAIT = @TCPS_CLOSE_WAIT@;
#pragma D binding "1.6.3" TCP_STATE_CLOSE_WAIT
inline int32_t TCP_STATE_FIN_WAIT_1 = @TCPS_FIN_WAIT_1@;
#pragma D binding "1.6.3" TCP_STATE_FIN_WAIT_1
inline int32_t TCP_STATE_CLOSING = @TCPS_CLOSING@;
#pragma D binding "1.6.3" TCP_STATE_CLOSING
inline int32_t TCP_STATE_LAST_ACK = @TCPS_LAST_ACK@;
#pragma D binding "1.6.3" TCP_STATE_LAST_ACK
inline int32_t TCP_STATE_FIN_WAIT_2 = @TCPS_FIN_WAIT_2@;
#pragma D binding "1.6.3" TCP_STATE_FIN_WAIT_2
inline int32_t TCP_STATE_TIME_WAIT = @TCPS_TIME_WAIT@;
#pragma D binding "1.6.3" TCP_STATE_TIME_WAIT
71*9cd928feSAlan Maguire
/*
 * Map a TCP state value onto a printable name.
 *
 * Indexed by an int32_t state, e.g. tcp_state_string[args[3]->tcps_state].
 * The state is compared against each TCP_STATE_* constant in turn; a value
 * that matches none of them yields "<unknown>".
 */
inline string tcp_state_string[int32_t state] =
	(state == TCP_STATE_CLOSED) ? "state-closed" :
	(state == TCP_STATE_IDLE) ? "state-idle" :
	(state == TCP_STATE_BOUND) ? "state-bound" :
	(state == TCP_STATE_LISTEN) ? "state-listen" :
	(state == TCP_STATE_SYN_SENT) ? "state-syn-sent" :
	(state == TCP_STATE_SYN_RECEIVED) ? "state-syn-received" :
	(state == TCP_STATE_ESTABLISHED) ? "state-established" :
	(state == TCP_STATE_CLOSE_WAIT) ? "state-close-wait" :
	(state == TCP_STATE_FIN_WAIT_1) ? "state-fin-wait-1" :
	(state == TCP_STATE_CLOSING) ? "state-closing" :
	(state == TCP_STATE_LAST_ACK) ? "state-last-ack" :
	(state == TCP_STATE_FIN_WAIT_2) ? "state-fin-wait-2" :
	(state == TCP_STATE_TIME_WAIT) ? "state-time-wait" :
	"<unknown>";
#pragma D binding "1.6.3" tcp_state_string
91*9cd928feSAlan Maguire
/*
 * tcpinfo_t: the fields of a TCP header as presented to D consumers.
 *
 * The translators in this file fill in the multi-byte members via
 * ntohs()/ntohl(), and tcp_offset is already scaled to a byte count.
 */
typedef struct tcpinfo {
	uint16_t tcp_sport;		/* source port */
	uint16_t tcp_dport;		/* destination port */
	uint32_t tcp_seq;		/* sequence number */
	uint32_t tcp_ack;		/* acknowledgment number */
	uint8_t tcp_offset;		/* header length (data offset), bytes */
	uint8_t tcp_flags;		/* flag bits, see TH_* */
	uint16_t tcp_window;		/* advertised window size */
	uint16_t tcp_checksum;		/* checksum */
	uint16_t tcp_urgent;		/* urgent data pointer */
	tcph_t *tcp_hdr;		/* raw TCP header, may be NULL */
} tcpinfo_t;
107*9cd928feSAlan Maguire
/*
 * tcpsinfo_t: a stable view of per-connection TCP state, translated from
 * the kernel's tcp_t.  The int members documented as "boolean" hold 0 or 1.
 */
typedef struct tcpsinfo {
	uintptr_t tcps_addr;		/* address of the source tcp_t */
	int tcps_local;			/* delivered locally? (boolean) */
	int tcps_active;		/* active open from here? (boolean) */
	uint16_t tcps_lport;		/* local port */
	uint16_t tcps_rport;		/* remote port */
	string tcps_laddr;		/* local address, as a string */
	string tcps_raddr;		/* remote address, as a string */
	int32_t tcps_state;		/* TCP state, see TCP_STATE_* */
	uint32_t tcps_iss;		/* initial sequence # sent */
	uint32_t tcps_suna;		/* sequence # sent but unacked */
	uint32_t tcps_snxt;		/* next sequence # to send */
	uint32_t tcps_rack;		/* sequence # we have acked */
	uint32_t tcps_rnxt;		/* next sequence # expected */
	uint32_t tcps_swnd;		/* send window size */
	int32_t tcps_snd_ws;		/* send window scaling */
	uint32_t tcps_rwnd;		/* receive window size */
	int32_t tcps_rcv_ws;		/* receive window scaling */
	uint32_t tcps_cwnd;		/* congestion window */
	uint32_t tcps_cwnd_ssthresh;	/* congestion avoidance threshold */
	uint32_t tcps_sack_fack;	/* SACK sequence # we have acked */
	uint32_t tcps_sack_snxt;	/* next SACK seq # to retransmit */
	uint32_t tcps_rto;		/* round-trip timeout, msec */
	uint32_t tcps_mss;		/* max segment size */
	int tcps_retransmit;		/* send is a retransmit? (boolean) */
} tcpsinfo_t;
137*9cd928feSAlan Maguire
/*
 * tcplsinfo_t: the state a connection was in before a state change.
 * It carries only the previous state; compare it with the TCP_STATE_*
 * constants or pass it to tcp_state_string[].
 */
typedef struct tcplsinfo {
	int32_t tcps_state;		/* previous TCP state */
} tcplsinfo_t;
144*9cd928feSAlan Maguire
/*
 * __dtrace_tcp_tcph_t is the input type of the tcpinfo_t translator that
 * copes with a missing TCP header.  When the tcph_t * passed in is
 * non-NULL the translator reads the header; when it is NULL the translator
 * falls back to arg3 (a tcp_t *) of the tcp:::send and tcp:::receive probes
 * and derives the tcpinfo_t from the tcp_t instead.  No headers exist for
 * TCP fusion tcp:::send and tcp:::receive, so this lets consumers see
 * header data built from tcp_t content while the fusion implementation
 * details stay hidden.
 */
typedef tcph_t *__dtrace_tcp_tcph_t;
155*9cd928feSAlan Maguire
/*
 * Translate a raw TCP header (tcph_t *) into a tcpinfo_t.
 *
 * Each header member is reinterpreted through a pointer cast of the needed
 * width, and the 16- and 32-bit values are run through ntohs()/ntohl().
 * The data offset is the upper nibble of th_offset_and_rsrvd counted in
 * 32-bit words; masking with 0xf0 and shifting right by 2 turns it into a
 * byte count.  T is dereferenced unconditionally, so it must not be NULL.
 */
#pragma D binding "1.6.3" translator
translator tcpinfo_t < tcph_t *T > {
	tcp_sport = ntohs(*(uint16_t *)(T->th_lport));
	tcp_dport = ntohs(*(uint16_t *)(T->th_fport));
	tcp_seq = ntohl(*(uint32_t *)(T->th_seq));
	tcp_ack = ntohl(*(uint32_t *)(T->th_ack));
	tcp_offset = ((*(uint8_t *)(T->th_offset_and_rsrvd)) & 0xf0) >> 2;
	tcp_flags = *(uint8_t *)(T->th_flags);
	tcp_window = ntohs(*(uint16_t *)(T->th_win));
	tcp_checksum = ntohs(*(uint16_t *)(T->th_sum));
	tcp_urgent = ntohs(*(uint16_t *)(T->th_urp));
	tcp_hdr = T;
};
169*9cd928feSAlan Maguire
/*
 * Translate a possibly-NULL TCP header into a tcpinfo_t.
 *
 * With a non-NULL T every member is read from the header, exactly as in
 * the plain tcph_t * translator.  With a NULL T the members are built from
 * arg3, which is treated as a tcp_t *:
 *
 *  - ports come from the connection's local/foreign port pair; "send"
 *    reports local as source, "receive" reports foreign as source;
 *  - seq is tcp_snxt less tcp_last_sent_len for "send", and tcp_rnxt less
 *    tcp_last_recv_len for "receive";
 *  - ack is tcp_rnxt for "send" and tcp_snxt for "receive";
 *  - offset is the minimum header length, flags are TH_ACK, window is
 *    tcp_swnd, and checksum and urgent pointer are 0.
 *
 * Where arg3 is consulted and is NULL, or the probe is neither "send" nor
 * "receive", the member is 0.
 *
 * NOTE(review): tcp_hdr is NULL even when T is non-NULL, unlike the plain
 * tcph_t * translator; confirm that this is intended.
 * NOTE(review): tcp_swnd is a 32-bit quantity in tcpsinfo_t but tcp_window
 * is 16 bits wide, so the fallback value may be narrowed.
 */
#pragma D binding "1.6.3" translator
translator tcpinfo_t < __dtrace_tcp_tcph_t *T > {
	tcp_sport = (T != NULL) ?
	    ntohs(*(uint16_t *)(((tcph_t *)T)->th_lport)) :
	    (arg3 != NULL && probename == "send") ?
	    ntohs(((tcp_t *)arg3)->tcp_connp->u_port.connu_ports.connu_lport) :
	    (arg3 != NULL && probename == "receive") ?
	    ntohs(((tcp_t *)arg3)->tcp_connp->u_port.connu_ports.connu_fport) :
	    0;
	tcp_dport = (T != NULL) ?
	    ntohs(*(uint16_t *)(((tcph_t *)T)->th_fport)) :
	    (arg3 != NULL && probename == "send") ?
	    ntohs(((tcp_t *)arg3)->tcp_connp->u_port.connu_ports.connu_fport) :
	    (arg3 != NULL && probename == "receive") ?
	    ntohs(((tcp_t *)arg3)->tcp_connp->u_port.connu_ports.connu_lport) :
	    0;
	tcp_seq = (T != NULL) ?
	    ntohl(*(uint32_t *)(((tcph_t *)T)->th_seq)) :
	    (arg3 != NULL && probename == "send") ?
	    ((tcp_t *)arg3)->tcp_snxt - ((tcp_t *)arg3)->tcp_last_sent_len :
	    (arg3 != NULL && probename == "receive") ?
	    ((tcp_t *)arg3)->tcp_rnxt - ((tcp_t *)arg3)->tcp_last_recv_len :
	    0;
	tcp_ack = (T != NULL) ?
	    ntohl(*(uint32_t *)(((tcph_t *)T)->th_ack)) :
	    (arg3 != NULL && probename == "send") ?
	    ((tcp_t *)arg3)->tcp_rnxt :
	    (arg3 != NULL && probename == "receive") ?
	    ((tcp_t *)arg3)->tcp_snxt :
	    0;
	tcp_offset = (T != NULL) ?
	    ((*(uint8_t *)(((tcph_t *)T)->th_offset_and_rsrvd)) & 0xf0) >> 2 :
	    @TCP_MIN_HEADER_LENGTH@;
	tcp_flags = (T != NULL) ?
	    *(uint8_t *)(((tcph_t *)T)->th_flags) :
	    TH_ACK;
	tcp_window = (T != NULL) ?
	    ntohs(*(uint16_t *)(((tcph_t *)T)->th_win)) :
	    (arg3 != NULL) ? ((tcp_t *)arg3)->tcp_swnd : 0;
	tcp_checksum = (T != NULL) ?
	    ntohs(*(uint16_t *)(((tcph_t *)T)->th_sum)) :
	    0;
	tcp_urgent = (T != NULL) ?
	    ntohs(*(uint16_t *)(((tcph_t *)T)->th_urp)) :
	    0;
	tcp_hdr = NULL;
};
211*9cd928feSAlan Maguire
/*
 * Translate a kernel tcp_t into the stable tcpsinfo_t.
 *
 * A NULL T is tolerated: numeric members become 0, both address strings
 * become "<unknown>" and the state becomes TCP_STATE_CLOSED.  tcps_addr is
 * always the pointer value itself.
 */
#pragma D binding "1.6.3" translator
translator tcpsinfo_t < tcp_t *T > {
	tcps_addr = (uintptr_t)T;
	/*
	 * tcps_local and tcps_active ought to come straight from
	 * tcp_t->tcp_loopback and tcp_t->tcp_active_open, but those are bit
	 * fields and cannot be read until CR 6876830 is fixed, so they are
	 * derived indirectly.  A connection counts as local when its IP
	 * header has identical source and destination addresses, or when it
	 * has no IP header at all; it counts as an active open when it has
	 * no saved listener.
	 */
	tcps_local = (T == NULL) ? 0 :
	    (T->tcp_ipha ?
	    (T->tcp_ipha->ipha_src == T->tcp_ipha->ipha_dst) : 1);
	tcps_active = (T != NULL) ? !T->tcp_saved_listener : 0;
	tcps_lport = (T != NULL) ?
	    ntohs(T->tcp_connp->u_port.connu_ports.connu_lport) : 0;
	tcps_rport = (T != NULL) ?
	    ntohs(T->tcp_connp->u_port.connu_ports.connu_fport) : 0;
	tcps_laddr = (T != NULL) ?
	    inet_ntoa6(&T->tcp_connp->connua_v6addr.connua_laddr) :
	    "<unknown>";
	tcps_raddr = (T != NULL) ?
	    inet_ntoa6(&T->tcp_connp->connua_v6addr.connua_faddr) :
	    "<unknown>";
	tcps_state = (T != NULL) ? T->tcp_state : TCP_STATE_CLOSED;
	tcps_iss = (T != NULL) ? T->tcp_iss : 0;
	tcps_suna = (T != NULL) ? T->tcp_suna : 0;
	tcps_snxt = (T != NULL) ? T->tcp_snxt : 0;
	tcps_rack = (T != NULL) ? T->tcp_rack : 0;
	tcps_rnxt = (T != NULL) ? T->tcp_rnxt : 0;
	tcps_swnd = (T != NULL) ? T->tcp_swnd : 0;
	tcps_snd_ws = (T != NULL) ? T->tcp_snd_ws : 0;
	tcps_rwnd = (T != NULL) ? T->tcp_rwnd : 0;
	tcps_rcv_ws = (T != NULL) ? T->tcp_rcv_ws : 0;
	tcps_cwnd = (T != NULL) ? T->tcp_cwnd : 0;
	tcps_cwnd_ssthresh = (T != NULL) ? T->tcp_cwnd_ssthresh : 0;
	tcps_sack_fack = (T != NULL) ? T->tcp_sack_info.tcp_fack : 0;
	tcps_sack_snxt = (T != NULL) ? T->tcp_sack_info.tcp_sack_snxt : 0;
	tcps_rto = (T != NULL) ? T->tcp_rto : 0;
	tcps_mss = (T != NULL) ? T->tcp_mss : 0;
	/*
	 * A send is flagged as a retransmission when the sequence number in
	 * the outgoing header (arg4, a tcph_t *) is >= tcp_rexmit_nxt and
	 * < tcp_rexmit_max.  The tcp_rexmit bit field cannot be used because
	 * of CR 6876830.  Outside the "send" probe, or without a header,
	 * the result is 0.
	 * NOTE(review): this is a plain unsigned range test; sequence-number
	 * wraparound is not handled here.
	 */
	tcps_retransmit =
	    (T != NULL && probename == "send" && arg4 != NULL &&
	    ntohl(*(uint32_t *)(((tcph_t *)arg4)->th_seq)) >=
	    T->tcp_rexmit_nxt &&
	    ntohl(*(uint32_t *)(((tcph_t *)arg4)->th_seq)) <
	    T->tcp_rexmit_max) ? 1 : 0;
};
259*9cd928feSAlan Maguire
/*
 * Translate the previous-state probe argument into a tcplsinfo_t.
 *
 * The old state is declared as an int32_t, but as a probe argument it
 * arrives as an int64_t.  It is cast back to int32_t explicitly so that
 * the negatively-valued states are interpreted correctly.
 */
#pragma D binding "1.6.3" translator
translator tcplsinfo_t < int64_t I > {
	tcps_state = (int32_t)I;
};
270