1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25#pragma D depends_on module unix
26#pragma D depends_on provider tcp
27
/*
 * TCP header flag bits, for testing against the tcp_flags member of
 * tcpinfo_t (e.g. args[4]->tcp_flags & TH_SYN).  The @...@ tokens are
 * placeholders; presumably they are substituted with the kernel's numeric
 * values when this file is built -- confirm against the build rules.
 */
inline int TH_FIN = @TH_FIN@;
#pragma D binding "1.6.3" TH_FIN
inline int TH_SYN = @TH_SYN@;
#pragma D binding "1.6.3" TH_SYN
inline int TH_RST = @TH_RST@;
#pragma D binding "1.6.3" TH_RST
inline int TH_PUSH = @TH_PUSH@;
#pragma D binding "1.6.3" TH_PUSH
inline int TH_ACK = @TH_ACK@;
#pragma D binding "1.6.3" TH_ACK
inline int TH_URG = @TH_URG@;
#pragma D binding "1.6.3" TH_URG
inline int TH_ECE = @TH_ECE@;
#pragma D binding "1.6.3" TH_ECE
inline int TH_CWR = @TH_CWR@;
#pragma D binding "1.6.3" TH_CWR
44
/*
 * TCP connection state values, as stored in the tcps_state members of
 * tcpsinfo_t and tcplsinfo_t and accepted by tcp_state_string[].  They are
 * declared as signed int32_t because some states are negatively valued.
 * The @TCPS_*@ tokens are placeholders; presumably they are substituted
 * with the kernel's numeric values when this file is built -- confirm
 * against the build rules.
 */
inline int32_t TCP_STATE_CLOSED = @TCPS_CLOSED@;
#pragma D binding "1.6.3" TCP_STATE_CLOSED
inline int32_t TCP_STATE_IDLE = @TCPS_IDLE@;
#pragma D binding "1.6.3" TCP_STATE_IDLE
inline int32_t TCP_STATE_BOUND = @TCPS_BOUND@;
#pragma D binding "1.6.3" TCP_STATE_BOUND
inline int32_t TCP_STATE_LISTEN = @TCPS_LISTEN@;
#pragma D binding "1.6.3" TCP_STATE_LISTEN
inline int32_t TCP_STATE_SYN_SENT = @TCPS_SYN_SENT@;
#pragma D binding "1.6.3" TCP_STATE_SYN_SENT
inline int32_t TCP_STATE_SYN_RECEIVED = @TCPS_SYN_RCVD@;
#pragma D binding "1.6.3" TCP_STATE_SYN_RECEIVED
inline int32_t TCP_STATE_ESTABLISHED = @TCPS_ESTABLISHED@;
#pragma D binding "1.6.3" TCP_STATE_ESTABLISHED
inline int32_t TCP_STATE_CLOSE_WAIT = @TCPS_CLOSE_WAIT@;
#pragma D binding "1.6.3" TCP_STATE_CLOSE_WAIT
inline int32_t TCP_STATE_FIN_WAIT_1 = @TCPS_FIN_WAIT_1@;
#pragma D binding "1.6.3" TCP_STATE_FIN_WAIT_1
inline int32_t TCP_STATE_CLOSING = @TCPS_CLOSING@;
#pragma D binding "1.6.3" TCP_STATE_CLOSING
inline int32_t TCP_STATE_LAST_ACK = @TCPS_LAST_ACK@;
#pragma D binding "1.6.3" TCP_STATE_LAST_ACK
inline int32_t TCP_STATE_FIN_WAIT_2 = @TCPS_FIN_WAIT_2@;
#pragma D binding "1.6.3" TCP_STATE_FIN_WAIT_2
inline int32_t TCP_STATE_TIME_WAIT = @TCPS_TIME_WAIT@;
#pragma D binding "1.6.3" TCP_STATE_TIME_WAIT
71
/*
 * tcp_state_string[] maps a TCP_STATE_* value to a printable name of the
 * form "state-<name>".  A value that matches none of the TCP_STATE_*
 * constants yields "<unknown>".
 */
inline string tcp_state_string[int32_t state] =
	(state == TCP_STATE_CLOSED) ? "state-closed" :
	(state == TCP_STATE_IDLE) ? "state-idle" :
	(state == TCP_STATE_BOUND) ? "state-bound" :
	(state == TCP_STATE_LISTEN) ? "state-listen" :
	(state == TCP_STATE_SYN_SENT) ? "state-syn-sent" :
	(state == TCP_STATE_SYN_RECEIVED) ? "state-syn-received" :
	(state == TCP_STATE_ESTABLISHED) ? "state-established" :
	(state == TCP_STATE_CLOSE_WAIT) ? "state-close-wait" :
	(state == TCP_STATE_FIN_WAIT_1) ? "state-fin-wait-1" :
	(state == TCP_STATE_CLOSING) ? "state-closing" :
	(state == TCP_STATE_LAST_ACK) ? "state-last-ack" :
	(state == TCP_STATE_FIN_WAIT_2) ? "state-fin-wait-2" :
	(state == TCP_STATE_TIME_WAIT) ? "state-time-wait" :
	"<unknown>";
#pragma D binding "1.6.3" tcp_state_string
91
/*
 * tcpinfo_t describes the fields of a TCP header.  The translators in this
 * file fill it in either from a raw tcph_t or, when no header is available,
 * from tcp_t state.  Multi-byte header fields are passed through
 * ntohs()/ntohl() before being stored here.
 */
typedef struct tcpinfo {
	uint16_t tcp_sport;		/* source port */
	uint16_t tcp_dport;		/* destination port */
	uint32_t tcp_seq;		/* sequence number */
	uint32_t tcp_ack;		/* acknowledgment number */
	uint8_t tcp_offset;		/* data offset, in bytes */
	uint8_t tcp_flags;		/* flags (TH_* bits) */
	uint16_t tcp_window;		/* window size */
	uint16_t tcp_checksum;		/* checksum */
	uint16_t tcp_urgent;		/* urgent data pointer */
	tcph_t *tcp_hdr;		/* raw TCP header; may be NULL */
} tcpinfo_t;
107
/*
 * tcpsinfo_t contains stable TCP connection details taken from a tcp_t.
 * When the tcp_t pointer being translated is NULL, the numeric members are
 * 0, the address strings are "<unknown>" and tcps_state is
 * TCP_STATE_CLOSED.
 */
typedef struct tcpsinfo {
	uintptr_t tcps_addr;		/* address of the translated tcp_t */
	int tcps_local;			/* is delivered locally, boolean */
	int tcps_active;		/* active open (from here), boolean */
	uint16_t tcps_lport;		/* local port */
	uint16_t tcps_rport;		/* remote port */
	string tcps_laddr;		/* local address, as a string */
	string tcps_raddr;		/* remote address, as a string */
	int32_t tcps_state;		/* TCP state (a TCP_STATE_* value) */
	uint32_t tcps_iss;		/* Initial sequence # sent */
	uint32_t tcps_suna;		/* sequence # sent but unacked */
	uint32_t tcps_snxt;		/* next sequence # to send */
	uint32_t tcps_rack;		/* sequence # we have acked */
	uint32_t tcps_rnxt;		/* next sequence # expected */
	uint32_t tcps_swnd;		/* send window size */
	int32_t tcps_snd_ws;		/* send window scaling */
	uint32_t tcps_rwnd;		/* receive window size */
	int32_t tcps_rcv_ws;		/* receive window scaling */
	uint32_t tcps_cwnd;		/* congestion window */
	uint32_t tcps_cwnd_ssthresh;	/* threshold for congestion avoidance */
	uint32_t tcps_sack_fack;	/* SACK sequence # we have acked */
	uint32_t tcps_sack_snxt;	/* next SACK seq # for retransmission */
	uint32_t tcps_rto;		/* round-trip timeout, msec */
	uint32_t tcps_mss;		/* max segment size */
	int tcps_retransmit;		/* retransmit send event, boolean */
} tcpsinfo_t;
137
/*
 * tcplsinfo_t provides the previous TCP state for state-change events.
 * Its single member holds a TCP_STATE_* value and can be passed to
 * tcp_state_string[] to obtain a printable name.
 */
typedef struct tcplsinfo {
	int32_t tcps_state;		/* previous TCP state */
} tcplsinfo_t;
144
/*
 * __dtrace_tcp_tcph_t is the input type of the second tcpinfo_t translator.
 * That translator uses the tcph_t * passed in when it is non-NULL; when it
 * is NULL, it instead uses arg3 (a tcp_t *) from the tcp:::send and
 * tcp:::receive probes and derives the tcpinfo_t from the tcp_t.  When no
 * headers are available - as is the case for TCP fusion tcp:::send and
 * tcp:::receive - this allows us to present the consumer with header data
 * based on tcp_t * content and hide TCP fusion implementation details.
 */
typedef tcph_t * __dtrace_tcp_tcph_t;
155
/*
 * Translate a raw TCP header (tcph_t *) into a tcpinfo_t.  T is
 * dereferenced unconditionally; no NULL check is made here.  Multi-byte
 * header fields are converted with ntohs()/ntohl().
 */
#pragma D binding "1.6.3" translator
translator tcpinfo_t < tcph_t *T > {
	/* Keep a pointer to the raw header for consumers that need it. */
	tcp_hdr = T;

	/* Ports. */
	tcp_sport = ntohs(*((uint16_t *)(T->th_lport)));
	tcp_dport = ntohs(*((uint16_t *)(T->th_fport)));

	/* Sequence and acknowledgment numbers. */
	tcp_seq = ntohl(*((uint32_t *)(T->th_seq)));
	tcp_ack = ntohl(*((uint32_t *)(T->th_ack)));

	/*
	 * The upper four bits of th_offset_and_rsrvd, shifted right by two
	 * rather than four, i.e. already multiplied by 4 to give bytes.
	 */
	tcp_offset = ((*((uint8_t *)(T->th_offset_and_rsrvd))) & 0xf0) >> 2;
	tcp_flags = *((uint8_t *)(T->th_flags));

	/* Window, checksum and urgent pointer. */
	tcp_window = ntohs(*((uint16_t *)(T->th_win)));
	tcp_checksum = ntohs(*((uint16_t *)(T->th_sum)));
	tcp_urgent = ntohs(*((uint16_t *)(T->th_urp)));
};
169
/*
 * Translate a __dtrace_tcp_tcph_t * into a tcpinfo_t.
 *
 * When T is non-NULL it is treated as a tcph_t * and every member except
 * tcp_hdr is read from the header.  When T is NULL the members are derived
 * from arg3 (cast to a tcp_t *), depending on probename:
 *
 *	"send":		sport/dport = local/remote port,
 *			seq = tcp_snxt - tcp_last_sent_len, ack = tcp_rnxt
 *	"receive":	sport/dport = remote/local port,
 *			seq = tcp_rnxt - tcp_last_recv_len, ack = tcp_snxt
 *
 * If arg3 is also NULL, or probename is neither of the above, those members
 * are 0.  With no header, tcp_offset is the minimum TCP header length,
 * tcp_flags is TH_ACK, tcp_window is tcp_swnd (0 if arg3 is NULL), and the
 * checksum and urgent pointer are 0.  tcp_hdr is always NULL.
 */
#pragma D binding "1.6.3" translator
translator tcpinfo_t < __dtrace_tcp_tcph_t *T > {
	tcp_sport = (T != NULL) ?
	    ntohs(*(uint16_t *)((tcph_t *)T)->th_lport) :
	    (arg3 == NULL) ? 0 :
	    (probename == "send") ?
	    ntohs(((tcp_t *)arg3)->tcp_connp->u_port.connu_ports.connu_lport) :
	    (probename == "receive") ?
	    ntohs(((tcp_t *)arg3)->tcp_connp->u_port.connu_ports.connu_fport) :
	    0;
	tcp_dport = (T != NULL) ?
	    ntohs(*(uint16_t *)((tcph_t *)T)->th_fport) :
	    (arg3 == NULL) ? 0 :
	    (probename == "send") ?
	    ntohs(((tcp_t *)arg3)->tcp_connp->u_port.connu_ports.connu_fport) :
	    (probename == "receive") ?
	    ntohs(((tcp_t *)arg3)->tcp_connp->u_port.connu_ports.connu_lport) :
	    0;
	tcp_seq = (T != NULL) ?
	    ntohl(*(uint32_t *)((tcph_t *)T)->th_seq) :
	    (arg3 == NULL) ? 0 :
	    (probename == "send") ?
	    ((tcp_t *)arg3)->tcp_snxt - ((tcp_t *)arg3)->tcp_last_sent_len :
	    (probename == "receive") ?
	    ((tcp_t *)arg3)->tcp_rnxt - ((tcp_t *)arg3)->tcp_last_recv_len :
	    0;
	tcp_ack = (T != NULL) ?
	    ntohl(*(uint32_t *)((tcph_t *)T)->th_ack) :
	    (arg3 == NULL) ? 0 :
	    (probename == "send") ? ((tcp_t *)arg3)->tcp_rnxt :
	    (probename == "receive") ? ((tcp_t *)arg3)->tcp_snxt :
	    0;
	tcp_offset = (T != NULL) ?
	    ((*(uint8_t *)((tcph_t *)T)->th_offset_and_rsrvd & 0xf0) >> 2) :
	    @TCP_MIN_HEADER_LENGTH@;
	tcp_flags = (T != NULL) ?
	    *(uint8_t *)((tcph_t *)T)->th_flags : TH_ACK;
	tcp_window = (T != NULL) ?
	    ntohs(*(uint16_t *)((tcph_t *)T)->th_win) :
	    (arg3 != NULL) ? ((tcp_t *)arg3)->tcp_swnd : 0;
	tcp_checksum = (T != NULL) ?
	    ntohs(*(uint16_t *)((tcph_t *)T)->th_sum) : 0;
	tcp_urgent = (T != NULL) ?
	    ntohs(*(uint16_t *)((tcph_t *)T)->th_urp) : 0;
	tcp_hdr = NULL;
};
211
/*
 * Translate a tcp_t * into a tcpsinfo_t.  A NULL tcp_t * is tolerated:
 * numeric members become 0, the address strings become "<unknown>" and
 * tcps_state becomes TCP_STATE_CLOSED.
 */
#pragma D binding "1.6.3" translator
translator tcpsinfo_t < tcp_t *T > {
	tcps_addr = (uintptr_t)T;
	/*
	 * tcps_local and tcps_active would ideally be read straight from
	 * tcp_t->tcp_loopback and tcp_t->tcp_active_open, but those are
	 * bit fields and can't be used until CR 6876830 is fixed.  Until
	 * then: a connection counts as local when its IP header has equal
	 * source and destination addresses (or when there is no tcp_ipha
	 * at all), and as an active open when it has no saved listener.
	 */
	tcps_local = (T != NULL) ?
	    ((T->tcp_ipha != NULL) ?
	    (T->tcp_ipha->ipha_src == T->tcp_ipha->ipha_dst) : 1) : 0;
	tcps_active = (T != NULL) ? !T->tcp_saved_listener : 0;
	tcps_lport = (T != NULL) ?
	    ntohs(T->tcp_connp->u_port.connu_ports.connu_lport) : 0;
	tcps_rport = (T != NULL) ?
	    ntohs(T->tcp_connp->u_port.connu_ports.connu_fport) : 0;
	tcps_laddr = (T != NULL) ?
	    inet_ntoa6(&T->tcp_connp->connua_v6addr.connua_laddr) :
	    "<unknown>";
	tcps_raddr = (T != NULL) ?
	    inet_ntoa6(&T->tcp_connp->connua_v6addr.connua_faddr) :
	    "<unknown>";
	tcps_state = (T != NULL) ? T->tcp_state : TCP_STATE_CLOSED;
	tcps_iss = (T != NULL) ? T->tcp_iss : 0;
	tcps_suna = (T != NULL) ? T->tcp_suna : 0;
	tcps_snxt = (T != NULL) ? T->tcp_snxt : 0;
	tcps_rack = (T != NULL) ? T->tcp_rack : 0;
	tcps_rnxt = (T != NULL) ? T->tcp_rnxt : 0;
	tcps_swnd = (T != NULL) ? T->tcp_swnd : 0;
	tcps_snd_ws = (T != NULL) ? T->tcp_snd_ws : 0;
	tcps_rwnd = (T != NULL) ? T->tcp_rwnd : 0;
	tcps_rcv_ws = (T != NULL) ? T->tcp_rcv_ws : 0;
	tcps_cwnd = (T != NULL) ? T->tcp_cwnd : 0;
	tcps_cwnd_ssthresh = (T != NULL) ? T->tcp_cwnd_ssthresh : 0;
	tcps_sack_fack = (T != NULL) ? T->tcp_sack_info.tcp_fack : 0;
	tcps_sack_snxt = (T != NULL) ? T->tcp_sack_info.tcp_sack_snxt : 0;
	tcps_rto = (T != NULL) ? T->tcp_rto : 0;
	tcps_mss = (T != NULL) ? T->tcp_mss : 0;
	/*
	 * A send is flagged as a retransmission when the sequence number in
	 * the header passed as arg4 lies in [tcp_rexmit_nxt, tcp_rexmit_max).
	 * This is only evaluated for the "send" probe with a non-NULL arg4;
	 * otherwise the result is 0.  The tcp_rexmit bitfield cannot be used
	 * instead because of CR 6876830.
	 */
	tcps_retransmit = (T != NULL && probename == "send" && arg4 != NULL &&
	    ntohl(*(uint32_t *)((tcph_t *)arg4)->th_seq) >= T->tcp_rexmit_nxt &&
	    ntohl(*(uint32_t *)((tcph_t *)arg4)->th_seq) < T->tcp_rexmit_max) ?
	    1 : 0;
};
259
/*
 * Translate the old-state probe argument into a tcplsinfo_t.
 *
 * Note: the old state is specified as an int32_t, but because it is a
 * probe argument it reaches this translator as an int64_t.  It is cast
 * back to int32_t explicitly so that negatively-valued states are
 * interpreted correctly.
 */
#pragma D binding "1.6.3" translator
translator tcplsinfo_t < int64_t I > {
	tcps_state = (int32_t)I;
};
270