/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2016 by Delphix. All rights reserved.
 */

/*
 * This file contains code imported from the OFED rds source file recv.c
 * Oracle elects to have and use the contents of rds_recv.c under and governed
 * by the OpenIB.org BSD license (see below for full license text). However,
 * the following notice accompanied the original version of this file:
 */

/*
 * Copyright (c) 2006 Oracle.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
#include <sys/rds.h>

#include <sys/ib/clients/rdsv3/rdsv3.h>
#include <sys/ib/clients/rdsv3/rdma.h>
#include <sys/ib/clients/rdsv3/rdsv3_debug.h>

51c0dd49bdSEiji Ota void
rdsv3_inc_init(struct rdsv3_incoming * inc,struct rdsv3_connection * conn,uint32_be_t saddr)52c0dd49bdSEiji Ota rdsv3_inc_init(struct rdsv3_incoming *inc, struct rdsv3_connection *conn,
53c0dd49bdSEiji Ota     uint32_be_t saddr)
54c0dd49bdSEiji Ota {
55c0dd49bdSEiji Ota 	RDSV3_DPRINTF5("rdsv3_inc_init", "Enter(inc: %p, conn: %p)", inc, conn);
56c0dd49bdSEiji Ota 	inc->i_refcount = 1;
57c0dd49bdSEiji Ota 	list_link_init(&inc->i_item);
58c0dd49bdSEiji Ota 	inc->i_conn = conn;
59c0dd49bdSEiji Ota 	inc->i_saddr = saddr;
60c0dd49bdSEiji Ota 	inc->i_rdma_cookie = 0;
61c0dd49bdSEiji Ota }
62c0dd49bdSEiji Ota 
63c0dd49bdSEiji Ota void
rdsv3_inc_addref(struct rdsv3_incoming * inc)64c0dd49bdSEiji Ota rdsv3_inc_addref(struct rdsv3_incoming *inc)
65c0dd49bdSEiji Ota {
66c0dd49bdSEiji Ota 	RDSV3_DPRINTF4("rdsv3_inc_addref",
67c0dd49bdSEiji Ota 	    "addref inc %p ref %d", inc, atomic_get(&inc->i_refcount));
681a5e258fSJosef 'Jeff' Sipek 	atomic_inc_32(&inc->i_refcount);
69c0dd49bdSEiji Ota }
70c0dd49bdSEiji Ota 
71c0dd49bdSEiji Ota void
rdsv3_inc_put(struct rdsv3_incoming * inc)72c0dd49bdSEiji Ota rdsv3_inc_put(struct rdsv3_incoming *inc)
73c0dd49bdSEiji Ota {
74c0dd49bdSEiji Ota 	RDSV3_DPRINTF4("rdsv3_inc_put", "put inc %p ref %d",
75c0dd49bdSEiji Ota 	    inc, atomic_get(&inc->i_refcount));
76c0dd49bdSEiji Ota 	if (atomic_dec_and_test(&inc->i_refcount)) {
77c0dd49bdSEiji Ota 		ASSERT(!list_link_active(&inc->i_item));
78c0dd49bdSEiji Ota 
79c0dd49bdSEiji Ota 		inc->i_conn->c_trans->inc_free(inc);
80c0dd49bdSEiji Ota 	}
81c0dd49bdSEiji Ota }
82c0dd49bdSEiji Ota 
83c0dd49bdSEiji Ota /*ARGSUSED*/
84c0dd49bdSEiji Ota static void
rdsv3_recv_rcvbuf_delta(struct rdsv3_sock * rs,struct rsock * sk,struct rdsv3_cong_map * map,int delta,uint16_be_t port)85c0dd49bdSEiji Ota rdsv3_recv_rcvbuf_delta(struct rdsv3_sock *rs, struct rsock *sk,
86c0dd49bdSEiji Ota     struct rdsv3_cong_map *map,
87c0dd49bdSEiji Ota     int delta, uint16_be_t port)
88c0dd49bdSEiji Ota {
89c0dd49bdSEiji Ota 	int now_congested;
90c0dd49bdSEiji Ota 
91c0dd49bdSEiji Ota 	RDSV3_DPRINTF4("rdsv3_recv_rcvbuf_delta",
92c0dd49bdSEiji Ota 	    "Enter(rs: %p, map: %p, delta: %d, port: %d)",
93c0dd49bdSEiji Ota 	    rs, map, delta, port);
94c0dd49bdSEiji Ota 
95c0dd49bdSEiji Ota 	if (delta == 0)
96c0dd49bdSEiji Ota 		return;
97c0dd49bdSEiji Ota 
98c0dd49bdSEiji Ota 	rs->rs_rcv_bytes += delta;
99c0dd49bdSEiji Ota 	now_congested = rs->rs_rcv_bytes > rdsv3_sk_rcvbuf(rs);
100c0dd49bdSEiji Ota 
101c0dd49bdSEiji Ota 	RDSV3_DPRINTF5("rdsv3_recv_rcvbuf_delta",
102c0dd49bdSEiji Ota 	    "rs %p (%u.%u.%u.%u:%u) recv bytes %d buf %d "
103c0dd49bdSEiji Ota 	    "now_cong %d delta %d",
104c0dd49bdSEiji Ota 	    rs, NIPQUAD(rs->rs_bound_addr),
105c0dd49bdSEiji Ota 	    (int)ntohs(rs->rs_bound_port), rs->rs_rcv_bytes,
106c0dd49bdSEiji Ota 	    rdsv3_sk_rcvbuf(rs), now_congested, delta);
107c0dd49bdSEiji Ota 
108c0dd49bdSEiji Ota 	/* wasn't -> am congested */
109c0dd49bdSEiji Ota 	if (!rs->rs_congested && now_congested) {
110c0dd49bdSEiji Ota 		rs->rs_congested = 1;
111c0dd49bdSEiji Ota 		rdsv3_cong_set_bit(map, port);
112c0dd49bdSEiji Ota 		rdsv3_cong_queue_updates(map);
113c0dd49bdSEiji Ota 	}
114c0dd49bdSEiji Ota 	/* was -> aren't congested */
115c0dd49bdSEiji Ota 	/*
116c0dd49bdSEiji Ota 	 * Require more free space before reporting uncongested to prevent
117c0dd49bdSEiji Ota 	 * bouncing cong/uncong state too often
118c0dd49bdSEiji Ota 	 */
119c0dd49bdSEiji Ota 	else if (rs->rs_congested &&
120c0dd49bdSEiji Ota 	    (rs->rs_rcv_bytes < (rdsv3_sk_rcvbuf(rs)/2))) {
121c0dd49bdSEiji Ota 		rs->rs_congested = 0;
122c0dd49bdSEiji Ota 		rdsv3_cong_clear_bit(map, port);
123c0dd49bdSEiji Ota 		rdsv3_cong_queue_updates(map);
124c0dd49bdSEiji Ota 	}
125c0dd49bdSEiji Ota 
126c0dd49bdSEiji Ota 	/* do nothing if no change in cong state */
127c0dd49bdSEiji Ota 
128c0dd49bdSEiji Ota 	RDSV3_DPRINTF4("rdsv3_recv_rcvbuf_delta", "Return(rs: %p)", rs);
129c0dd49bdSEiji Ota }
130c0dd49bdSEiji Ota 
131c0dd49bdSEiji Ota /*
132c0dd49bdSEiji Ota  * Process all extension headers that come with this message.
133c0dd49bdSEiji Ota  */
134c0dd49bdSEiji Ota static void
rdsv3_recv_incoming_exthdrs(struct rdsv3_incoming * inc,struct rdsv3_sock * rs)135c0dd49bdSEiji Ota rdsv3_recv_incoming_exthdrs(struct rdsv3_incoming *inc, struct rdsv3_sock *rs)
136c0dd49bdSEiji Ota {
137c0dd49bdSEiji Ota 	struct rdsv3_header *hdr = &inc->i_hdr;
138c0dd49bdSEiji Ota 	unsigned int pos = 0, type, len;
139c0dd49bdSEiji Ota 	union {
140c0dd49bdSEiji Ota 		struct rdsv3_ext_header_version version;
141c0dd49bdSEiji Ota 		struct rdsv3_ext_header_rdma rdma;
142c0dd49bdSEiji Ota 		struct rdsv3_ext_header_rdma_dest rdma_dest;
143c0dd49bdSEiji Ota 	} buffer;
144c0dd49bdSEiji Ota 
145c0dd49bdSEiji Ota 	RDSV3_DPRINTF4("rdsv3_recv_incoming_exthdrs", "Enter");
146c0dd49bdSEiji Ota 	while (1) {
147c0dd49bdSEiji Ota 		len = sizeof (buffer);
148c0dd49bdSEiji Ota 		type = rdsv3_message_next_extension(hdr, &pos, &buffer, &len);
149c0dd49bdSEiji Ota 		if (type == RDSV3_EXTHDR_NONE)
150c0dd49bdSEiji Ota 			break;
151c0dd49bdSEiji Ota 		RDSV3_DPRINTF4("recv_incoming_exthdrs", "type %d", type);
152c0dd49bdSEiji Ota 		/* Process extension header here */
153c0dd49bdSEiji Ota 		switch (type) {
154c0dd49bdSEiji Ota 		case RDSV3_EXTHDR_RDMA:
155c0dd49bdSEiji Ota 			rdsv3_rdma_unuse(rs, ntohl(buffer.rdma.h_rdma_rkey),
156c0dd49bdSEiji Ota 			    0);
157c0dd49bdSEiji Ota 			break;
158c0dd49bdSEiji Ota 
159c0dd49bdSEiji Ota 		case RDSV3_EXTHDR_RDMA_DEST:
160c0dd49bdSEiji Ota 			/*
161c0dd49bdSEiji Ota 			 * We ignore the size for now. We could stash it
162c0dd49bdSEiji Ota 			 * somewhere and use it for error checking.
163c0dd49bdSEiji Ota 			 */
164c0dd49bdSEiji Ota 			inc->i_rdma_cookie = rdsv3_rdma_make_cookie(
165c0dd49bdSEiji Ota 			    ntohl(buffer.rdma_dest.h_rdma_rkey),
166c0dd49bdSEiji Ota 			    ntohl(buffer.rdma_dest.h_rdma_offset));
167c0dd49bdSEiji Ota 
168c0dd49bdSEiji Ota 			break;
169c0dd49bdSEiji Ota 		}
170c0dd49bdSEiji Ota 	}
171c0dd49bdSEiji Ota 	RDSV3_DPRINTF4("rdsv3_recv_incoming_exthdrs", "Return");
172c0dd49bdSEiji Ota }
173c0dd49bdSEiji Ota 
174c0dd49bdSEiji Ota /*
175c0dd49bdSEiji Ota  * The transport must make sure that this is serialized against other
176c0dd49bdSEiji Ota  * rx and conn reset on this specific conn.
177c0dd49bdSEiji Ota  *
178c0dd49bdSEiji Ota  * We currently assert that only one fragmented message will be sent
179c0dd49bdSEiji Ota  * down a connection at a time.  This lets us reassemble in the conn
180c0dd49bdSEiji Ota  * instead of per-flow which means that we don't have to go digging through
181c0dd49bdSEiji Ota  * flows to tear down partial reassembly progress on conn failure and
182c0dd49bdSEiji Ota  * we save flow lookup and locking for each frag arrival.  It does mean
183c0dd49bdSEiji Ota  * that small messages will wait behind large ones.  Fragmenting at all
184c0dd49bdSEiji Ota  * is only to reduce the memory consumption of pre-posted buffers.
185c0dd49bdSEiji Ota  *
186c0dd49bdSEiji Ota  * The caller passes in saddr and daddr instead of us getting it from the
187c0dd49bdSEiji Ota  * conn.  This lets loopback, who only has one conn for both directions,
188c0dd49bdSEiji Ota  * tell us which roles the addrs in the conn are playing for this message.
189c0dd49bdSEiji Ota  */
190c0dd49bdSEiji Ota /* ARGSUSED */
191c0dd49bdSEiji Ota void
rdsv3_recv_incoming(struct rdsv3_connection * conn,uint32_be_t saddr,uint32_be_t daddr,struct rdsv3_incoming * inc,int gfp)192c0dd49bdSEiji Ota rdsv3_recv_incoming(struct rdsv3_connection *conn, uint32_be_t saddr,
193c0dd49bdSEiji Ota     uint32_be_t daddr, struct rdsv3_incoming *inc, int gfp)
194c0dd49bdSEiji Ota {
195c0dd49bdSEiji Ota 	struct rdsv3_sock *rs = NULL;
196c0dd49bdSEiji Ota 	struct rsock *sk;
197c0dd49bdSEiji Ota 
198c0dd49bdSEiji Ota 	inc->i_conn = conn;
199c0dd49bdSEiji Ota 	inc->i_rx_jiffies = jiffies;
200c0dd49bdSEiji Ota 
201c0dd49bdSEiji Ota 	RDSV3_DPRINTF5("rdsv3_recv_incoming",
202c0dd49bdSEiji Ota 	    "conn %p next %llu inc %p seq %llu len %u sport %u dport %u "
203c0dd49bdSEiji Ota 	    "flags 0x%x rx_jiffies %lu", conn,
204c0dd49bdSEiji Ota 	    (unsigned long long)conn->c_next_rx_seq,
205c0dd49bdSEiji Ota 	    inc,
206c0dd49bdSEiji Ota 	    (unsigned long long)ntohll(inc->i_hdr.h_sequence),
207c0dd49bdSEiji Ota 	    ntohl(inc->i_hdr.h_len),
208c0dd49bdSEiji Ota 	    ntohs(inc->i_hdr.h_sport),
209c0dd49bdSEiji Ota 	    ntohs(inc->i_hdr.h_dport),
210c0dd49bdSEiji Ota 	    inc->i_hdr.h_flags,
211c0dd49bdSEiji Ota 	    inc->i_rx_jiffies);
212c0dd49bdSEiji Ota 
213c0dd49bdSEiji Ota 	/*
214c0dd49bdSEiji Ota 	 * Sequence numbers should only increase.  Messages get their
215c0dd49bdSEiji Ota 	 * sequence number as they're queued in a sending conn.  They
216c0dd49bdSEiji Ota 	 * can be dropped, though, if the sending socket is closed before
217c0dd49bdSEiji Ota 	 * they hit the wire.  So sequence numbers can skip forward
218c0dd49bdSEiji Ota 	 * under normal operation.  They can also drop back in the conn
219c0dd49bdSEiji Ota 	 * failover case as previously sent messages are resent down the
220c0dd49bdSEiji Ota 	 * new instance of a conn.  We drop those, otherwise we have
221c0dd49bdSEiji Ota 	 * to assume that the next valid seq does not come after a
222c0dd49bdSEiji Ota 	 * hole in the fragment stream.
223c0dd49bdSEiji Ota 	 *
224c0dd49bdSEiji Ota 	 * The headers don't give us a way to realize if fragments of
225c0dd49bdSEiji Ota 	 * a message have been dropped.  We assume that frags that arrive
226c0dd49bdSEiji Ota 	 * to a flow are part of the current message on the flow that is
227c0dd49bdSEiji Ota 	 * being reassembled.  This means that senders can't drop messages
228c0dd49bdSEiji Ota 	 * from the sending conn until all their frags are sent.
229c0dd49bdSEiji Ota 	 *
230c0dd49bdSEiji Ota 	 * XXX we could spend more on the wire to get more robust failure
231c0dd49bdSEiji Ota 	 * detection, arguably worth it to avoid data corruption.
232c0dd49bdSEiji Ota 	 */
233c0dd49bdSEiji Ota 	if (ntohll(inc->i_hdr.h_sequence) < conn->c_next_rx_seq &&
234c0dd49bdSEiji Ota 	    (inc->i_hdr.h_flags & RDSV3_FLAG_RETRANSMITTED)) {
235c0dd49bdSEiji Ota 		rdsv3_stats_inc(s_recv_drop_old_seq);
236c0dd49bdSEiji Ota 		goto out;
237c0dd49bdSEiji Ota 	}
238c0dd49bdSEiji Ota 	conn->c_next_rx_seq = ntohll(inc->i_hdr.h_sequence) + 1;
239c0dd49bdSEiji Ota 
240c0dd49bdSEiji Ota 	if (rdsv3_sysctl_ping_enable && inc->i_hdr.h_dport == 0) {
241c0dd49bdSEiji Ota 		rdsv3_stats_inc(s_recv_ping);
242c0dd49bdSEiji Ota 		(void) rdsv3_send_pong(conn, inc->i_hdr.h_sport);
243c0dd49bdSEiji Ota 		goto out;
244c0dd49bdSEiji Ota 	}
245c0dd49bdSEiji Ota 
24680166370Sagiri 	rs = rdsv3_find_bound(conn, inc->i_hdr.h_dport);
2475d5562f5SEiji Ota 	if (!rs) {
248c0dd49bdSEiji Ota 		rdsv3_stats_inc(s_recv_drop_no_sock);
249c0dd49bdSEiji Ota 		goto out;
250c0dd49bdSEiji Ota 	}
251c0dd49bdSEiji Ota 
252c0dd49bdSEiji Ota 	/* Process extension headers */
253c0dd49bdSEiji Ota 	rdsv3_recv_incoming_exthdrs(inc, rs);
254c0dd49bdSEiji Ota 
255c0dd49bdSEiji Ota 	/* We can be racing with rdsv3_release() which marks the socket dead. */
256c0dd49bdSEiji Ota 	sk = rdsv3_rs_to_sk(rs);
257c0dd49bdSEiji Ota 
258c0dd49bdSEiji Ota 	/* serialize with rdsv3_release -> sock_orphan */
259c0dd49bdSEiji Ota 	rw_enter(&rs->rs_recv_lock, RW_WRITER);
260c0dd49bdSEiji Ota 	if (!rdsv3_sk_sock_flag(sk, SOCK_DEAD)) {
261c0dd49bdSEiji Ota 		int error, bytes;
262c0dd49bdSEiji Ota 		RDSV3_DPRINTF5("rdsv3_recv_incoming",
263c0dd49bdSEiji Ota 		    "adding inc %p to rs %p's recv queue", inc, rs);
264c0dd49bdSEiji Ota 		rdsv3_stats_inc(s_recv_queued);
265c0dd49bdSEiji Ota 		rdsv3_recv_rcvbuf_delta(rs, sk, inc->i_conn->c_lcong,
266c0dd49bdSEiji Ota 		    ntohl(inc->i_hdr.h_len),
267c0dd49bdSEiji Ota 		    inc->i_hdr.h_dport);
268c0dd49bdSEiji Ota 		rdsv3_inc_addref(inc);
269c0dd49bdSEiji Ota 		list_insert_tail(&rs->rs_recv_queue, inc);
270c0dd49bdSEiji Ota 		bytes = rs->rs_rcv_bytes;
271c0dd49bdSEiji Ota 		rw_exit(&rs->rs_recv_lock);
272c0dd49bdSEiji Ota 
273c0dd49bdSEiji Ota 		__rdsv3_wake_sk_sleep(sk);
274c0dd49bdSEiji Ota 
275c0dd49bdSEiji Ota 		/* wake up anyone waiting in poll */
276c0dd49bdSEiji Ota 		sk->sk_upcalls->su_recv(sk->sk_upper_handle, NULL,
277c0dd49bdSEiji Ota 		    bytes, 0, &error, NULL);
278c0dd49bdSEiji Ota 		if (error != 0) {
279c0dd49bdSEiji Ota 			RDSV3_DPRINTF2("rdsv3_recv_incoming",
280c0dd49bdSEiji Ota 			    "su_recv returned: %d", error);
281c0dd49bdSEiji Ota 		}
282c0dd49bdSEiji Ota 	} else {
283c0dd49bdSEiji Ota 		rdsv3_stats_inc(s_recv_drop_dead_sock);
284c0dd49bdSEiji Ota 		rw_exit(&rs->rs_recv_lock);
285c0dd49bdSEiji Ota 	}
286c0dd49bdSEiji Ota 
287c0dd49bdSEiji Ota out:
288c0dd49bdSEiji Ota 	if (rs)
289c0dd49bdSEiji Ota 		rdsv3_sock_put(rs);
290c0dd49bdSEiji Ota }
291c0dd49bdSEiji Ota 
292c0dd49bdSEiji Ota /*
293c0dd49bdSEiji Ota  * be very careful here.  This is being called as the condition in
294c0dd49bdSEiji Ota  * wait_event_*() needs to cope with being called many times.
295c0dd49bdSEiji Ota  */
296c0dd49bdSEiji Ota static int
rdsv3_next_incoming(struct rdsv3_sock * rs,struct rdsv3_incoming ** inc)297c0dd49bdSEiji Ota rdsv3_next_incoming(struct rdsv3_sock *rs, struct rdsv3_incoming **inc)
298c0dd49bdSEiji Ota {
2995d5562f5SEiji Ota 	if (!*inc) {
300c0dd49bdSEiji Ota 		rw_enter(&rs->rs_recv_lock, RW_READER);
301c0dd49bdSEiji Ota 		if (!list_is_empty(&rs->rs_recv_queue)) {
302c0dd49bdSEiji Ota 			*inc = list_head(&rs->rs_recv_queue);
303c0dd49bdSEiji Ota 			rdsv3_inc_addref(*inc);
304c0dd49bdSEiji Ota 		}
305c0dd49bdSEiji Ota 		rw_exit(&rs->rs_recv_lock);
306c0dd49bdSEiji Ota 	}
307c0dd49bdSEiji Ota 
308c0dd49bdSEiji Ota 	return (*inc != NULL);
309c0dd49bdSEiji Ota }
310c0dd49bdSEiji Ota 
311c0dd49bdSEiji Ota static int
rdsv3_still_queued(struct rdsv3_sock * rs,struct rdsv3_incoming * inc,int drop)312c0dd49bdSEiji Ota rdsv3_still_queued(struct rdsv3_sock *rs, struct rdsv3_incoming *inc,
313c0dd49bdSEiji Ota     int drop)
314c0dd49bdSEiji Ota {
315c0dd49bdSEiji Ota 	struct rsock *sk = rdsv3_rs_to_sk(rs);
316c0dd49bdSEiji Ota 	int ret = 0;
317c0dd49bdSEiji Ota 
318c0dd49bdSEiji Ota 	RDSV3_DPRINTF4("rdsv3_still_queued", "Enter rs: %p inc: %p drop: %d",
319c0dd49bdSEiji Ota 	    rs, inc, drop);
320c0dd49bdSEiji Ota 
321c0dd49bdSEiji Ota 	rw_enter(&rs->rs_recv_lock, RW_WRITER);
322c0dd49bdSEiji Ota 	if (list_link_active(&inc->i_item)) {
323c0dd49bdSEiji Ota 		ret = 1;
324c0dd49bdSEiji Ota 		if (drop) {
325c0dd49bdSEiji Ota 			/* XXX make sure this i_conn is reliable */
326c0dd49bdSEiji Ota 			rdsv3_recv_rcvbuf_delta(rs, sk, inc->i_conn->c_lcong,
327c0dd49bdSEiji Ota 			    -ntohl(inc->i_hdr.h_len),
328c0dd49bdSEiji Ota 			    inc->i_hdr.h_dport);
329c0dd49bdSEiji Ota 			list_remove_node(&inc->i_item);
330c0dd49bdSEiji Ota 			rdsv3_inc_put(inc);
331c0dd49bdSEiji Ota 		}
332c0dd49bdSEiji Ota 	}
333c0dd49bdSEiji Ota 	rw_exit(&rs->rs_recv_lock);
334c0dd49bdSEiji Ota 
335c0dd49bdSEiji Ota 	RDSV3_DPRINTF5("rdsv3_still_queued",
336c0dd49bdSEiji Ota 	    "inc %p rs %p still %d dropped %d", inc, rs, ret, drop);
337c0dd49bdSEiji Ota 	return (ret);
338c0dd49bdSEiji Ota }
339c0dd49bdSEiji Ota 
340c0dd49bdSEiji Ota /*
341c0dd49bdSEiji Ota  * Pull errors off the error queue.
342c0dd49bdSEiji Ota  * If msghdr is NULL, we will just purge the error queue.
343c0dd49bdSEiji Ota  */
344c0dd49bdSEiji Ota int
rdsv3_notify_queue_get(struct rdsv3_sock * rs,struct msghdr * msghdr)345c0dd49bdSEiji Ota rdsv3_notify_queue_get(struct rdsv3_sock *rs, struct msghdr *msghdr)
346c0dd49bdSEiji Ota {
347c0dd49bdSEiji Ota 	struct rdsv3_notifier *notifier;
348fe817b60SEiji Ota 	struct rds_rdma_notify cmsg;
349c0dd49bdSEiji Ota 	unsigned int count = 0, max_messages = ~0U;
350c0dd49bdSEiji Ota 	list_t copy;
351c0dd49bdSEiji Ota 	int err = 0;
352c0dd49bdSEiji Ota 
353c0dd49bdSEiji Ota 	RDSV3_DPRINTF4("rdsv3_notify_queue_get", "Enter(rs: %p)", rs);
354c0dd49bdSEiji Ota 
355c0dd49bdSEiji Ota 	list_create(&copy, sizeof (struct rdsv3_notifier),
356c0dd49bdSEiji Ota 	    offsetof(struct rdsv3_notifier, n_list));
357c0dd49bdSEiji Ota 
358c0dd49bdSEiji Ota 
359c0dd49bdSEiji Ota 	/*
360c0dd49bdSEiji Ota 	 * put_cmsg copies to user space and thus may sleep. We can't do this
361c0dd49bdSEiji Ota 	 * with rs_lock held, so first grab as many notifications as we can
362c0dd49bdSEiji Ota 	 * stuff
363c0dd49bdSEiji Ota 	 * in the user provided cmsg buffer. We don't try to copy more, to avoid
364c0dd49bdSEiji Ota 	 * losing notifications - except when the buffer is so small that
365c0dd49bdSEiji Ota 	 * it wouldn't
366*48bbca81SDaniel Hoffman 	 * even hold a single notification. Then we give as much of this
367c0dd49bdSEiji Ota 	 * single
368c0dd49bdSEiji Ota 	 * msg as we can squeeze in, and set MSG_CTRUNC.
369c0dd49bdSEiji Ota 	 */
370c0dd49bdSEiji Ota 	if (msghdr) {
371c0dd49bdSEiji Ota 		max_messages =
372c0dd49bdSEiji Ota 		    msghdr->msg_controllen / CMSG_SPACE(sizeof (cmsg));
373c0dd49bdSEiji Ota 		if (!max_messages)
374c0dd49bdSEiji Ota 			max_messages = 1;
375c0dd49bdSEiji Ota 	}
376c0dd49bdSEiji Ota 
377c0dd49bdSEiji Ota 	mutex_enter(&rs->rs_lock);
378c0dd49bdSEiji Ota 	while (!list_is_empty(&rs->rs_notify_queue) && count < max_messages) {
379c0dd49bdSEiji Ota 		notifier = list_remove_head(&rs->rs_notify_queue);
380c0dd49bdSEiji Ota 		list_insert_tail(&copy, notifier);
381c0dd49bdSEiji Ota 		count++;
382c0dd49bdSEiji Ota 	}
383c0dd49bdSEiji Ota 	mutex_exit(&rs->rs_lock);
384c0dd49bdSEiji Ota 
385c0dd49bdSEiji Ota 	if (!count)
386c0dd49bdSEiji Ota 		return (0);
387c0dd49bdSEiji Ota 
388c0dd49bdSEiji Ota 	while (!list_is_empty(&copy)) {
389c0dd49bdSEiji Ota 		notifier = list_remove_head(&copy);
390c0dd49bdSEiji Ota 
391c0dd49bdSEiji Ota 		if (msghdr) {
392c0dd49bdSEiji Ota 			cmsg.user_token = notifier->n_user_token;
393c0dd49bdSEiji Ota 			cmsg.status  = notifier->n_status;
394c0dd49bdSEiji Ota 
395c0dd49bdSEiji Ota 			err = rdsv3_put_cmsg(msghdr, SOL_RDS,
396fe817b60SEiji Ota 			    RDS_CMSG_RDMA_STATUS, sizeof (cmsg), &cmsg);
397c0dd49bdSEiji Ota 			if (err)
398c0dd49bdSEiji Ota 				break;
399c0dd49bdSEiji Ota 		}
400c0dd49bdSEiji Ota 
401c0dd49bdSEiji Ota 		kmem_free(notifier, sizeof (struct rdsv3_notifier));
402c0dd49bdSEiji Ota 	}
403c0dd49bdSEiji Ota 
404c0dd49bdSEiji Ota 	/*
405c0dd49bdSEiji Ota 	 * If we bailed out because of an error in put_cmsg,
406c0dd49bdSEiji Ota 	 * we may be left with one or more notifications that we
407c0dd49bdSEiji Ota 	 * didn't process. Return them to the head of the list.
408c0dd49bdSEiji Ota 	 */
409c0dd49bdSEiji Ota 	if (!list_is_empty(&copy)) {
410c0dd49bdSEiji Ota 		mutex_enter(&rs->rs_lock);
411c0dd49bdSEiji Ota 		list_splice(&copy, &rs->rs_notify_queue);
412c0dd49bdSEiji Ota 		mutex_exit(&rs->rs_lock);
413c0dd49bdSEiji Ota 	}
414c0dd49bdSEiji Ota 
415c0dd49bdSEiji Ota 	RDSV3_DPRINTF4("rdsv3_notify_queue_get", "Return(rs: %p)", rs);
416c0dd49bdSEiji Ota 
417c0dd49bdSEiji Ota 	return (err);
418c0dd49bdSEiji Ota }
419c0dd49bdSEiji Ota 
420c0dd49bdSEiji Ota /*
421c0dd49bdSEiji Ota  * Queue a congestion notification
422c0dd49bdSEiji Ota  */
423c0dd49bdSEiji Ota static int
rdsv3_notify_cong(struct rdsv3_sock * rs,struct msghdr * msghdr)424c0dd49bdSEiji Ota rdsv3_notify_cong(struct rdsv3_sock *rs, struct msghdr *msghdr)
425c0dd49bdSEiji Ota {
426c0dd49bdSEiji Ota 	uint64_t notify = rs->rs_cong_notify;
427c0dd49bdSEiji Ota 	int err;
428c0dd49bdSEiji Ota 
429fe817b60SEiji Ota 	err = rdsv3_put_cmsg(msghdr, SOL_RDS, RDS_CMSG_CONG_UPDATE,
430c0dd49bdSEiji Ota 	    sizeof (notify), &notify);
431c0dd49bdSEiji Ota 	if (err)
432c0dd49bdSEiji Ota 		return (err);
433c0dd49bdSEiji Ota 
434c0dd49bdSEiji Ota 	mutex_enter(&rs->rs_lock);
435c0dd49bdSEiji Ota 	rs->rs_cong_notify &= ~notify;
436c0dd49bdSEiji Ota 	mutex_exit(&rs->rs_lock);
437c0dd49bdSEiji Ota 
438c0dd49bdSEiji Ota 	return (0);
439c0dd49bdSEiji Ota }
440c0dd49bdSEiji Ota 
441c0dd49bdSEiji Ota /*
442c0dd49bdSEiji Ota  * Receive any control messages.
443c0dd49bdSEiji Ota  */
444c0dd49bdSEiji Ota static int
rdsv3_cmsg_recv(struct rdsv3_incoming * inc,struct msghdr * msg)445c0dd49bdSEiji Ota rdsv3_cmsg_recv(struct rdsv3_incoming *inc, struct msghdr *msg)
446c0dd49bdSEiji Ota {
4479b3d509cSEiji Ota 	int ret = 0;
4489b3d509cSEiji Ota 	if (inc->i_rdma_cookie) {
4499b3d509cSEiji Ota 		ret = rdsv3_put_cmsg(msg, SOL_RDS, RDS_CMSG_RDMA_DEST,
4509b3d509cSEiji Ota 		    sizeof (inc->i_rdma_cookie), &inc->i_rdma_cookie);
4519b3d509cSEiji Ota 	}
4529b3d509cSEiji Ota 	return (ret);
453c0dd49bdSEiji Ota }
454c0dd49bdSEiji Ota 
455c0dd49bdSEiji Ota int
rdsv3_recvmsg(struct rdsv3_sock * rs,uio_t * uio,struct nmsghdr * msg,size_t size,int msg_flags)456c0dd49bdSEiji Ota rdsv3_recvmsg(struct rdsv3_sock *rs, uio_t *uio,
457c0dd49bdSEiji Ota     struct nmsghdr *msg, size_t size, int msg_flags)
458c0dd49bdSEiji Ota {
459c0dd49bdSEiji Ota 	struct rsock *sk = rdsv3_rs_to_sk(rs);
460b27516f5Sagiri 	int ret = 0;
461c0dd49bdSEiji Ota 	struct sockaddr_in *sin = NULL;
462c0dd49bdSEiji Ota 	struct rdsv3_incoming *inc = NULL;
463b27516f5Sagiri 	boolean_t nonblock = B_FALSE;
464c0dd49bdSEiji Ota 
465c0dd49bdSEiji Ota 	RDSV3_DPRINTF4("rdsv3_recvmsg",
466c0dd49bdSEiji Ota 	    "Enter(rs: %p size: %d msg_flags: 0x%x)", rs, size, msg_flags);
467c0dd49bdSEiji Ota 
468b27516f5Sagiri 	if ((uio->uio_fmode & (FNDELAY | FNONBLOCK)) ||
469b27516f5Sagiri 	    (msg_flags & MSG_DONTWAIT))
470b27516f5Sagiri 		nonblock = B_TRUE;
471b27516f5Sagiri 
472c0dd49bdSEiji Ota 	if (msg_flags & MSG_OOB)
473c0dd49bdSEiji Ota 		goto out;
474c0dd49bdSEiji Ota 
475c0dd49bdSEiji Ota 	/* mark the first cmsg position */
476c0dd49bdSEiji Ota 	if (msg) {
477c0dd49bdSEiji Ota 		msg->msg_control = NULL;
478c0dd49bdSEiji Ota 	}
479c0dd49bdSEiji Ota 
480c0dd49bdSEiji Ota 	while (1) {
481c0dd49bdSEiji Ota 		/*
482c0dd49bdSEiji Ota 		 * If there are pending notifications, do those -
483c0dd49bdSEiji Ota 		 * and nothing else
484c0dd49bdSEiji Ota 		 */
485c0dd49bdSEiji Ota 		if (!list_is_empty(&rs->rs_notify_queue)) {
486c0dd49bdSEiji Ota 			ret = rdsv3_notify_queue_get(rs, msg);
487c0dd49bdSEiji Ota 
488c0dd49bdSEiji Ota 			if (msg && msg->msg_namelen) {
489c0dd49bdSEiji Ota 				sin = kmem_zalloc(sizeof (struct sockaddr_in),
490c0dd49bdSEiji Ota 				    KM_SLEEP);
491c0dd49bdSEiji Ota 				sin->sin_family = AF_INET_OFFLOAD;
492c0dd49bdSEiji Ota 				if (inc) {
493c0dd49bdSEiji Ota 					sin->sin_port = inc->i_hdr.h_sport;
494c0dd49bdSEiji Ota 					sin->sin_addr.s_addr = inc->i_saddr;
495c0dd49bdSEiji Ota 				}
496c0dd49bdSEiji Ota 				msg->msg_namelen = sizeof (struct sockaddr_in);
497c0dd49bdSEiji Ota 				msg->msg_name = sin;
498c0dd49bdSEiji Ota 			}
499c0dd49bdSEiji Ota 			break;
500c0dd49bdSEiji Ota 		}
501c0dd49bdSEiji Ota 
502c0dd49bdSEiji Ota 		if (rs->rs_cong_notify) {
503c0dd49bdSEiji Ota 			ret = rdsv3_notify_cong(rs, msg);
504c0dd49bdSEiji Ota 			goto out;
505c0dd49bdSEiji Ota 		}
506c0dd49bdSEiji Ota 
507c0dd49bdSEiji Ota 		if (!rdsv3_next_incoming(rs, &inc)) {
508c0dd49bdSEiji Ota 			if (nonblock) {
509c0dd49bdSEiji Ota 				ret = -EAGAIN;
510c0dd49bdSEiji Ota 				break;
511c0dd49bdSEiji Ota 			}
512c0dd49bdSEiji Ota 
513c0dd49bdSEiji Ota 			RDSV3_DPRINTF3("rdsv3_recvmsg",
514c0dd49bdSEiji Ota 			    "Before wait (rs: %p)", rs);
515c0dd49bdSEiji Ota 
5166e18d381Sagiri #if 0
5176e18d381Sagiri 			ret = rdsv3_wait_sig(sk->sk_sleep,
5186e18d381Sagiri 			    !(list_is_empty(&rs->rs_notify_queue) &&
5196e18d381Sagiri 			    !rs->rs_cong_notify &&
5206e18d381Sagiri 			    !rdsv3_next_incoming(rs, &inc)));
5216e18d381Sagiri 			if (ret == 0) {
5226e18d381Sagiri 				/* signal/timeout pending */
5236e18d381Sagiri 				RDSV3_DPRINTF2("rdsv3_recvmsg",
5246e18d381Sagiri 				    "woke due to signal");
5256e18d381Sagiri 				ret = -ERESTART;
5266e18d381Sagiri 			}
5276e18d381Sagiri #else
528c0dd49bdSEiji Ota 			mutex_enter(&sk->sk_sleep->waitq_mutex);
5296e18d381Sagiri 			sk->sk_sleep->waitq_waiters++;
530c0dd49bdSEiji Ota 			while ((list_is_empty(&rs->rs_notify_queue) &&
531c0dd49bdSEiji Ota 			    !rs->rs_cong_notify &&
532c0dd49bdSEiji Ota 			    !rdsv3_next_incoming(rs, &inc))) {
533c0dd49bdSEiji Ota 				ret = cv_wait_sig(&sk->sk_sleep->waitq_cv,
534c0dd49bdSEiji Ota 				    &sk->sk_sleep->waitq_mutex);
535c0dd49bdSEiji Ota 				if (ret == 0) {
536c0dd49bdSEiji Ota 					/* signal/timeout pending */
537c0dd49bdSEiji Ota 					RDSV3_DPRINTF2("rdsv3_recvmsg",
538c0dd49bdSEiji Ota 					    "woke due to signal");
5395e12ddadSEiji Ota 					ret = -EINTR;
540c0dd49bdSEiji Ota 					break;
541c0dd49bdSEiji Ota 				}
542c0dd49bdSEiji Ota 			}
5436e18d381Sagiri 			sk->sk_sleep->waitq_waiters--;
544c0dd49bdSEiji Ota 			mutex_exit(&sk->sk_sleep->waitq_mutex);
5456e18d381Sagiri #endif
546c0dd49bdSEiji Ota 
547c0dd49bdSEiji Ota 			RDSV3_DPRINTF5("rdsv3_recvmsg",
548c0dd49bdSEiji Ota 			    "recvmsg woke rs: %p inc %p ret %d",
549c0dd49bdSEiji Ota 			    rs, inc, -ret);
550c0dd49bdSEiji Ota 
551c0dd49bdSEiji Ota 			if (ret < 0)
552c0dd49bdSEiji Ota 				break;
553c0dd49bdSEiji Ota 
554c0dd49bdSEiji Ota 			/*
555c0dd49bdSEiji Ota 			 * if the wakeup was due to rs_notify_queue or
556c0dd49bdSEiji Ota 			 * rs_cong_notify then we need to handle those first.
557c0dd49bdSEiji Ota 			 */
558c0dd49bdSEiji Ota 			continue;
559c0dd49bdSEiji Ota 		}
560c0dd49bdSEiji Ota 
561c0dd49bdSEiji Ota 		RDSV3_DPRINTF5("rdsv3_recvmsg",
562c0dd49bdSEiji Ota 		    "copying inc %p from %u.%u.%u.%u:%u to user", inc,
563c0dd49bdSEiji Ota 		    NIPQUAD(inc->i_conn->c_faddr),
564c0dd49bdSEiji Ota 		    ntohs(inc->i_hdr.h_sport));
565cadbfdc3SEiji Ota 
566c0dd49bdSEiji Ota 		ret = inc->i_conn->c_trans->inc_copy_to_user(inc, uio, size);
567c0dd49bdSEiji Ota 		if (ret < 0)
568c0dd49bdSEiji Ota 			break;
569c0dd49bdSEiji Ota 
570c0dd49bdSEiji Ota 		/*
571c0dd49bdSEiji Ota 		 * if the message we just copied isn't at the head of the
572c0dd49bdSEiji Ota 		 * recv queue then someone else raced us to return it, try
573c0dd49bdSEiji Ota 		 * to get the next message.
574c0dd49bdSEiji Ota 		 */
575c0dd49bdSEiji Ota 		if (!rdsv3_still_queued(rs, inc, !(msg_flags & MSG_PEEK))) {
576c0dd49bdSEiji Ota 			rdsv3_inc_put(inc);
577c0dd49bdSEiji Ota 			inc = NULL;
578c0dd49bdSEiji Ota 			rdsv3_stats_inc(s_recv_deliver_raced);
579c0dd49bdSEiji Ota 			continue;
580c0dd49bdSEiji Ota 		}
581c0dd49bdSEiji Ota 
582c0dd49bdSEiji Ota 		if (ret < ntohl(inc->i_hdr.h_len)) {
583c0dd49bdSEiji Ota 			if (msg_flags & MSG_TRUNC)
584c0dd49bdSEiji Ota 				ret = ntohl(inc->i_hdr.h_len);
585c0dd49bdSEiji Ota 			msg->msg_flags |= MSG_TRUNC;
586c0dd49bdSEiji Ota 		}
587c0dd49bdSEiji Ota 
588c0dd49bdSEiji Ota 		if (rdsv3_cmsg_recv(inc, msg)) {
589c0dd49bdSEiji Ota 			ret = -EFAULT;
590c0dd49bdSEiji Ota 			goto out;
591c0dd49bdSEiji Ota 		}
592c0dd49bdSEiji Ota 
593c0dd49bdSEiji Ota 		rdsv3_stats_inc(s_recv_delivered);
594c0dd49bdSEiji Ota 
595c0dd49bdSEiji Ota 		if (msg->msg_namelen) {
596c0dd49bdSEiji Ota 			sin = kmem_alloc(sizeof (struct sockaddr_in), KM_SLEEP);
597c0dd49bdSEiji Ota 			sin->sin_family = AF_INET_OFFLOAD;
598c0dd49bdSEiji Ota 			sin->sin_port = inc->i_hdr.h_sport;
599c0dd49bdSEiji Ota 			sin->sin_addr.s_addr = inc->i_saddr;
600c0dd49bdSEiji Ota 			(void) memset(sin->sin_zero, 0,
601c0dd49bdSEiji Ota 			    sizeof (sin->sin_zero));
602c0dd49bdSEiji Ota 			msg->msg_namelen = sizeof (struct sockaddr_in);
603c0dd49bdSEiji Ota 			msg->msg_name = sin;
604c0dd49bdSEiji Ota 		}
605c0dd49bdSEiji Ota 		break;
606c0dd49bdSEiji Ota 	}
607c0dd49bdSEiji Ota 
608c0dd49bdSEiji Ota 	if (inc)
609c0dd49bdSEiji Ota 		rdsv3_inc_put(inc);
610c0dd49bdSEiji Ota 
611c0dd49bdSEiji Ota out:
6129b3d509cSEiji Ota 	if (msg && msg->msg_control == NULL)
6139b3d509cSEiji Ota 		msg->msg_controllen = 0;
6149b3d509cSEiji Ota 
615c0dd49bdSEiji Ota 	RDSV3_DPRINTF4("rdsv3_recvmsg", "Return(rs: %p, ret: %d)", rs, ret);
616c0dd49bdSEiji Ota 
617c0dd49bdSEiji Ota 	return (ret);
618c0dd49bdSEiji Ota }
619c0dd49bdSEiji Ota 
620c0dd49bdSEiji Ota /*
621c0dd49bdSEiji Ota  * The socket is being shut down and we're asked to drop messages that were
622c0dd49bdSEiji Ota  * queued for recvmsg.  The caller has unbound the socket so the receive path
623c0dd49bdSEiji Ota  * won't queue any more incoming fragments or messages on the socket.
624c0dd49bdSEiji Ota  */
625c0dd49bdSEiji Ota void
rdsv3_clear_recv_queue(struct rdsv3_sock * rs)626c0dd49bdSEiji Ota rdsv3_clear_recv_queue(struct rdsv3_sock *rs)
627c0dd49bdSEiji Ota {
628c0dd49bdSEiji Ota 	struct rsock *sk = rdsv3_rs_to_sk(rs);
629c0dd49bdSEiji Ota 	struct rdsv3_incoming *inc, *tmp;
630c0dd49bdSEiji Ota 
631c0dd49bdSEiji Ota 	RDSV3_DPRINTF4("rdsv3_clear_recv_queue", "Enter(rs: %p)", rs);
632c0dd49bdSEiji Ota 
633c0dd49bdSEiji Ota 	rw_enter(&rs->rs_recv_lock, RW_WRITER);
634c0dd49bdSEiji Ota 	RDSV3_FOR_EACH_LIST_NODE_SAFE(inc, tmp, &rs->rs_recv_queue, i_item) {
635c0dd49bdSEiji Ota 		rdsv3_recv_rcvbuf_delta(rs, sk, inc->i_conn->c_lcong,
636c0dd49bdSEiji Ota 		    -ntohl(inc->i_hdr.h_len),
637c0dd49bdSEiji Ota 		    inc->i_hdr.h_dport);
638c0dd49bdSEiji Ota 		list_remove_node(&inc->i_item);
639c0dd49bdSEiji Ota 		rdsv3_inc_put(inc);
640c0dd49bdSEiji Ota 	}
641c0dd49bdSEiji Ota 	rw_exit(&rs->rs_recv_lock);
642c0dd49bdSEiji Ota 
643c0dd49bdSEiji Ota 	RDSV3_DPRINTF4("rdsv3_clear_recv_queue", "Return(rs: %p)", rs);
644c0dd49bdSEiji Ota }
645c0dd49bdSEiji Ota 
646c0dd49bdSEiji Ota /*
647c0dd49bdSEiji Ota  * inc->i_saddr isn't used here because it is only set in the receive
648c0dd49bdSEiji Ota  * path.
649c0dd49bdSEiji Ota  */
650c0dd49bdSEiji Ota void
rdsv3_inc_info_copy(struct rdsv3_incoming * inc,struct rdsv3_info_iterator * iter,uint32_be_t saddr,uint32_be_t daddr,int flip)651c0dd49bdSEiji Ota rdsv3_inc_info_copy(struct rdsv3_incoming *inc,
652c0dd49bdSEiji Ota     struct rdsv3_info_iterator *iter,
653c0dd49bdSEiji Ota     uint32_be_t saddr, uint32_be_t daddr, int flip)
654c0dd49bdSEiji Ota {
655fe817b60SEiji Ota 	struct rds_info_message minfo;
656c0dd49bdSEiji Ota 
657c0dd49bdSEiji Ota 	minfo.seq = ntohll(inc->i_hdr.h_sequence);
658c0dd49bdSEiji Ota 	minfo.len = ntohl(inc->i_hdr.h_len);
659c0dd49bdSEiji Ota 
660c0dd49bdSEiji Ota 	if (flip) {
661c0dd49bdSEiji Ota 		minfo.laddr = daddr;
662c0dd49bdSEiji Ota 		minfo.faddr = saddr;
663c0dd49bdSEiji Ota 		minfo.lport = inc->i_hdr.h_dport;
664c0dd49bdSEiji Ota 		minfo.fport = inc->i_hdr.h_sport;
665c0dd49bdSEiji Ota 	} else {
666c0dd49bdSEiji Ota 		minfo.laddr = saddr;
667c0dd49bdSEiji Ota 		minfo.faddr = daddr;
668c0dd49bdSEiji Ota 		minfo.lport = inc->i_hdr.h_sport;
669c0dd49bdSEiji Ota 		minfo.fport = inc->i_hdr.h_dport;
670c0dd49bdSEiji Ota 	}
671c0dd49bdSEiji Ota 
672c0dd49bdSEiji Ota 	rdsv3_info_copy(iter, &minfo, sizeof (minfo));
673c0dd49bdSEiji Ota }
674