xref: /illumos-gate/usr/src/uts/common/io/ib/clients/rdsv3/message.c (revision b27516f55237249607f754e6e42e865f12456675)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 /*
26  * Copyright (c) 2006 Oracle.  All rights reserved.
27  *
28  * This software is available to you under a choice of one of two
29  * licenses.  You may choose to be licensed under the terms of the GNU
30  * General Public License (GPL) Version 2, available from the file
31  * COPYING in the main directory of this source tree, or the
32  * OpenIB.org BSD license below:
33  *
34  *     Redistribution and use in source and binary forms, with or
35  *     without modification, are permitted provided that the following
36  *     conditions are met:
37  *
38  *      - Redistributions of source code must retain the above
39  *        copyright notice, this list of conditions and the following
40  *        disclaimer.
41  *
42  *      - Redistributions in binary form must reproduce the above
43  *        copyright notice, this list of conditions and the following
44  *        disclaimer in the documentation and/or other materials
45  *        provided with the distribution.
46  *
47  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
48  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
49  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
50  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
51  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
52  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
53  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
54  * SOFTWARE.
55  *
56  */
57 #include <sys/rds.h>
58 
59 #include <sys/ib/clients/rdsv3/rdsv3.h>
60 #include <sys/ib/clients/rdsv3/rdma.h>
61 #include <sys/ib/clients/rdsv3/rdsv3_debug.h>
62 
/*
 * Wait queue shared by all messages in this file: rdsv3_message_wait()
 * blocks on it and rdsv3_message_unmapped() wakes every waiter.
 * NOTE(review): no explicit initialisation of this queue is visible in this
 * file -- confirm that a zero-filled rdsv3_wait_queue_t is a valid queue.
 */
static rdsv3_wait_queue_t rdsv3_message_flush_waitq;
64 
/*
 * Payload size, in bytes, of each extension header, indexed by its
 * RDSV3_EXTHDR_* type code.  rdsv3_message_add_extension() uses it to
 * validate the caller's length and rdsv3_message_next_extension() uses it
 * to find the length implied by a type byte.
 *
 * The __lock_lint variant lists the same sizes positionally; presumably the
 * RDSV3_EXTHDR_* codes are 0..3 in that order -- TODO confirm against the
 * header that defines them.
 */
#ifndef __lock_lint
static unsigned int	rdsv3_exthdr_size[__RDSV3_EXTHDR_MAX] = {
[RDSV3_EXTHDR_NONE]	= 0,
[RDSV3_EXTHDR_VERSION]	= sizeof (struct rdsv3_ext_header_version),
[RDSV3_EXTHDR_RDMA]	= sizeof (struct rdsv3_ext_header_rdma),
[RDSV3_EXTHDR_RDMA_DEST]	= sizeof (struct rdsv3_ext_header_rdma_dest),
};
#else
static unsigned int	rdsv3_exthdr_size[__RDSV3_EXTHDR_MAX] = {
			0,
			sizeof (struct rdsv3_ext_header_version),
			sizeof (struct rdsv3_ext_header_rdma),
			sizeof (struct rdsv3_ext_header_rdma_dest),
};
#endif
80 
81 void
82 rdsv3_message_addref(struct rdsv3_message *rm)
83 {
84 	RDSV3_DPRINTF5("rdsv3_message_addref", "addref rm %p ref %d",
85 	    rm, atomic_get(&rm->m_refcount));
86 	atomic_add_32(&rm->m_refcount, 1);
87 }
88 
89 /*
90  * This relies on dma_map_sg() not touching sg[].page during merging.
91  */
92 static void
93 rdsv3_message_purge(struct rdsv3_message *rm)
94 {
95 	unsigned long i;
96 
97 	RDSV3_DPRINTF4("rdsv3_message_purge", "Enter(rm: %p)", rm);
98 
99 	if (test_bit(RDSV3_MSG_PAGEVEC, &rm->m_flags))
100 		return;
101 
102 	for (i = 0; i < rm->m_nents; i++) {
103 		RDSV3_DPRINTF5("rdsv3_message_purge", "putting data page %p\n",
104 		    (void *)rdsv3_sg_page(&rm->m_sg[i]));
105 		/* XXX will have to put_page for page refs */
106 		kmem_free(rdsv3_sg_page(&rm->m_sg[i]),
107 		    rdsv3_sg_len(&rm->m_sg[i]));
108 	}
109 
110 	if (rm->m_rdma_op)
111 		rdsv3_rdma_free_op(rm->m_rdma_op);
112 	if (rm->m_rdma_mr) {
113 		struct rdsv3_mr *mr = rm->m_rdma_mr;
114 		if (mr->r_refcount == 0) {
115 			RDSV3_DPRINTF4("rdsv3_message_purge ASSERT 0",
116 			    "rm %p mr %p", rm, mr);
117 			return;
118 		}
119 		if (mr->r_refcount == 0xdeadbeef) {
120 			RDSV3_DPRINTF4("rdsv3_message_purge ASSERT deadbeef",
121 			    "rm %p mr %p", rm, mr);
122 			return;
123 		}
124 		if (atomic_dec_and_test(&mr->r_refcount)) {
125 			rm->m_rdma_mr = NULL;
126 			__rdsv3_put_mr_final(mr);
127 		}
128 	}
129 
130 	RDSV3_DPRINTF4("rdsv3_message_purge", "Return(rm: %p)", rm);
131 
132 }
133 
134 void
135 rdsv3_message_inc_purge(struct rdsv3_incoming *inc)
136 {
137 	struct rdsv3_message *rm =
138 	    container_of(inc, struct rdsv3_message, m_inc);
139 	rdsv3_message_purge(rm);
140 }
141 
142 void
143 rdsv3_message_put(struct rdsv3_message *rm)
144 {
145 	RDSV3_DPRINTF5("rdsv3_message_put",
146 	    "put rm %p ref %d\n", rm, atomic_get(&rm->m_refcount));
147 
148 	if (atomic_dec_and_test(&rm->m_refcount)) {
149 		ASSERT(!list_link_active(&rm->m_sock_item));
150 		ASSERT(!list_link_active(&rm->m_conn_item));
151 		rdsv3_message_purge(rm);
152 
153 		kmem_free(rm, sizeof (struct rdsv3_message) +
154 		    (rm->m_nents * sizeof (struct rdsv3_scatterlist)));
155 	}
156 }
157 
158 void
159 rdsv3_message_inc_free(struct rdsv3_incoming *inc)
160 {
161 	struct rdsv3_message *rm =
162 	    container_of(inc, struct rdsv3_message, m_inc);
163 	rdsv3_message_put(rm);
164 }
165 
166 void
167 rdsv3_message_populate_header(struct rdsv3_header *hdr, uint16_be_t sport,
168     uint16_be_t dport, uint64_t seq)
169 {
170 	hdr->h_flags = 0;
171 	hdr->h_sport = sport;
172 	hdr->h_dport = dport;
173 	hdr->h_sequence = htonll(seq);
174 	hdr->h_exthdr[0] = RDSV3_EXTHDR_NONE;
175 }
176 
177 int
178 rdsv3_message_add_extension(struct rdsv3_header *hdr,
179     unsigned int type, const void *data, unsigned int len)
180 {
181 	unsigned int ext_len = sizeof (uint8_t) + len;
182 	unsigned char *dst;
183 
184 	RDSV3_DPRINTF4("rdsv3_message_add_extension", "Enter");
185 
186 	/* For now, refuse to add more than one extension header */
187 	if (hdr->h_exthdr[0] != RDSV3_EXTHDR_NONE)
188 		return (0);
189 
190 	if (type >= __RDSV3_EXTHDR_MAX ||
191 	    len != rdsv3_exthdr_size[type])
192 		return (0);
193 
194 	if (ext_len >= RDSV3_HEADER_EXT_SPACE)
195 		return (0);
196 	dst = hdr->h_exthdr;
197 
198 	*dst++ = type;
199 	(void) memcpy(dst, data, len);
200 
201 	dst[len] = RDSV3_EXTHDR_NONE;
202 
203 	RDSV3_DPRINTF4("rdsv3_message_add_extension", "Return");
204 	return (1);
205 }
206 
207 /*
208  * If a message has extension headers, retrieve them here.
209  * Call like this:
210  *
211  * unsigned int pos = 0;
212  *
213  * while (1) {
214  *	buflen = sizeof(buffer);
215  *	type = rdsv3_message_next_extension(hdr, &pos, buffer, &buflen);
216  *	if (type == RDSV3_EXTHDR_NONE)
217  *		break;
218  *	...
219  * }
220  */
221 int
222 rdsv3_message_next_extension(struct rdsv3_header *hdr,
223     unsigned int *pos, void *buf, unsigned int *buflen)
224 {
225 	unsigned int offset, ext_type, ext_len;
226 	uint8_t *src = hdr->h_exthdr;
227 
228 	RDSV3_DPRINTF4("rdsv3_message_next_extension", "Enter");
229 
230 	offset = *pos;
231 	if (offset >= RDSV3_HEADER_EXT_SPACE)
232 		goto none;
233 
234 	/*
235 	 * Get the extension type and length. For now, the
236 	 * length is implied by the extension type.
237 	 */
238 	ext_type = src[offset++];
239 
240 	if (ext_type == RDSV3_EXTHDR_NONE || ext_type >= __RDSV3_EXTHDR_MAX)
241 		goto none;
242 	ext_len = rdsv3_exthdr_size[ext_type];
243 	if (offset + ext_len > RDSV3_HEADER_EXT_SPACE)
244 		goto none;
245 
246 	*pos = offset + ext_len;
247 	if (ext_len < *buflen)
248 		*buflen = ext_len;
249 	(void) memcpy(buf, src + offset, *buflen);
250 	return (ext_type);
251 
252 none:
253 	*pos = RDSV3_HEADER_EXT_SPACE;
254 	*buflen = 0;
255 	return (RDSV3_EXTHDR_NONE);
256 }
257 
258 int
259 rdsv3_message_add_version_extension(struct rdsv3_header *hdr,
260     unsigned int version)
261 {
262 	struct rdsv3_ext_header_version ext_hdr;
263 
264 	ext_hdr.h_version = htonl(version);
265 	return (rdsv3_message_add_extension(hdr, RDSV3_EXTHDR_VERSION,
266 	    &ext_hdr, sizeof (ext_hdr)));
267 }
268 
269 int
270 rdsv3_message_get_version_extension(struct rdsv3_header *hdr,
271     unsigned int *version)
272 {
273 	struct rdsv3_ext_header_version ext_hdr;
274 	unsigned int pos = 0, len = sizeof (ext_hdr);
275 
276 	RDSV3_DPRINTF4("rdsv3_message_get_version_extension", "Enter");
277 
278 	/*
279 	 * We assume the version extension is the only one present
280 	 */
281 	if (rdsv3_message_next_extension(hdr, &pos, &ext_hdr, &len) !=
282 	    RDSV3_EXTHDR_VERSION)
283 		return (0);
284 	*version = ntohl(ext_hdr.h_version);
285 	return (1);
286 }
287 
288 int
289 rdsv3_message_add_rdma_dest_extension(struct rdsv3_header *hdr, uint32_t r_key,
290     uint32_t offset)
291 {
292 	struct rdsv3_ext_header_rdma_dest ext_hdr;
293 
294 	ext_hdr.h_rdma_rkey = htonl(r_key);
295 	ext_hdr.h_rdma_offset = htonl(offset);
296 	return (rdsv3_message_add_extension(hdr, RDSV3_EXTHDR_RDMA_DEST,
297 	    &ext_hdr, sizeof (ext_hdr)));
298 }
299 
300 struct rdsv3_message *
301 rdsv3_message_alloc(unsigned int nents, int gfp)
302 {
303 	struct rdsv3_message *rm;
304 
305 	RDSV3_DPRINTF4("rdsv3_message_alloc", "Enter(nents: %d)", nents);
306 
307 	rm = kmem_zalloc(sizeof (struct rdsv3_message) +
308 	    (nents * sizeof (struct rdsv3_scatterlist)), gfp);
309 	if (!rm)
310 		goto out;
311 
312 	rm->m_refcount = 1;
313 	list_link_init(&rm->m_sock_item);
314 	list_link_init(&rm->m_conn_item);
315 	mutex_init(&rm->m_rs_lock, NULL, MUTEX_DRIVER, NULL);
316 
317 	RDSV3_DPRINTF4("rdsv3_message_alloc", "Return(rm: %p)", rm);
318 out:
319 	return (rm);
320 }
321 
322 struct rdsv3_message *
323 rdsv3_message_map_pages(unsigned long *page_addrs, unsigned int total_len)
324 {
325 	struct rdsv3_message *rm;
326 	unsigned int i;
327 
328 	RDSV3_DPRINTF4("rdsv3_message_map_pages", "Enter(len: %d)", total_len);
329 
330 #ifndef __lock_lint
331 	rm = rdsv3_message_alloc(ceil(total_len, PAGE_SIZE), KM_NOSLEEP);
332 #else
333 	rm = NULL;
334 #endif
335 	if (rm == NULL)
336 		return (ERR_PTR(-ENOMEM));
337 
338 	set_bit(RDSV3_MSG_PAGEVEC, &rm->m_flags);
339 	rm->m_inc.i_hdr.h_len = htonl(total_len);
340 #ifndef __lock_lint
341 	rm->m_nents = ceil(total_len, PAGE_SIZE);
342 #else
343 	rm->m_nents = 0;
344 #endif
345 
346 	for (i = 0; i < rm->m_nents; ++i) {
347 		rdsv3_sg_set_page(&rm->m_sg[i],
348 		    page_addrs[i],
349 		    PAGE_SIZE, 0);
350 	}
351 
352 	return (rm);
353 }
354 
355 struct rdsv3_message *
356 rdsv3_message_copy_from_user(struct uio *uiop,
357     size_t total_len)
358 {
359 	struct rdsv3_message *rm;
360 	struct rdsv3_scatterlist *sg;
361 	int ret;
362 
363 	RDSV3_DPRINTF4("rdsv3_message_copy_from_user", "Enter: %d", total_len);
364 
365 #ifndef __lock_lint
366 	rm = rdsv3_message_alloc(ceil(total_len, PAGE_SIZE), KM_NOSLEEP);
367 #else
368 	rm = NULL;
369 #endif
370 	if (rm == NULL) {
371 		ret = -ENOMEM;
372 		goto out;
373 	}
374 
375 	rm->m_inc.i_hdr.h_len = htonl(total_len);
376 
377 	/*
378 	 * now allocate and copy in the data payload.
379 	 */
380 	sg = rm->m_sg;
381 
382 	while (total_len) {
383 		if (rdsv3_sg_page(sg) == NULL) {
384 			ret = rdsv3_page_remainder_alloc(sg, total_len, 0);
385 			if (ret)
386 				goto out;
387 			rm->m_nents++;
388 		}
389 
390 		ret = uiomove(rdsv3_sg_page(sg), rdsv3_sg_len(sg), UIO_WRITE,
391 		    uiop);
392 		if (ret) {
393 			RDSV3_DPRINTF2("rdsv3_message_copy_from_user",
394 			    "uiomove failed");
395 			ret = -ret;
396 			goto out;
397 		}
398 
399 		total_len -= rdsv3_sg_len(sg);
400 		sg++;
401 	}
402 
403 	ret = 0;
404 out:
405 	if (ret) {
406 		if (rm)
407 			rdsv3_message_put(rm);
408 		rm = ERR_PTR(ret);
409 	}
410 	return (rm);
411 }
412 
413 int
414 rdsv3_message_inc_copy_to_user(struct rdsv3_incoming *inc,
415     uio_t *uiop, size_t size)
416 {
417 	struct rdsv3_message *rm;
418 	struct rdsv3_scatterlist *sg;
419 	unsigned long to_copy;
420 	unsigned long vec_off;
421 	int copied;
422 	int ret;
423 	uint32_t len;
424 
425 	rm = container_of(inc, struct rdsv3_message, m_inc);
426 	len = ntohl(rm->m_inc.i_hdr.h_len);
427 
428 	RDSV3_DPRINTF4("rdsv3_message_inc_copy_to_user",
429 	    "Enter(rm: %p, len: %d)", rm, len);
430 
431 	sg = rm->m_sg;
432 	vec_off = 0;
433 	copied = 0;
434 
435 	while (copied < size && copied < len) {
436 
437 		to_copy = min(len - copied, sg->length - vec_off);
438 		to_copy = min(size - copied, to_copy);
439 
440 		RDSV3_DPRINTF5("rdsv3_message_inc_copy_to_user",
441 		    "copying %lu bytes to user iov %p from sg [%p, %u] + %lu\n",
442 		    to_copy, uiop,
443 		    rdsv3_sg_page(sg), sg->length, vec_off);
444 
445 		ret = uiomove(rdsv3_sg_page(sg), to_copy, UIO_READ, uiop);
446 		if (ret)
447 			break;
448 
449 		vec_off += to_copy;
450 		copied += to_copy;
451 
452 		if (vec_off == sg->length) {
453 			vec_off = 0;
454 			sg++;
455 		}
456 	}
457 
458 	return (copied);
459 }
460 
461 /*
462  * If the message is still on the send queue, wait until the transport
463  * is done with it. This is particularly important for RDMA operations.
464  */
465 void
466 rdsv3_message_wait(struct rdsv3_message *rm)
467 {
468 	rdsv3_wait_event(&rdsv3_message_flush_waitq,
469 	    !test_bit(RDSV3_MSG_MAPPED, &rm->m_flags));
470 }
471 
472 void
473 rdsv3_message_unmapped(struct rdsv3_message *rm)
474 {
475 	clear_bit(RDSV3_MSG_MAPPED, &rm->m_flags);
476 	rdsv3_wake_up_all(&rdsv3_message_flush_waitq);
477 }
478