ib_cma.c revision b00a0b4b47b9734b96ffde11a1a0517818c529f6
1/*
2 * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
3 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
4 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
5 * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
6 *
7 * This software is available to you under a choice of one of two
8 * licenses.  You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * OpenIB.org BSD license below:
12 *
13 *     Redistribution and use in source and binary forms, with or
14 *     without modification, are permitted provided that the following
15 *     conditions are met:
16 *
17 *      - Redistributions of source code must retain the above
18 *        copyright notice, this list of conditions and the following
19 *        disclaimer.
20 *
21 *      - Redistributions in binary form must reproduce the above
22 *        copyright notice, this list of conditions and the following
23 *        disclaimer in the documentation and/or other materials
24 *        provided with the distribution.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE.
34 */
35
36#define	LINUXKPI_PARAM_PREFIX ibcore_
37
38#include <linux/completion.h>
39#include <linux/in.h>
40#include <linux/in6.h>
41#include <linux/mutex.h>
42#include <linux/random.h>
43#include <linux/idr.h>
44#include <linux/inetdevice.h>
45#include <linux/slab.h>
46#include <linux/module.h>
47#include <net/route.h>
48
49#include <net/tcp.h>
50#include <net/ipv6.h>
51
52#include <netinet6/scope6_var.h>
53#include <netinet6/ip6_var.h>
54
55#include <rdma/rdma_cm.h>
56#include <rdma/rdma_cm_ib.h>
57#include <rdma/ib.h>
58#include <rdma/ib_addr.h>
59#include <rdma/ib_cache.h>
60#include <rdma/ib_cm.h>
61#include <rdma/ib_sa.h>
62#include <rdma/iw_cm.h>
63
64#include <sys/priv.h>
65
66#include "core_priv.h"
67
68MODULE_AUTHOR("Sean Hefty");
69MODULE_DESCRIPTION("Generic RDMA CM Agent");
70MODULE_LICENSE("Dual BSD/GPL");
71
72#define CMA_CM_RESPONSE_TIMEOUT 20
73#define CMA_QUERY_CLASSPORT_INFO_TIMEOUT 3000
74#define CMA_MAX_CM_RETRIES 15
75#define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
76#define CMA_IBOE_PACKET_LIFETIME 18
77
78static const char * const cma_events[] = {
79	[RDMA_CM_EVENT_ADDR_RESOLVED]	 = "address resolved",
80	[RDMA_CM_EVENT_ADDR_ERROR]	 = "address error",
81	[RDMA_CM_EVENT_ROUTE_RESOLVED]	 = "route resolved",
82	[RDMA_CM_EVENT_ROUTE_ERROR]	 = "route error",
83	[RDMA_CM_EVENT_CONNECT_REQUEST]	 = "connect request",
84	[RDMA_CM_EVENT_CONNECT_RESPONSE] = "connect response",
85	[RDMA_CM_EVENT_CONNECT_ERROR]	 = "connect error",
86	[RDMA_CM_EVENT_UNREACHABLE]	 = "unreachable",
87	[RDMA_CM_EVENT_REJECTED]	 = "rejected",
88	[RDMA_CM_EVENT_ESTABLISHED]	 = "established",
89	[RDMA_CM_EVENT_DISCONNECTED]	 = "disconnected",
90	[RDMA_CM_EVENT_DEVICE_REMOVAL]	 = "device removal",
91	[RDMA_CM_EVENT_MULTICAST_JOIN]	 = "multicast join",
92	[RDMA_CM_EVENT_MULTICAST_ERROR]	 = "multicast error",
93	[RDMA_CM_EVENT_ADDR_CHANGE]	 = "address change",
94	[RDMA_CM_EVENT_TIMEWAIT_EXIT]	 = "timewait exit",
95};
96
97const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event)
98{
99	size_t index = event;
100
101	return (index < ARRAY_SIZE(cma_events) && cma_events[index]) ?
102			cma_events[index] : "unrecognized event";
103}
104EXPORT_SYMBOL(rdma_event_msg);
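
/*
 * Illustrative sketch (not part of this file): an rdma_cm event handler
 * can use rdma_event_msg() to log whatever it receives.  The handler name
 * and context below are placeholders for the example.
 *
 *	static int my_cm_handler(struct rdma_cm_id *id,
 *	    struct rdma_cm_event *event)
 *	{
 *		pr_info("cma: %s (status %d)\n",
 *		    rdma_event_msg(event->event), event->status);
 *		return 0;
 *	}
 */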
105
106static void cma_add_one(struct ib_device *device);
107static void cma_remove_one(struct ib_device *device, void *client_data);
108
109static struct ib_client cma_client = {
110	.name   = "cma",
111	.add    = cma_add_one,
112	.remove = cma_remove_one
113};
114
115static struct ib_sa_client sa_client;
116static struct rdma_addr_client addr_client;
117static LIST_HEAD(dev_list);
118static LIST_HEAD(listen_any_list);
119static DEFINE_MUTEX(lock);
120static struct workqueue_struct *cma_wq;
121
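/*
 * Each RDMA port space (TCP, UDP, IPoIB, native IB) gets its own IDR
 * mapping a port number to the rdma_bind_list that owns it.  The table is
 * instantiated per-vnet via VNET_DEFINE() below so that bindings in
 * different network stack instances do not collide.
 */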
122struct cma_pernet {
123	struct idr tcp_ps;
124	struct idr udp_ps;
125	struct idr ipoib_ps;
126	struct idr ib_ps;
127};
128
129VNET_DEFINE(struct cma_pernet, cma_pernet);
130
131static struct cma_pernet *cma_pernet_ptr(struct vnet *vnet)
132{
133	struct cma_pernet *retval;
134
135	CURVNET_SET_QUIET(vnet);
136	retval = &VNET(cma_pernet);
137	CURVNET_RESTORE();
138
139	return (retval);
140}
141
142static struct idr *cma_pernet_idr(struct vnet *net, enum rdma_port_space ps)
143{
144	struct cma_pernet *pernet = cma_pernet_ptr(net);
145
146	switch (ps) {
147	case RDMA_PS_TCP:
148		return &pernet->tcp_ps;
149	case RDMA_PS_UDP:
150		return &pernet->udp_ps;
151	case RDMA_PS_IPOIB:
152		return &pernet->ipoib_ps;
153	case RDMA_PS_IB:
154		return &pernet->ib_ps;
155	default:
156		return NULL;
157	}
158}
159
160struct cma_device {
161	struct list_head	list;
162	struct ib_device	*device;
163	struct completion	comp;
164	atomic_t		refcount;
165	struct list_head	id_list;
166	struct sysctl_ctx_list	sysctl_ctx;
167	enum ib_gid_type	*default_gid_type;
168};
169
170struct rdma_bind_list {
171	enum rdma_port_space	ps;
172	struct hlist_head	owners;
173	unsigned short		port;
174};
175
176struct class_port_info_context {
177	struct ib_class_port_info	*class_port_info;
178	struct ib_device		*device;
179	struct completion		done;
180	struct ib_sa_query		*sa_query;
181	u8				port_num;
182};
183
184static int cma_ps_alloc(struct vnet *vnet, enum rdma_port_space ps,
185			struct rdma_bind_list *bind_list, int snum)
186{
187	struct idr *idr = cma_pernet_idr(vnet, ps);
188
189	return idr_alloc(idr, bind_list, snum, snum + 1, GFP_KERNEL);
190}
191
192static struct rdma_bind_list *cma_ps_find(struct vnet *net,
193					  enum rdma_port_space ps, int snum)
194{
195	struct idr *idr = cma_pernet_idr(net, ps);
196
197	return idr_find(idr, snum);
198}
199
200static void cma_ps_remove(struct vnet *net, enum rdma_port_space ps, int snum)
201{
202	struct idr *idr = cma_pernet_idr(net, ps);
203
204	idr_remove(idr, snum);
205}
206
207enum {
208	CMA_OPTION_AFONLY,
209};
210
211void cma_ref_dev(struct cma_device *cma_dev)
212{
213	atomic_inc(&cma_dev->refcount);
214}
215
216struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter	filter,
217					     void		*cookie)
218{
219	struct cma_device *cma_dev;
220	struct cma_device *found_cma_dev = NULL;
221
222	mutex_lock(&lock);
223
224	list_for_each_entry(cma_dev, &dev_list, list)
225		if (filter(cma_dev->device, cookie)) {
226			found_cma_dev = cma_dev;
227			break;
228		}
229
230	if (found_cma_dev)
231		cma_ref_dev(found_cma_dev);
232	mutex_unlock(&lock);
233	return found_cma_dev;
234}
235
236int cma_get_default_gid_type(struct cma_device *cma_dev,
237			     unsigned int port)
238{
239	if (port < rdma_start_port(cma_dev->device) ||
240	    port > rdma_end_port(cma_dev->device))
241		return -EINVAL;
242
243	return cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)];
244}
245
246int cma_set_default_gid_type(struct cma_device *cma_dev,
247			     unsigned int port,
248			     enum ib_gid_type default_gid_type)
249{
250	unsigned long supported_gids;
251
252	if (port < rdma_start_port(cma_dev->device) ||
253	    port > rdma_end_port(cma_dev->device))
254		return -EINVAL;
255
256	supported_gids = roce_gid_type_mask_support(cma_dev->device, port);
257
258	if (!(supported_gids & 1 << default_gid_type))
259		return -EINVAL;
260
261	cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)] =
262		default_gid_type;
263
264	return 0;
265}
266
267struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev)
268{
269	return cma_dev->device;
270}
271
272/*
273 * Device removal can occur at any time, so we need extra handling to
274 * serialize notifying the user of device removal with other callbacks.
275 * We do this by disabling removal notification while a callback is in process,
276 * and reporting it after the callback completes.
277 */
278struct rdma_id_private {
279	struct rdma_cm_id	id;
280
281	struct rdma_bind_list	*bind_list;
282	struct hlist_node	node;
283	struct list_head	list; /* listen_any_list or cma_device.list */
284	struct list_head	listen_list; /* per device listens */
285	struct cma_device	*cma_dev;
286	struct list_head	mc_list;
287
288	int			internal_id;
289	enum rdma_cm_state	state;
290	spinlock_t		lock;
291	struct mutex		qp_mutex;
292
293	struct completion	comp;
294	atomic_t		refcount;
295	struct mutex		handler_mutex;
296
297	int			backlog;
298	int			timeout_ms;
299	struct ib_sa_query	*query;
300	int			query_id;
301	union {
302		struct ib_cm_id	*ib;
303		struct iw_cm_id	*iw;
304	} cm_id;
305
306	u32			seq_num;
307	u32			qkey;
308	u32			qp_num;
309	pid_t			owner;
310	u32			options;
311	u8			srq;
312	u8			tos;
313	u8			reuseaddr;
314	u8			afonly;
315	enum ib_gid_type	gid_type;
316};
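
/*
 * Lifetime note: 'refcount' starts at 1 in rdma_create_id() and is dropped
 * with cma_deref_id(); the final drop completes 'comp', which
 * rdma_destroy_id() waits on before freeing the structure.
 */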
317
318struct cma_multicast {
319	struct rdma_id_private *id_priv;
320	union {
321		struct ib_sa_multicast *ib;
322	} multicast;
323	struct list_head	list;
324	void			*context;
325	struct sockaddr_storage	addr;
326	struct kref		mcref;
327	bool			igmp_joined;
328	u8			join_state;
329};
330
331struct cma_work {
332	struct work_struct	work;
333	struct rdma_id_private	*id;
334	enum rdma_cm_state	old_state;
335	enum rdma_cm_state	new_state;
336	struct rdma_cm_event	event;
337};
338
339struct cma_ndev_work {
340	struct work_struct	work;
341	struct rdma_id_private	*id;
342	struct rdma_cm_event	event;
343};
344
345struct iboe_mcast_work {
346	struct work_struct	 work;
347	struct rdma_id_private	*id;
348	struct cma_multicast	*mc;
349};
350
351union cma_ip_addr {
352	struct in6_addr ip6;
353	struct {
354		__be32 pad[3];
355		__be32 addr;
356	} ip4;
357};
358
359struct cma_hdr {
360	u8 cma_version;
361	u8 ip_version;	/* IP version: 7:4 */
362	__be16 port;
363	union cma_ip_addr src_addr;
364	union cma_ip_addr dst_addr;
365};
366
367#define CMA_VERSION 0x00
368
369struct cma_req_info {
370	struct ib_device *device;
371	int port;
372	union ib_gid local_gid;
373	__be64 service_id;
374	u16 pkey;
375	bool has_gid:1;
376};
377
378static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp)
379{
380	unsigned long flags;
381	int ret;
382
383	spin_lock_irqsave(&id_priv->lock, flags);
384	ret = (id_priv->state == comp);
385	spin_unlock_irqrestore(&id_priv->lock, flags);
386	return ret;
387}
388
389static int cma_comp_exch(struct rdma_id_private *id_priv,
390			 enum rdma_cm_state comp, enum rdma_cm_state exch)
391{
392	unsigned long flags;
393	int ret;
394
395	spin_lock_irqsave(&id_priv->lock, flags);
396	if ((ret = (id_priv->state == comp)))
397		id_priv->state = exch;
398	spin_unlock_irqrestore(&id_priv->lock, flags);
399	return ret;
400}
401
402static enum rdma_cm_state cma_exch(struct rdma_id_private *id_priv,
403				   enum rdma_cm_state exch)
404{
405	unsigned long flags;
406	enum rdma_cm_state old;
407
408	spin_lock_irqsave(&id_priv->lock, flags);
409	old = id_priv->state;
410	id_priv->state = exch;
411	spin_unlock_irqrestore(&id_priv->lock, flags);
412	return old;
413}
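
/*
 * The three helpers above implement the id state machine: cma_comp() tests
 * the current state, cma_comp_exch() moves to a new state only if the
 * current one matches, and cma_exch() swaps states unconditionally and
 * returns the old one.  All three run under id_priv->lock with interrupts
 * disabled.
 */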
414
415static inline u8 cma_get_ip_ver(const struct cma_hdr *hdr)
416{
417	return hdr->ip_version >> 4;
418}
419
420static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
421{
422	hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
423}
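
/*
 * The CMA header keeps the IP version in the high nibble of 'ip_version'.
 * For example, cma_set_ip_ver() on a zeroed header with ip_ver == 4 stores
 * 0x40, and cma_get_ip_ver() then returns 4; IPv6 uses 0x60 / 6.
 */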
424
425static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
426			       struct cma_device *cma_dev)
427{
428	cma_ref_dev(cma_dev);
429	id_priv->cma_dev = cma_dev;
430	id_priv->gid_type = IB_GID_TYPE_IB;
431	id_priv->id.device = cma_dev->device;
432	id_priv->id.route.addr.dev_addr.transport =
433		rdma_node_get_transport(cma_dev->device->node_type);
434	list_add_tail(&id_priv->list, &cma_dev->id_list);
435}
436
437static void cma_attach_to_dev(struct rdma_id_private *id_priv,
438			      struct cma_device *cma_dev)
439{
440	_cma_attach_to_dev(id_priv, cma_dev);
441	id_priv->gid_type =
442		cma_dev->default_gid_type[id_priv->id.port_num -
443					  rdma_start_port(cma_dev->device)];
444}
445
446void cma_deref_dev(struct cma_device *cma_dev)
447{
448	if (atomic_dec_and_test(&cma_dev->refcount))
449		complete(&cma_dev->comp);
450}
451
452static inline void release_mc(struct kref *kref)
453{
454	struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref);
455
456	kfree(mc->multicast.ib);
457	kfree(mc);
458}
459
460static void cma_release_dev(struct rdma_id_private *id_priv)
461{
462	mutex_lock(&lock);
463	list_del(&id_priv->list);
464	cma_deref_dev(id_priv->cma_dev);
465	id_priv->cma_dev = NULL;
466	mutex_unlock(&lock);
467}
468
469static inline struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv)
470{
471	return (struct sockaddr *) &id_priv->id.route.addr.src_addr;
472}
473
474static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv)
475{
476	return (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
477}
478
479static inline unsigned short cma_family(struct rdma_id_private *id_priv)
480{
481	return id_priv->id.route.addr.src_addr.ss_family;
482}
483
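/*
 * Pick the QKEY for a UD-style id: an explicit, matching value from the
 * caller wins; RDMA_PS_UDP and RDMA_PS_IB fall back to RDMA_UDP_QKEY; for
 * RDMA_PS_IPOIB the QKEY is taken from the cached multicast member record
 * of the MGID stored in the device address.
 */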
484static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey)
485{
486	struct ib_sa_mcmember_rec rec;
487	int ret = 0;
488
489	if (id_priv->qkey) {
490		if (qkey && id_priv->qkey != qkey)
491			return -EINVAL;
492		return 0;
493	}
494
495	if (qkey) {
496		id_priv->qkey = qkey;
497		return 0;
498	}
499
500	switch (id_priv->id.ps) {
501	case RDMA_PS_UDP:
502	case RDMA_PS_IB:
503		id_priv->qkey = RDMA_UDP_QKEY;
504		break;
505	case RDMA_PS_IPOIB:
506		ib_addr_get_mgid(&id_priv->id.route.addr.dev_addr, &rec.mgid);
507		ret = ib_sa_get_mcmember_rec(id_priv->id.device,
508					     id_priv->id.port_num, &rec.mgid,
509					     &rec);
510		if (!ret)
511			id_priv->qkey = be32_to_cpu(rec.qkey);
512		break;
513	default:
514		break;
515	}
516	return ret;
517}
518
519static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr)
520{
521	dev_addr->dev_type = ARPHRD_INFINIBAND;
522	rdma_addr_set_sgid(dev_addr, (union ib_gid *) &sib->sib_addr);
523	ib_addr_set_pkey(dev_addr, ntohs(sib->sib_pkey));
524}
525
526static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
527{
528	int ret;
529
530	if (addr->sa_family != AF_IB) {
531		ret = rdma_translate_ip(addr, dev_addr, NULL);
532	} else {
533		cma_translate_ib((struct sockaddr_ib *) addr, dev_addr);
534		ret = 0;
535	}
536
537	return ret;
538}
539
540static inline int cma_validate_port(struct ib_device *device, u8 port,
541				    enum ib_gid_type gid_type,
542				      union ib_gid *gid, int dev_type,
543				      struct vnet *net,
544				      int bound_if_index)
545{
546	int ret = -ENODEV;
547	struct net_device *ndev = NULL;
548
549	if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port))
550		return ret;
551
552	if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port))
553		return ret;
554
555	if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
556		ndev = dev_get_by_index(net, bound_if_index);
557		if (ndev && ndev->if_flags & IFF_LOOPBACK) {
558			pr_info("detected loopback device\n");
559			dev_put(ndev);
560
561			if (!device->get_netdev)
562				return -EOPNOTSUPP;
563
564			ndev = device->get_netdev(device, port);
565			if (!ndev)
566				return -ENODEV;
567		}
568	} else {
569		gid_type = IB_GID_TYPE_IB;
570	}
571
572	ret = ib_find_cached_gid_by_port(device, gid, gid_type, port,
573					 ndev, NULL);
574
575	if (ndev)
576		dev_put(ndev);
577
578	return ret;
579}
580
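/*
 * Bind id_priv to an RDMA device and port.  The device/port the listen
 * request arrived on is tried first; otherwise every registered device and
 * port is scanned for one whose GID table contains the relevant source GID
 * (the IP-derived IBoE GID on RoCE ports, the raw device GID otherwise).
 */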
581static int cma_acquire_dev(struct rdma_id_private *id_priv,
582			   struct rdma_id_private *listen_id_priv)
583{
584	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
585	struct cma_device *cma_dev;
586	union ib_gid gid, iboe_gid, *gidp;
587	int ret = -ENODEV;
588	u8 port;
589
590	if (dev_addr->dev_type != ARPHRD_INFINIBAND &&
591	    id_priv->id.ps == RDMA_PS_IPOIB)
592		return -EINVAL;
593
594	mutex_lock(&lock);
595	rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
596		    &iboe_gid);
597
598	memcpy(&gid, dev_addr->src_dev_addr +
599	       rdma_addr_gid_offset(dev_addr), sizeof gid);
600
601	if (listen_id_priv) {
602		cma_dev = listen_id_priv->cma_dev;
603		port = listen_id_priv->id.port_num;
604		gidp = rdma_protocol_roce(cma_dev->device, port) ?
605		       &iboe_gid : &gid;
606
607		ret = cma_validate_port(cma_dev->device, port,
608					rdma_protocol_ib(cma_dev->device, port) ?
609					IB_GID_TYPE_IB :
610					listen_id_priv->gid_type, gidp,
611					dev_addr->dev_type,
612					dev_addr->net,
613					dev_addr->bound_dev_if);
614		if (!ret) {
615			id_priv->id.port_num = port;
616			goto out;
617		}
618	}
619
620	list_for_each_entry(cma_dev, &dev_list, list) {
621		for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) {
622			if (listen_id_priv &&
623			    listen_id_priv->cma_dev == cma_dev &&
624			    listen_id_priv->id.port_num == port)
625				continue;
626
627			gidp = rdma_protocol_roce(cma_dev->device, port) ?
628			       &iboe_gid : &gid;
629
630			ret = cma_validate_port(cma_dev->device, port,
631						rdma_protocol_ib(cma_dev->device, port) ?
632						IB_GID_TYPE_IB :
633						cma_dev->default_gid_type[port - 1],
634						gidp, dev_addr->dev_type,
635						dev_addr->net,
636						dev_addr->bound_dev_if);
637			if (!ret) {
638				id_priv->id.port_num = port;
639				goto out;
640			}
641		}
642	}
643
644out:
645	if (!ret)
646		cma_attach_to_dev(id_priv, cma_dev);
647
648	mutex_unlock(&lock);
649	return ret;
650}
651
652/*
653 * Select the source IB device and address to reach the destination IB address.
654 */
655static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
656{
657	struct cma_device *cma_dev, *cur_dev;
658	struct sockaddr_ib *addr;
659	union ib_gid gid, sgid, *dgid;
660	u16 pkey, index;
661	u8 p;
662	int i;
663
664	cma_dev = NULL;
665	addr = (struct sockaddr_ib *) cma_dst_addr(id_priv);
666	dgid = (union ib_gid *) &addr->sib_addr;
667	pkey = ntohs(addr->sib_pkey);
668
669	list_for_each_entry(cur_dev, &dev_list, list) {
670		for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
671			if (!rdma_cap_af_ib(cur_dev->device, p))
672				continue;
673
674			if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index))
675				continue;
676
677			for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i,
678						       &gid, NULL);
679			     i++) {
680				if (!memcmp(&gid, dgid, sizeof(gid))) {
681					cma_dev = cur_dev;
682					sgid = gid;
683					id_priv->id.port_num = p;
684					goto found;
685				}
686
687				if (!cma_dev && (gid.global.subnet_prefix ==
688						 dgid->global.subnet_prefix)) {
689					cma_dev = cur_dev;
690					sgid = gid;
691					id_priv->id.port_num = p;
692				}
693			}
694		}
695	}
696
697	if (!cma_dev)
698		return -ENODEV;
699
700found:
701	cma_attach_to_dev(id_priv, cma_dev);
702	addr = (struct sockaddr_ib *) cma_src_addr(id_priv);
703	memcpy(&addr->sib_addr, &sgid, sizeof sgid);
704	cma_translate_ib(addr, &id_priv->id.route.addr.dev_addr);
705	return 0;
706}
707
708static void cma_deref_id(struct rdma_id_private *id_priv)
709{
710	if (atomic_dec_and_test(&id_priv->refcount))
711		complete(&id_priv->comp);
712}
713
714struct rdma_cm_id *rdma_create_id(struct vnet *net,
715				  rdma_cm_event_handler event_handler,
716				  void *context, enum rdma_port_space ps,
717				  enum ib_qp_type qp_type)
718{
719	struct rdma_id_private *id_priv;
720
721	id_priv = kzalloc(sizeof *id_priv, GFP_KERNEL);
722	if (!id_priv)
723		return ERR_PTR(-ENOMEM);
724
725	id_priv->owner = task_pid_nr(current);
726	id_priv->state = RDMA_CM_IDLE;
727	id_priv->id.context = context;
728	id_priv->id.event_handler = event_handler;
729	id_priv->id.ps = ps;
730	id_priv->id.qp_type = qp_type;
731	spin_lock_init(&id_priv->lock);
732	mutex_init(&id_priv->qp_mutex);
733	init_completion(&id_priv->comp);
734	atomic_set(&id_priv->refcount, 1);
735	mutex_init(&id_priv->handler_mutex);
736	INIT_LIST_HEAD(&id_priv->listen_list);
737	INIT_LIST_HEAD(&id_priv->mc_list);
738	get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
739	id_priv->id.route.addr.dev_addr.net = TD_TO_VNET(curthread);
740
741	return &id_priv->id;
742}
743EXPORT_SYMBOL(rdma_create_id);
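
/*
 * Illustrative sketch (not part of this file): a kernel consumer creates
 * an id bound to the current vnet, lets its event handler drive the rest
 * of the setup, and destroys the id when finished.  'my_cm_handler' and
 * 'ctx' are placeholders.
 *
 *	struct rdma_cm_id *id;
 *
 *	id = rdma_create_id(TD_TO_VNET(curthread), my_cm_handler, ctx,
 *	    RDMA_PS_TCP, IB_QPT_RC);
 *	if (IS_ERR(id))
 *		return PTR_ERR(id);
 *	...
 *	rdma_destroy_id(id);
 */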
744
745static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
746{
747	struct ib_qp_attr qp_attr;
748	int qp_attr_mask, ret;
749
750	qp_attr.qp_state = IB_QPS_INIT;
751	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
752	if (ret)
753		return ret;
754
755	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
756	if (ret)
757		return ret;
758
759	qp_attr.qp_state = IB_QPS_RTR;
760	ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
761	if (ret)
762		return ret;
763
764	qp_attr.qp_state = IB_QPS_RTS;
765	qp_attr.sq_psn = 0;
766	ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN);
767
768	return ret;
769}
770
771static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
772{
773	struct ib_qp_attr qp_attr;
774	int qp_attr_mask, ret;
775
776	qp_attr.qp_state = IB_QPS_INIT;
777	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
778	if (ret)
779		return ret;
780
781	return ib_modify_qp(qp, &qp_attr, qp_attr_mask);
782}
783
784int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
785		   struct ib_qp_init_attr *qp_init_attr)
786{
787	struct rdma_id_private *id_priv;
788	struct ib_qp *qp;
789	int ret;
790
791	id_priv = container_of(id, struct rdma_id_private, id);
792	if (id->device != pd->device)
793		return -EINVAL;
794
795	qp_init_attr->port_num = id->port_num;
796	qp = ib_create_qp(pd, qp_init_attr);
797	if (IS_ERR(qp))
798		return PTR_ERR(qp);
799
800	if (id->qp_type == IB_QPT_UD)
801		ret = cma_init_ud_qp(id_priv, qp);
802	else
803		ret = cma_init_conn_qp(id_priv, qp);
804	if (ret)
805		goto err;
806
807	id->qp = qp;
808	id_priv->qp_num = qp->qp_num;
809	id_priv->srq = (qp->srq != NULL);
810	return 0;
811err:
812	ib_destroy_qp(qp);
813	return ret;
814}
815EXPORT_SYMBOL(rdma_create_qp);
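
/*
 * Illustrative sketch (not part of this file): the caller supplies a PD
 * and CQs allocated on id->device (rdma_create_qp() rejects a PD from a
 * different device) and the resulting QP is attached to the id.  The
 * sizes below are arbitrary.
 *
 *	struct ib_qp_init_attr attr = {
 *		.send_cq = cq,
 *		.recv_cq = cq,
 *		.qp_type = IB_QPT_RC,
 *		.sq_sig_type = IB_SIGNAL_REQ_WR,
 *		.cap = { .max_send_wr = 16, .max_recv_wr = 16,
 *			 .max_send_sge = 1, .max_recv_sge = 1 },
 *	};
 *
 *	ret = rdma_create_qp(id, pd, &attr);
 */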
816
817void rdma_destroy_qp(struct rdma_cm_id *id)
818{
819	struct rdma_id_private *id_priv;
820
821	id_priv = container_of(id, struct rdma_id_private, id);
822	mutex_lock(&id_priv->qp_mutex);
823	ib_destroy_qp(id_priv->id.qp);
824	id_priv->id.qp = NULL;
825	mutex_unlock(&id_priv->qp_mutex);
826}
827EXPORT_SYMBOL(rdma_destroy_qp);
828
829static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
830			     struct rdma_conn_param *conn_param)
831{
832	struct ib_qp_attr qp_attr;
833	int qp_attr_mask, ret;
834	union ib_gid sgid;
835
836	mutex_lock(&id_priv->qp_mutex);
837	if (!id_priv->id.qp) {
838		ret = 0;
839		goto out;
840	}
841
842	/* Need to update QP attributes from default values. */
843	qp_attr.qp_state = IB_QPS_INIT;
844	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
845	if (ret)
846		goto out;
847
848	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
849	if (ret)
850		goto out;
851
852	qp_attr.qp_state = IB_QPS_RTR;
853	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
854	if (ret)
855		goto out;
856
857	ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num,
858			   qp_attr.ah_attr.grh.sgid_index, &sgid, NULL);
859	if (ret)
860		goto out;
861
862	BUG_ON(id_priv->cma_dev->device != id_priv->id.device);
863
864	if (conn_param)
865		qp_attr.max_dest_rd_atomic = conn_param->responder_resources;
866	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
867out:
868	mutex_unlock(&id_priv->qp_mutex);
869	return ret;
870}
871
872static int cma_modify_qp_rts(struct rdma_id_private *id_priv,
873			     struct rdma_conn_param *conn_param)
874{
875	struct ib_qp_attr qp_attr;
876	int qp_attr_mask, ret;
877
878	mutex_lock(&id_priv->qp_mutex);
879	if (!id_priv->id.qp) {
880		ret = 0;
881		goto out;
882	}
883
884	qp_attr.qp_state = IB_QPS_RTS;
885	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
886	if (ret)
887		goto out;
888
889	if (conn_param)
890		qp_attr.max_rd_atomic = conn_param->initiator_depth;
891	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
892out:
893	mutex_unlock(&id_priv->qp_mutex);
894	return ret;
895}
896
897static int cma_modify_qp_err(struct rdma_id_private *id_priv)
898{
899	struct ib_qp_attr qp_attr;
900	int ret;
901
902	mutex_lock(&id_priv->qp_mutex);
903	if (!id_priv->id.qp) {
904		ret = 0;
905		goto out;
906	}
907
908	qp_attr.qp_state = IB_QPS_ERR;
909	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, IB_QP_STATE);
910out:
911	mutex_unlock(&id_priv->qp_mutex);
912	return ret;
913}
914
915static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
916			       struct ib_qp_attr *qp_attr, int *qp_attr_mask)
917{
918	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
919	int ret;
920	u16 pkey;
921
922	if (rdma_cap_eth_ah(id_priv->id.device, id_priv->id.port_num))
923		pkey = 0xffff;
924	else
925		pkey = ib_addr_get_pkey(dev_addr);
926
927	ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num,
928				  pkey, &qp_attr->pkey_index);
929	if (ret)
930		return ret;
931
932	qp_attr->port_num = id_priv->id.port_num;
933	*qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT;
934
935	if (id_priv->id.qp_type == IB_QPT_UD) {
936		ret = cma_set_qkey(id_priv, 0);
937		if (ret)
938			return ret;
939
940		qp_attr->qkey = id_priv->qkey;
941		*qp_attr_mask |= IB_QP_QKEY;
942	} else {
943		qp_attr->qp_access_flags = 0;
944		*qp_attr_mask |= IB_QP_ACCESS_FLAGS;
945	}
946	return 0;
947}
948
949int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
950		       int *qp_attr_mask)
951{
952	struct rdma_id_private *id_priv;
953	int ret = 0;
954
955	id_priv = container_of(id, struct rdma_id_private, id);
956	if (rdma_cap_ib_cm(id->device, id->port_num)) {
957		if (!id_priv->cm_id.ib || (id_priv->id.qp_type == IB_QPT_UD))
958			ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask);
959		else
960			ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
961						 qp_attr_mask);
962
963		if (qp_attr->qp_state == IB_QPS_RTR)
964			qp_attr->rq_psn = id_priv->seq_num;
965	} else if (rdma_cap_iw_cm(id->device, id->port_num)) {
966		if (!id_priv->cm_id.iw) {
967			qp_attr->qp_access_flags = 0;
968			*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
969		} else
970			ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr,
971						 qp_attr_mask);
972	} else
973		ret = -ENOSYS;
974
975	return ret;
976}
977EXPORT_SYMBOL(rdma_init_qp_attr);
978
979static inline int cma_zero_addr(struct sockaddr *addr)
980{
981	switch (addr->sa_family) {
982	case AF_INET:
983		return ipv4_is_zeronet(((struct sockaddr_in *)addr)->sin_addr.s_addr);
984	case AF_INET6:
985		return ipv6_addr_any(&((struct sockaddr_in6 *) addr)->sin6_addr);
986	case AF_IB:
987		return ib_addr_any(&((struct sockaddr_ib *) addr)->sib_addr);
988	default:
989		return 0;
990	}
991}
992
993static inline int cma_loopback_addr(struct sockaddr *addr)
994{
995	switch (addr->sa_family) {
996	case AF_INET:
997		return ipv4_is_loopback(((struct sockaddr_in *) addr)->sin_addr.s_addr);
998	case AF_INET6:
999		return ipv6_addr_loopback(&((struct sockaddr_in6 *) addr)->sin6_addr);
1000	case AF_IB:
1001		return ib_addr_loopback(&((struct sockaddr_ib *) addr)->sib_addr);
1002	default:
1003		return 0;
1004	}
1005}
1006
1007static inline int cma_any_addr(struct sockaddr *addr)
1008{
1009	return cma_zero_addr(addr) || cma_loopback_addr(addr);
1010}
1011
1012static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst)
1013{
1014	if (src->sa_family != dst->sa_family)
1015		return -1;
1016
1017	switch (src->sa_family) {
1018	case AF_INET:
1019		return ((struct sockaddr_in *) src)->sin_addr.s_addr !=
1020		       ((struct sockaddr_in *) dst)->sin_addr.s_addr;
1021	case AF_INET6:
1022		return ipv6_addr_cmp(&((struct sockaddr_in6 *) src)->sin6_addr,
1023				     &((struct sockaddr_in6 *) dst)->sin6_addr);
1024	default:
1025		return ib_addr_cmp(&((struct sockaddr_ib *) src)->sib_addr,
1026				   &((struct sockaddr_ib *) dst)->sib_addr);
1027	}
1028}
1029
1030static __be16 cma_port(struct sockaddr *addr)
1031{
1032	struct sockaddr_ib *sib;
1033
1034	switch (addr->sa_family) {
1035	case AF_INET:
1036		return ((struct sockaddr_in *) addr)->sin_port;
1037	case AF_INET6:
1038		return ((struct sockaddr_in6 *) addr)->sin6_port;
1039	case AF_IB:
1040		sib = (struct sockaddr_ib *) addr;
1041		return htons((u16) (be64_to_cpu(sib->sib_sid) &
1042				    be64_to_cpu(sib->sib_sid_mask)));
1043	default:
1044		return 0;
1045	}
1046}
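
/*
 * For AF_IB the port is carried in the low 16 bits of the service ID:
 * e.g. a sib_sid whose low 16 bits are 0x1234, masked with an all-ones
 * sib_sid_mask, makes cma_port() return htons(0x1234).
 */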
1047
1048static inline int cma_any_port(struct sockaddr *addr)
1049{
1050	return !cma_port(addr);
1051}
1052
1053static void cma_save_ib_info(struct sockaddr *src_addr,
1054			     struct sockaddr *dst_addr,
1055			     struct rdma_cm_id *listen_id,
1056			     struct ib_sa_path_rec *path)
1057{
1058	struct sockaddr_ib *listen_ib, *ib;
1059
1060	listen_ib = (struct sockaddr_ib *) &listen_id->route.addr.src_addr;
1061	if (src_addr) {
1062		ib = (struct sockaddr_ib *)src_addr;
1063		ib->sib_family = AF_IB;
1064		if (path) {
1065			ib->sib_pkey = path->pkey;
1066			ib->sib_flowinfo = path->flow_label;
1067			memcpy(&ib->sib_addr, &path->sgid, 16);
1068			ib->sib_sid = path->service_id;
1069			ib->sib_scope_id = 0;
1070		} else {
1071			ib->sib_pkey = listen_ib->sib_pkey;
1072			ib->sib_flowinfo = listen_ib->sib_flowinfo;
1073			ib->sib_addr = listen_ib->sib_addr;
1074			ib->sib_sid = listen_ib->sib_sid;
1075			ib->sib_scope_id = listen_ib->sib_scope_id;
1076		}
1077		ib->sib_sid_mask = cpu_to_be64(0xffffffffffffffffULL);
1078	}
1079	if (dst_addr) {
1080		ib = (struct sockaddr_ib *)dst_addr;
1081		ib->sib_family = AF_IB;
1082		if (path) {
1083			ib->sib_pkey = path->pkey;
1084			ib->sib_flowinfo = path->flow_label;
1085			memcpy(&ib->sib_addr, &path->dgid, 16);
1086		}
1087	}
1088}
1089
1090static void cma_save_ip4_info(struct sockaddr_in *src_addr,
1091			      struct sockaddr_in *dst_addr,
1092			      struct cma_hdr *hdr,
1093			      __be16 local_port)
1094{
1095	if (src_addr) {
1096		*src_addr = (struct sockaddr_in) {
1097			.sin_len = sizeof(struct sockaddr_in),
1098			.sin_family = AF_INET,
1099			.sin_addr.s_addr = hdr->dst_addr.ip4.addr,
1100			.sin_port = local_port,
1101		};
1102	}
1103
1104	if (dst_addr) {
1105		*dst_addr = (struct sockaddr_in) {
1106			.sin_len = sizeof(struct sockaddr_in),
1107			.sin_family = AF_INET,
1108			.sin_addr.s_addr = hdr->src_addr.ip4.addr,
1109			.sin_port = hdr->port,
1110		};
1111	}
1112}
1113
1114static void cma_save_ip6_info(struct sockaddr_in6 *src_addr,
1115			      struct sockaddr_in6 *dst_addr,
1116			      struct cma_hdr *hdr,
1117			      __be16 local_port)
1118{
1119	if (src_addr) {
1120		*src_addr = (struct sockaddr_in6) {
			.sin6_len = sizeof(struct sockaddr_in6),
1121			.sin6_family = AF_INET6,
1122			.sin6_addr = hdr->dst_addr.ip6,
1123			.sin6_port = local_port,
1124		};
1125	}
1126
1127	if (dst_addr) {
1128		*dst_addr = (struct sockaddr_in6) {
1129			.sin6_len = sizeof(struct sockaddr_in6),
1130			.sin6_family = AF_INET6,
1131			.sin6_addr = hdr->src_addr.ip6,
1132			.sin6_port = hdr->port,
1133		};
1134	}
1135}
1136
1137static u16 cma_port_from_service_id(__be64 service_id)
1138{
1139	return (u16)be64_to_cpu(service_id);
1140}
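
/*
 * For the IP-based port spaces the service ID is built by
 * rdma_get_service_id() as (ps << 16) | port; cma_port_from_service_id()
 * above and rdma_ps_from_service_id() further down simply pull the two
 * halves back out.
 */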
1141
1142static int cma_save_ip_info(struct sockaddr *src_addr,
1143			    struct sockaddr *dst_addr,
1144			    struct ib_cm_event *ib_event,
1145			    __be64 service_id)
1146{
1147	struct cma_hdr *hdr;
1148	__be16 port;
1149
1150	hdr = ib_event->private_data;
1151	if (hdr->cma_version != CMA_VERSION)
1152		return -EINVAL;
1153
1154	port = htons(cma_port_from_service_id(service_id));
1155
1156	switch (cma_get_ip_ver(hdr)) {
1157	case 4:
1158		cma_save_ip4_info((struct sockaddr_in *)src_addr,
1159				  (struct sockaddr_in *)dst_addr, hdr, port);
1160		break;
1161	case 6:
1162		cma_save_ip6_info((struct sockaddr_in6 *)src_addr,
1163				  (struct sockaddr_in6 *)dst_addr, hdr, port);
1164		break;
1165	default:
1166		return -EAFNOSUPPORT;
1167	}
1168
1169	return 0;
1170}
1171
1172static int cma_save_net_info(struct sockaddr *src_addr,
1173			     struct sockaddr *dst_addr,
1174			     struct rdma_cm_id *listen_id,
1175			     struct ib_cm_event *ib_event,
1176			     sa_family_t sa_family, __be64 service_id)
1177{
1178	if (sa_family == AF_IB) {
1179		if (ib_event->event == IB_CM_REQ_RECEIVED)
1180			cma_save_ib_info(src_addr, dst_addr, listen_id,
1181					 ib_event->param.req_rcvd.primary_path);
1182		else if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED)
1183			cma_save_ib_info(src_addr, dst_addr, listen_id, NULL);
1184		return 0;
1185	}
1186
1187	return cma_save_ip_info(src_addr, dst_addr, ib_event, service_id);
1188}
1189
1190static int cma_save_req_info(const struct ib_cm_event *ib_event,
1191			     struct cma_req_info *req)
1192{
1193	const struct ib_cm_req_event_param *req_param =
1194		&ib_event->param.req_rcvd;
1195	const struct ib_cm_sidr_req_event_param *sidr_param =
1196		&ib_event->param.sidr_req_rcvd;
1197
1198	switch (ib_event->event) {
1199	case IB_CM_REQ_RECEIVED:
1200		req->device	= req_param->listen_id->device;
1201		req->port	= req_param->port;
1202		memcpy(&req->local_gid, &req_param->primary_path->sgid,
1203		       sizeof(req->local_gid));
1204		req->has_gid	= true;
1205		req->service_id	= req_param->primary_path->service_id;
1206		req->pkey	= be16_to_cpu(req_param->primary_path->pkey);
1207		if (req->pkey != req_param->bth_pkey)
1208			pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and primary path P_Key (0x%x)\n"
1209					    "RDMA CMA: in the future this may cause the request to be dropped\n",
1210					    req_param->bth_pkey, req->pkey);
1211		break;
1212	case IB_CM_SIDR_REQ_RECEIVED:
1213		req->device	= sidr_param->listen_id->device;
1214		req->port	= sidr_param->port;
1215		req->has_gid	= false;
1216		req->service_id	= sidr_param->service_id;
1217		req->pkey	= sidr_param->pkey;
1218		if (req->pkey != sidr_param->bth_pkey)
1219			pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and SIDR request payload P_Key (0x%x)\n"
1220					    "RDMA CMA: in the future this may cause the request to be dropped\n",
1221					    sidr_param->bth_pkey, req->pkey);
1222		break;
1223	default:
1224		return -EINVAL;
1225	}
1226
1227	return 0;
1228}
1229
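/*
 * The validate_*_net_dev() helpers check that the request's source address
 * is configured on net_dev and that the routing table reaches the
 * destination through that same interface; cma_get_net_dev() treats a
 * failed check as -EHOSTUNREACH.
 */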
1230static bool validate_ipv4_net_dev(struct net_device *net_dev,
1231				  const struct sockaddr_in *dst_addr,
1232				  const struct sockaddr_in *src_addr)
1233{
1234#ifdef INET
1235	struct sockaddr_in dst_tmp = *dst_addr;
1236	__be32 daddr = dst_addr->sin_addr.s_addr,
1237	       saddr = src_addr->sin_addr.s_addr;
1238	struct net_device *src_dev;
1239	struct rtentry *rte;
1240	bool ret;
1241
1242	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
1243	    ipv4_is_lbcast(daddr) || ipv4_is_zeronet(saddr) ||
1244	    ipv4_is_zeronet(daddr) || ipv4_is_loopback(daddr) ||
1245	    ipv4_is_loopback(saddr))
1246		return false;
1247
1248	src_dev = ip_dev_find(net_dev->if_vnet, saddr);
1249	if (src_dev != net_dev)
1250		return false;
1251
1252	/*
1253	 * Make sure the socket address length field
1254	 * is set, else rtalloc1() will fail.
1255	 */
1256	dst_tmp.sin_len = sizeof(dst_tmp);
1257
1258	CURVNET_SET(net_dev->if_vnet);
1259	rte = rtalloc1((struct sockaddr *)&dst_tmp, 1, 0);
1260	CURVNET_RESTORE();
1261	if (rte != NULL) {
1262		ret = (rte->rt_ifp == net_dev);
1263		RTFREE_LOCKED(rte);
1264	} else {
1265		ret = false;
1266	}
1267	return ret;
1268#else
1269	return false;
1270#endif
1271}
1272
1273static bool validate_ipv6_net_dev(struct net_device *net_dev,
1274				  const struct sockaddr_in6 *dst_addr,
1275				  const struct sockaddr_in6 *src_addr)
1276{
1277#ifdef INET6
1278	struct sockaddr_in6 dst_tmp = *dst_addr;
1279	struct in6_addr in6_addr = src_addr->sin6_addr;
1280	struct net_device *src_dev;
1281	struct rtentry *rte;
1282	bool ret;
1283
1284	/* embed scope ID */
1285	in6_addr.s6_addr[3] = src_addr->sin6_scope_id;
1286
1287	src_dev = ip6_dev_find(net_dev->if_vnet, in6_addr);
1288	if (src_dev != net_dev)
1289		return false;
1290
1291	/*
1292	 * Make sure the socket address length field
1293	 * is set, else rtalloc1() will fail.
1294	 */
1295	dst_tmp.sin6_len = sizeof(dst_tmp);
1296
1297	CURVNET_SET(net_dev->if_vnet);
1298	rte = rtalloc1((struct sockaddr *)&dst_tmp, 1, 0);
1299	CURVNET_RESTORE();
1300	if (rte != NULL) {
1301		ret = (rte->rt_ifp == net_dev);
1302		RTFREE_LOCKED(rte);
1303	} else {
1304		ret = false;
1305	}
1306	return ret;
1307#else
1308	return false;
1309#endif
1310}
1311
1312static bool validate_net_dev(struct net_device *net_dev,
1313			     const struct sockaddr *daddr,
1314			     const struct sockaddr *saddr)
1315{
1316	const struct sockaddr_in *daddr4 = (const struct sockaddr_in *)daddr;
1317	const struct sockaddr_in *saddr4 = (const struct sockaddr_in *)saddr;
1318	const struct sockaddr_in6 *daddr6 = (const struct sockaddr_in6 *)daddr;
1319	const struct sockaddr_in6 *saddr6 = (const struct sockaddr_in6 *)saddr;
1320
1321	switch (daddr->sa_family) {
1322	case AF_INET:
1323		return saddr->sa_family == AF_INET &&
1324		       validate_ipv4_net_dev(net_dev, daddr4, saddr4);
1325
1326	case AF_INET6:
1327		return saddr->sa_family == AF_INET6 &&
1328		       validate_ipv6_net_dev(net_dev, daddr6, saddr6);
1329
1330	default:
1331		return false;
1332	}
1333}
1334
1335static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event,
1336					  const struct cma_req_info *req)
1337{
1338	struct sockaddr_storage listen_addr_storage, src_addr_storage;
1339	struct sockaddr *listen_addr = (struct sockaddr *)&listen_addr_storage,
1340			*src_addr = (struct sockaddr *)&src_addr_storage;
1341	struct net_device *net_dev;
1342	const union ib_gid *gid = req->has_gid ? &req->local_gid : NULL;
1343	int err;
1344
1345	err = cma_save_ip_info(listen_addr, src_addr, ib_event,
1346			       req->service_id);
1347	if (err)
1348		return ERR_PTR(err);
1349
1350	net_dev = ib_get_net_dev_by_params(req->device, req->port, req->pkey,
1351					   gid, listen_addr);
1352	if (!net_dev)
1353		return ERR_PTR(-ENODEV);
1354
1355	if (!validate_net_dev(net_dev, listen_addr, src_addr)) {
1356		dev_put(net_dev);
1357		return ERR_PTR(-EHOSTUNREACH);
1358	}
1359
1360	return net_dev;
1361}
1362
1363static enum rdma_port_space rdma_ps_from_service_id(__be64 service_id)
1364{
1365	return (be64_to_cpu(service_id) >> 16) & 0xffff;
1366}
1367
1368static bool cma_match_private_data(struct rdma_id_private *id_priv,
1369				   const struct cma_hdr *hdr)
1370{
1371	struct sockaddr *addr = cma_src_addr(id_priv);
1372	__be32 ip4_addr;
1373	struct in6_addr ip6_addr;
1374
1375	if (cma_any_addr(addr) && !id_priv->afonly)
1376		return true;
1377
1378	switch (addr->sa_family) {
1379	case AF_INET:
1380		ip4_addr = ((struct sockaddr_in *)addr)->sin_addr.s_addr;
1381		if (cma_get_ip_ver(hdr) != 4)
1382			return false;
1383		if (!cma_any_addr(addr) &&
1384		    hdr->dst_addr.ip4.addr != ip4_addr)
1385			return false;
1386		break;
1387	case AF_INET6:
1388		ip6_addr = ((struct sockaddr_in6 *)addr)->sin6_addr;
1389		if (cma_get_ip_ver(hdr) != 6)
1390			return false;
1391		if (!cma_any_addr(addr) &&
1392		    memcmp(&hdr->dst_addr.ip6, &ip6_addr, sizeof(ip6_addr)))
1393			return false;
1394		break;
1395	case AF_IB:
1396		return true;
1397	default:
1398		return false;
1399	}
1400
1401	return true;
1402}
1403
1404static bool cma_protocol_roce_dev_port(struct ib_device *device, int port_num)
1405{
1406	enum rdma_link_layer ll = rdma_port_get_link_layer(device, port_num);
1407	enum rdma_transport_type transport =
1408		rdma_node_get_transport(device->node_type);
1409
1410	return ll == IB_LINK_LAYER_ETHERNET && transport == RDMA_TRANSPORT_IB;
1411}
1412
1413static bool cma_protocol_roce(const struct rdma_cm_id *id)
1414{
1415	struct ib_device *device = id->device;
1416	const int port_num = id->port_num ?: rdma_start_port(device);
1417
1418	return cma_protocol_roce_dev_port(device, port_num);
1419}
1420
1421static bool cma_match_net_dev(const struct rdma_cm_id *id,
1422			      const struct net_device *net_dev,
1423			      u8 port_num)
1424{
1425	const struct rdma_addr *addr = &id->route.addr;
1426
1427	if (!net_dev)
1428		/* This request is an AF_IB request or a RoCE request */
1429		return (!id->port_num || id->port_num == port_num) &&
1430		       (addr->src_addr.ss_family == AF_IB ||
1431			cma_protocol_roce_dev_port(id->device, port_num));
1432
1433	return !addr->dev_addr.bound_dev_if ||
1434	       (net_eq(dev_net(net_dev), addr->dev_addr.net) &&
1435		addr->dev_addr.bound_dev_if == net_dev->if_index);
1436}
1437
1438static struct rdma_id_private *cma_find_listener(
1439		const struct rdma_bind_list *bind_list,
1440		const struct ib_cm_id *cm_id,
1441		const struct ib_cm_event *ib_event,
1442		const struct cma_req_info *req,
1443		const struct net_device *net_dev)
1444{
1445	struct rdma_id_private *id_priv, *id_priv_dev;
1446
1447	if (!bind_list)
1448		return ERR_PTR(-EINVAL);
1449
1450	hlist_for_each_entry(id_priv, &bind_list->owners, node) {
1451		if (cma_match_private_data(id_priv, ib_event->private_data)) {
1452			if (id_priv->id.device == cm_id->device &&
1453			    cma_match_net_dev(&id_priv->id, net_dev, req->port))
1454				return id_priv;
1455			list_for_each_entry(id_priv_dev,
1456					    &id_priv->listen_list,
1457					    listen_list) {
1458				if (id_priv_dev->id.device == cm_id->device &&
1459				    cma_match_net_dev(&id_priv_dev->id, net_dev, req->port))
1460					return id_priv_dev;
1461			}
1462		}
1463	}
1464
1465	return ERR_PTR(-EINVAL);
1466}
1467
1468static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id,
1469						 struct ib_cm_event *ib_event,
1470						 struct net_device **net_dev)
1471{
1472	struct cma_req_info req;
1473	struct rdma_bind_list *bind_list;
1474	struct rdma_id_private *id_priv;
1475	int err;
1476
1477	err = cma_save_req_info(ib_event, &req);
1478	if (err)
1479		return ERR_PTR(err);
1480
1481	*net_dev = cma_get_net_dev(ib_event, &req);
1482	if (IS_ERR(*net_dev)) {
1483		if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) {
1484			/* Assuming the protocol is AF_IB */
1485			*net_dev = NULL;
1486		} else if (cma_protocol_roce_dev_port(req.device, req.port)) {
1487			/* TODO find the net dev matching the request parameters
1488			 * through the RoCE GID table */
1489			*net_dev = NULL;
1490		} else {
1491			return ERR_CAST(*net_dev);
1492		}
1493	}
1494
1495	bind_list = cma_ps_find(*net_dev ? dev_net(*net_dev) : &init_net,
1496				rdma_ps_from_service_id(req.service_id),
1497				cma_port_from_service_id(req.service_id));
1498	id_priv = cma_find_listener(bind_list, cm_id, ib_event, &req, *net_dev);
1499	if (IS_ERR(id_priv) && *net_dev) {
1500		dev_put(*net_dev);
1501		*net_dev = NULL;
1502	}
1503
1504	return id_priv;
1505}
1506
1507static inline int cma_user_data_offset(struct rdma_id_private *id_priv)
1508{
1509	return cma_family(id_priv) == AF_IB ? 0 : sizeof(struct cma_hdr);
1510}
1511
1512static void cma_cancel_route(struct rdma_id_private *id_priv)
1513{
1514	if (rdma_cap_ib_sa(id_priv->id.device, id_priv->id.port_num)) {
1515		if (id_priv->query)
1516			ib_sa_cancel_query(id_priv->query_id, id_priv->query);
1517	}
1518}
1519
1520static void cma_cancel_listens(struct rdma_id_private *id_priv)
1521{
1522	struct rdma_id_private *dev_id_priv;
1523
1524	/*
1525	 * Remove from listen_any_list to prevent added devices from spawning
1526	 * additional listen requests.
1527	 */
1528	mutex_lock(&lock);
1529	list_del(&id_priv->list);
1530
1531	while (!list_empty(&id_priv->listen_list)) {
1532		dev_id_priv = list_entry(id_priv->listen_list.next,
1533					 struct rdma_id_private, listen_list);
1534		/* sync with device removal to avoid duplicate destruction */
1535		list_del_init(&dev_id_priv->list);
1536		list_del(&dev_id_priv->listen_list);
1537		mutex_unlock(&lock);
1538
1539		rdma_destroy_id(&dev_id_priv->id);
1540		mutex_lock(&lock);
1541	}
1542	mutex_unlock(&lock);
1543}
1544
1545static void cma_cancel_operation(struct rdma_id_private *id_priv,
1546				 enum rdma_cm_state state)
1547{
1548	switch (state) {
1549	case RDMA_CM_ADDR_QUERY:
1550		rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
1551		break;
1552	case RDMA_CM_ROUTE_QUERY:
1553		cma_cancel_route(id_priv);
1554		break;
1555	case RDMA_CM_LISTEN:
1556		if (cma_any_addr(cma_src_addr(id_priv)) && !id_priv->cma_dev)
1557			cma_cancel_listens(id_priv);
1558		break;
1559	default:
1560		break;
1561	}
1562}
1563
1564static void cma_release_port(struct rdma_id_private *id_priv)
1565{
1566	struct rdma_bind_list *bind_list = id_priv->bind_list;
1567	struct vnet *net = id_priv->id.route.addr.dev_addr.net;
1568
1569	if (!bind_list)
1570		return;
1571
1572	mutex_lock(&lock);
1573	hlist_del(&id_priv->node);
1574	if (hlist_empty(&bind_list->owners)) {
1575		cma_ps_remove(net, bind_list->ps, bind_list->port);
1576		kfree(bind_list);
1577	}
1578	mutex_unlock(&lock);
1579}
1580
1581static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
1582{
1583	struct cma_multicast *mc;
1584
1585	while (!list_empty(&id_priv->mc_list)) {
1586		mc = container_of(id_priv->mc_list.next,
1587				  struct cma_multicast, list);
1588		list_del(&mc->list);
1589		if (rdma_cap_ib_mcast(id_priv->cma_dev->device,
1590				      id_priv->id.port_num)) {
1591			ib_sa_free_multicast(mc->multicast.ib);
1592			kfree(mc);
1593		} else {
1594			if (mc->igmp_joined) {
1595				struct rdma_dev_addr *dev_addr =
1596					&id_priv->id.route.addr.dev_addr;
1597				struct net_device *ndev = NULL;
1598
1599				if (dev_addr->bound_dev_if)
1600					ndev = dev_get_by_index(dev_addr->net,
1601								dev_addr->bound_dev_if);
1602				if (ndev) {
1603					dev_put(ndev);
1604				}
1605			}
1606			kref_put(&mc->mcref, release_mc);
1607		}
1608	}
1609}
1610
1611void rdma_destroy_id(struct rdma_cm_id *id)
1612{
1613	struct rdma_id_private *id_priv;
1614	enum rdma_cm_state state;
1615
1616	id_priv = container_of(id, struct rdma_id_private, id);
1617	state = cma_exch(id_priv, RDMA_CM_DESTROYING);
1618	cma_cancel_operation(id_priv, state);
1619
1620	/*
1621	 * Wait for any active callback to finish.  New callbacks will find
1622	 * the id_priv state set to destroying and abort.
1623	 */
1624	mutex_lock(&id_priv->handler_mutex);
1625	mutex_unlock(&id_priv->handler_mutex);
1626
1627	if (id_priv->cma_dev) {
1628		if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
1629			if (id_priv->cm_id.ib)
1630				ib_destroy_cm_id(id_priv->cm_id.ib);
1631		} else if (rdma_cap_iw_cm(id_priv->id.device, 1)) {
1632			if (id_priv->cm_id.iw)
1633				iw_destroy_cm_id(id_priv->cm_id.iw);
1634		}
1635		cma_leave_mc_groups(id_priv);
1636		cma_release_dev(id_priv);
1637	}
1638
1639	cma_release_port(id_priv);
1640	cma_deref_id(id_priv);
1641	wait_for_completion(&id_priv->comp);
1642
1643	if (id_priv->internal_id)
1644		cma_deref_id(id_priv->id.context);
1645
1646	kfree(id_priv->id.route.path_rec);
1647	kfree(id_priv);
1648}
1649EXPORT_SYMBOL(rdma_destroy_id);
1650
1651static int cma_rep_recv(struct rdma_id_private *id_priv)
1652{
1653	int ret;
1654
1655	ret = cma_modify_qp_rtr(id_priv, NULL);
1656	if (ret)
1657		goto reject;
1658
1659	ret = cma_modify_qp_rts(id_priv, NULL);
1660	if (ret)
1661		goto reject;
1662
1663	ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0);
1664	if (ret)
1665		goto reject;
1666
1667	return 0;
1668reject:
1669	cma_modify_qp_err(id_priv);
1670	ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
1671		       NULL, 0, NULL, 0);
1672	return ret;
1673}
1674
1675static void cma_set_rep_event_data(struct rdma_cm_event *event,
1676				   struct ib_cm_rep_event_param *rep_data,
1677				   void *private_data)
1678{
1679	event->param.conn.private_data = private_data;
1680	event->param.conn.private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE;
1681	event->param.conn.responder_resources = rep_data->responder_resources;
1682	event->param.conn.initiator_depth = rep_data->initiator_depth;
1683	event->param.conn.flow_control = rep_data->flow_control;
1684	event->param.conn.rnr_retry_count = rep_data->rnr_retry_count;
1685	event->param.conn.srq = rep_data->srq;
1686	event->param.conn.qp_num = rep_data->remote_qpn;
1687}
1688
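/*
 * IB CM callback for the active/connecting side: translate the IB CM event
 * into an rdma_cm event and hand it to the user's handler.  Events that
 * arrive while the id is not in the expected state are silently dropped,
 * and a non-zero return from the handler destroys the id.
 */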
1689static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
1690{
1691	struct rdma_id_private *id_priv = cm_id->context;
1692	struct rdma_cm_event event;
1693	int ret = 0;
1694
1695	mutex_lock(&id_priv->handler_mutex);
1696	if ((ib_event->event != IB_CM_TIMEWAIT_EXIT &&
1697	     id_priv->state != RDMA_CM_CONNECT) ||
1698	    (ib_event->event == IB_CM_TIMEWAIT_EXIT &&
1699	     id_priv->state != RDMA_CM_DISCONNECT))
1700		goto out;
1701
1702	memset(&event, 0, sizeof event);
1703	switch (ib_event->event) {
1704	case IB_CM_REQ_ERROR:
1705	case IB_CM_REP_ERROR:
1706		event.event = RDMA_CM_EVENT_UNREACHABLE;
1707		event.status = -ETIMEDOUT;
1708		break;
1709	case IB_CM_REP_RECEIVED:
1710		if (id_priv->id.qp) {
1711			event.status = cma_rep_recv(id_priv);
1712			event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR :
1713						     RDMA_CM_EVENT_ESTABLISHED;
1714		} else {
1715			event.event = RDMA_CM_EVENT_CONNECT_RESPONSE;
1716		}
1717		cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd,
1718				       ib_event->private_data);
1719		break;
1720	case IB_CM_RTU_RECEIVED:
1721	case IB_CM_USER_ESTABLISHED:
1722		event.event = RDMA_CM_EVENT_ESTABLISHED;
1723		break;
1724	case IB_CM_DREQ_ERROR:
1725		event.status = -ETIMEDOUT; /* fall through */
1726	case IB_CM_DREQ_RECEIVED:
1727	case IB_CM_DREP_RECEIVED:
1728		if (!cma_comp_exch(id_priv, RDMA_CM_CONNECT,
1729				   RDMA_CM_DISCONNECT))
1730			goto out;
1731		event.event = RDMA_CM_EVENT_DISCONNECTED;
1732		break;
1733	case IB_CM_TIMEWAIT_EXIT:
1734		event.event = RDMA_CM_EVENT_TIMEWAIT_EXIT;
1735		break;
1736	case IB_CM_MRA_RECEIVED:
1737		/* ignore event */
1738		goto out;
1739	case IB_CM_REJ_RECEIVED:
1740		cma_modify_qp_err(id_priv);
1741		event.status = ib_event->param.rej_rcvd.reason;
1742		event.event = RDMA_CM_EVENT_REJECTED;
1743		event.param.conn.private_data = ib_event->private_data;
1744		event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE;
1745		break;
1746	default:
1747		pr_err("RDMA CMA: unexpected IB CM event: %d\n",
1748		       ib_event->event);
1749		goto out;
1750	}
1751
1752	ret = id_priv->id.event_handler(&id_priv->id, &event);
1753	if (ret) {
1754		/* Destroy the CM ID by returning a non-zero value. */
1755		id_priv->cm_id.ib = NULL;
1756		cma_exch(id_priv, RDMA_CM_DESTROYING);
1757		mutex_unlock(&id_priv->handler_mutex);
1758		rdma_destroy_id(&id_priv->id);
1759		return ret;
1760	}
1761out:
1762	mutex_unlock(&id_priv->handler_mutex);
1763	return ret;
1764}
1765
1766static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
1767					       struct ib_cm_event *ib_event,
1768					       struct net_device *net_dev)
1769{
1770	struct rdma_id_private *id_priv;
1771	struct rdma_cm_id *id;
1772	struct rdma_route *rt;
1773	const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family;
1774	const __be64 service_id =
1775		      ib_event->param.req_rcvd.primary_path->service_id;
1776	int ret;
1777
1778	id = rdma_create_id(listen_id->route.addr.dev_addr.net,
1779			    listen_id->event_handler, listen_id->context,
1780			    listen_id->ps, ib_event->param.req_rcvd.qp_type);
1781	if (IS_ERR(id))
1782		return NULL;
1783
1784	id_priv = container_of(id, struct rdma_id_private, id);
1785	if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr,
1786			      (struct sockaddr *)&id->route.addr.dst_addr,
1787			      listen_id, ib_event, ss_family, service_id))
1788		goto err;
1789
1790	rt = &id->route;
1791	rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;
1792	rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths,
1793			       GFP_KERNEL);
1794	if (!rt->path_rec)
1795		goto err;
1796
1797	rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path;
1798	if (rt->num_paths == 2)
1799		rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
1800
1801	if (net_dev) {
1802		ret = rdma_copy_addr(&rt->addr.dev_addr, net_dev, NULL);
1803		if (ret)
1804			goto err;
1805	} else {
1806		if (!cma_protocol_roce(listen_id) &&
1807		    cma_any_addr(cma_src_addr(id_priv))) {
1808			rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND;
1809			rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
1810			ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey));
1811		} else if (!cma_any_addr(cma_src_addr(id_priv))) {
1812			ret = cma_translate_addr(cma_src_addr(id_priv), &rt->addr.dev_addr);
1813			if (ret)
1814				goto err;
1815		}
1816	}
1817	rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
1818
1819	id_priv->state = RDMA_CM_CONNECT;
1820	return id_priv;
1821
1822err:
1823	rdma_destroy_id(id);
1824	return NULL;
1825}
1826
1827static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
1828					      struct ib_cm_event *ib_event,
1829					      struct net_device *net_dev)
1830{
1831	struct rdma_id_private *id_priv;
1832	struct rdma_cm_id *id;
1833	const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family;
1834	struct vnet *net = listen_id->route.addr.dev_addr.net;
1835	int ret;
1836
1837	id = rdma_create_id(net, listen_id->event_handler, listen_id->context,
1838			    listen_id->ps, IB_QPT_UD);
1839	if (IS_ERR(id))
1840		return NULL;
1841
1842	id_priv = container_of(id, struct rdma_id_private, id);
1843	if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr,
1844			      (struct sockaddr *)&id->route.addr.dst_addr,
1845			      listen_id, ib_event, ss_family,
1846			      ib_event->param.sidr_req_rcvd.service_id))
1847		goto err;
1848
1849	if (net_dev) {
1850		ret = rdma_copy_addr(&id->route.addr.dev_addr, net_dev, NULL);
1851		if (ret)
1852			goto err;
1853	} else {
1854		if (!cma_any_addr(cma_src_addr(id_priv))) {
1855			ret = cma_translate_addr(cma_src_addr(id_priv),
1856						 &id->route.addr.dev_addr);
1857			if (ret)
1858				goto err;
1859		}
1860	}
1861
1862	id_priv->state = RDMA_CM_CONNECT;
1863	return id_priv;
1864err:
1865	rdma_destroy_id(id);
1866	return NULL;
1867}
1868
1869static void cma_set_req_event_data(struct rdma_cm_event *event,
1870				   struct ib_cm_req_event_param *req_data,
1871				   void *private_data, int offset)
1872{
1873	event->param.conn.private_data = (char *)private_data + offset;
1874	event->param.conn.private_data_len = IB_CM_REQ_PRIVATE_DATA_SIZE - offset;
1875	event->param.conn.responder_resources = req_data->responder_resources;
1876	event->param.conn.initiator_depth = req_data->initiator_depth;
1877	event->param.conn.flow_control = req_data->flow_control;
1878	event->param.conn.retry_count = req_data->retry_count;
1879	event->param.conn.rnr_retry_count = req_data->rnr_retry_count;
1880	event->param.conn.srq = req_data->srq;
1881	event->param.conn.qp_num = req_data->remote_qpn;
1882}
1883
1884static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_event)
1885{
1886	return (((ib_event->event == IB_CM_REQ_RECEIVED) &&
1887		 (ib_event->param.req_rcvd.qp_type == id->qp_type)) ||
1888		((ib_event->event == IB_CM_SIDR_REQ_RECEIVED) &&
1889		 (id->qp_type == IB_QPT_UD)) ||
1890		(!id->qp_type));
1891}
1892
1893static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
1894{
1895	struct rdma_id_private *listen_id, *conn_id = NULL;
1896	struct rdma_cm_event event;
1897	struct net_device *net_dev;
1898	int offset, ret;
1899
1900	listen_id = cma_id_from_event(cm_id, ib_event, &net_dev);
1901	if (IS_ERR(listen_id))
1902		return PTR_ERR(listen_id);
1903
1904	if (!cma_check_req_qp_type(&listen_id->id, ib_event)) {
1905		ret = -EINVAL;
1906		goto net_dev_put;
1907	}
1908
1909	mutex_lock(&listen_id->handler_mutex);
1910	if (listen_id->state != RDMA_CM_LISTEN) {
1911		ret = -ECONNABORTED;
1912		goto err1;
1913	}
1914
1915	memset(&event, 0, sizeof event);
1916	offset = cma_user_data_offset(listen_id);
1917	event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
1918	if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) {
1919		conn_id = cma_new_udp_id(&listen_id->id, ib_event, net_dev);
1920		event.param.ud.private_data = (char *)ib_event->private_data + offset;
1921		event.param.ud.private_data_len =
1922				IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset;
1923	} else {
1924		conn_id = cma_new_conn_id(&listen_id->id, ib_event, net_dev);
1925		cma_set_req_event_data(&event, &ib_event->param.req_rcvd,
1926				       ib_event->private_data, offset);
1927	}
1928	if (!conn_id) {
1929		ret = -ENOMEM;
1930		goto err1;
1931	}
1932
1933	mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
1934	ret = cma_acquire_dev(conn_id, listen_id);
1935	if (ret)
1936		goto err2;
1937
1938	conn_id->cm_id.ib = cm_id;
1939	cm_id->context = conn_id;
1940	cm_id->cm_handler = cma_ib_handler;
1941
1942	/*
1943	 * Protect against the user destroying conn_id from another thread
1944	 * until we're done accessing it.
1945	 */
1946	atomic_inc(&conn_id->refcount);
1947	ret = conn_id->id.event_handler(&conn_id->id, &event);
1948	if (ret)
1949		goto err3;
1950	/*
1951	 * Acquire mutex to prevent user executing rdma_destroy_id()
1952	 * while we're accessing the cm_id.
1953	 */
1954	mutex_lock(&lock);
1955	if (cma_comp(conn_id, RDMA_CM_CONNECT) &&
1956	    (conn_id->id.qp_type != IB_QPT_UD))
1957		ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
1958	mutex_unlock(&lock);
1959	mutex_unlock(&conn_id->handler_mutex);
1960	mutex_unlock(&listen_id->handler_mutex);
1961	cma_deref_id(conn_id);
1962	if (net_dev)
1963		dev_put(net_dev);
1964	return 0;
1965
1966err3:
1967	cma_deref_id(conn_id);
1968	/* Destroy the CM ID by returning a non-zero value. */
1969	conn_id->cm_id.ib = NULL;
1970err2:
1971	cma_exch(conn_id, RDMA_CM_DESTROYING);
1972	mutex_unlock(&conn_id->handler_mutex);
1973err1:
1974	mutex_unlock(&listen_id->handler_mutex);
1975	if (conn_id)
1976		rdma_destroy_id(&conn_id->id);
1977
1978net_dev_put:
1979	if (net_dev)
1980		dev_put(net_dev);
1981
1982	return ret;
1983}
1984
1985__be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr)
1986{
1987	if (addr->sa_family == AF_IB)
1988		return ((struct sockaddr_ib *) addr)->sib_sid;
1989
1990	return cpu_to_be64(((u64)id->ps << 16) + be16_to_cpu(cma_port(addr)));
1991}
1992EXPORT_SYMBOL(rdma_get_service_id);
1993
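/*
 * iWARP CM event handler for connected IDs: translate iw_cm events
 * (close, connect reply, established) into the corresponding RDMA CM
 * events and deliver them to the user.
 */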
1994static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
1995{
1996	struct rdma_id_private *id_priv = iw_id->context;
1997	struct rdma_cm_event event;
1998	int ret = 0;
1999	struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr;
2000	struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
2001
2002	mutex_lock(&id_priv->handler_mutex);
2003	if (id_priv->state != RDMA_CM_CONNECT)
2004		goto out;
2005
2006	memset(&event, 0, sizeof event);
2007	switch (iw_event->event) {
2008	case IW_CM_EVENT_CLOSE:
2009		event.event = RDMA_CM_EVENT_DISCONNECTED;
2010		break;
2011	case IW_CM_EVENT_CONNECT_REPLY:
2012		memcpy(cma_src_addr(id_priv), laddr,
2013		       rdma_addr_size(laddr));
2014		memcpy(cma_dst_addr(id_priv), raddr,
2015		       rdma_addr_size(raddr));
2016		switch (iw_event->status) {
2017		case 0:
2018			event.event = RDMA_CM_EVENT_ESTABLISHED;
2019			event.param.conn.initiator_depth = iw_event->ird;
2020			event.param.conn.responder_resources = iw_event->ord;
2021			break;
2022		case -ECONNRESET:
2023		case -ECONNREFUSED:
2024			event.event = RDMA_CM_EVENT_REJECTED;
2025			break;
2026		case -ETIMEDOUT:
2027			event.event = RDMA_CM_EVENT_UNREACHABLE;
2028			break;
2029		default:
2030			event.event = RDMA_CM_EVENT_CONNECT_ERROR;
2031			break;
2032		}
2033		break;
2034	case IW_CM_EVENT_ESTABLISHED:
2035		event.event = RDMA_CM_EVENT_ESTABLISHED;
2036		event.param.conn.initiator_depth = iw_event->ird;
2037		event.param.conn.responder_resources = iw_event->ord;
2038		break;
2039	default:
2040		BUG_ON(1);
2041	}
2042
2043	event.status = iw_event->status;
2044	event.param.conn.private_data = iw_event->private_data;
2045	event.param.conn.private_data_len = iw_event->private_data_len;
2046	ret = id_priv->id.event_handler(&id_priv->id, &event);
2047	if (ret) {
2048		/* Destroy the CM ID by returning a non-zero value. */
2049		id_priv->cm_id.iw = NULL;
2050		cma_exch(id_priv, RDMA_CM_DESTROYING);
2051		mutex_unlock(&id_priv->handler_mutex);
2052		rdma_destroy_id(&id_priv->id);
2053		return ret;
2054	}
2055
2056out:
2057	mutex_unlock(&id_priv->handler_mutex);
2058	return ret;
2059}
2060
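/*
 * Handle an incoming iWARP connection request on a listening ID: create
 * a child rdma_cm_id, resolve its device from the local address, and
 * report the request to the user as RDMA_CM_EVENT_CONNECT_REQUEST.
 */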
2061static int iw_conn_req_handler(struct iw_cm_id *cm_id,
2062			       struct iw_cm_event *iw_event)
2063{
2064	struct rdma_cm_id *new_cm_id;
2065	struct rdma_id_private *listen_id, *conn_id;
2066	struct rdma_cm_event event;
2067	int ret = -ECONNABORTED;
2068	struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr;
2069	struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
2070
2071	listen_id = cm_id->context;
2072
2073	mutex_lock(&listen_id->handler_mutex);
2074	if (listen_id->state != RDMA_CM_LISTEN)
2075		goto out;
2076
2077	/* Create a new RDMA CM ID for the incoming iWARP connection request. */
2078	new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net,
2079				   listen_id->id.event_handler,
2080				   listen_id->id.context,
2081				   RDMA_PS_TCP, IB_QPT_RC);
2082	if (IS_ERR(new_cm_id)) {
2083		ret = -ENOMEM;
2084		goto out;
2085	}
2086	conn_id = container_of(new_cm_id, struct rdma_id_private, id);
2087	mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
2088	conn_id->state = RDMA_CM_CONNECT;
2089
2090	ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr, NULL);
2091	if (ret) {
2092		mutex_unlock(&conn_id->handler_mutex);
2093		rdma_destroy_id(new_cm_id);
2094		goto out;
2095	}
2096
2097	ret = cma_acquire_dev(conn_id, listen_id);
2098	if (ret) {
2099		mutex_unlock(&conn_id->handler_mutex);
2100		rdma_destroy_id(new_cm_id);
2101		goto out;
2102	}
2103
2104	conn_id->cm_id.iw = cm_id;
2105	cm_id->context = conn_id;
2106	cm_id->cm_handler = cma_iw_handler;
2107
2108	memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr));
2109	memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr));
2110
2111	memset(&event, 0, sizeof event);
2112	event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
2113	event.param.conn.private_data = iw_event->private_data;
2114	event.param.conn.private_data_len = iw_event->private_data_len;
2115	event.param.conn.initiator_depth = iw_event->ird;
2116	event.param.conn.responder_resources = iw_event->ord;
2117
2118	/*
2119	 * Protect against the user destroying conn_id from another thread
2120	 * until we're done accessing it.
2121	 */
2122	atomic_inc(&conn_id->refcount);
2123	ret = conn_id->id.event_handler(&conn_id->id, &event);
2124	if (ret) {
2125		/* User wants to destroy the CM ID */
2126		conn_id->cm_id.iw = NULL;
2127		cma_exch(conn_id, RDMA_CM_DESTROYING);
2128		mutex_unlock(&conn_id->handler_mutex);
2129		cma_deref_id(conn_id);
2130		rdma_destroy_id(&conn_id->id);
2131		goto out;
2132	}
2133
2134	mutex_unlock(&conn_id->handler_mutex);
2135	cma_deref_id(conn_id);
2136
2137out:
2138	mutex_unlock(&listen_id->handler_mutex);
2139	return ret;
2140}
2141
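/*
 * Begin listening for IB CM requests by inserting a listen entry keyed
 * on the service ID derived from the bound source address.
 */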
2142static int cma_ib_listen(struct rdma_id_private *id_priv)
2143{
2144	struct sockaddr *addr;
2145	struct ib_cm_id	*id;
2146	__be64 svc_id;
2147
2148	addr = cma_src_addr(id_priv);
2149	svc_id = rdma_get_service_id(&id_priv->id, addr);
2150	id = ib_cm_insert_listen(id_priv->id.device, cma_req_handler, svc_id);
2151	if (IS_ERR(id))
2152		return PTR_ERR(id);
2153	id_priv->cm_id.ib = id;
2154
2155	return 0;
2156}
2157
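/*
 * Create an iWARP CM ID bound to the source address and start listening
 * with the requested backlog.
 */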
2158static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
2159{
2160	int ret;
2161	struct iw_cm_id	*id;
2162
2163	id = iw_create_cm_id(id_priv->id.device,
2164			     iw_conn_req_handler,
2165			     id_priv);
2166	if (IS_ERR(id))
2167		return PTR_ERR(id);
2168
2169	id->tos = id_priv->tos;
2170	id_priv->cm_id.iw = id;
2171
2172	memcpy(&id_priv->cm_id.iw->local_addr, cma_src_addr(id_priv),
2173	       rdma_addr_size(cma_src_addr(id_priv)));
2174
2175	ret = iw_cm_listen(id_priv->cm_id.iw, backlog);
2176
2177	if (ret) {
2178		iw_destroy_cm_id(id_priv->cm_id.iw);
2179		id_priv->cm_id.iw = NULL;
2180	}
2181
2182	return ret;
2183}
2184
2185static int cma_listen_handler(struct rdma_cm_id *id,
2186			      struct rdma_cm_event *event)
2187{
2188	struct rdma_id_private *id_priv = id->context;
2189
2190	id->context = id_priv->id.context;
2191	id->event_handler = id_priv->id.event_handler;
2192	return id_priv->id.event_handler(id, event);
2193}
2194
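/*
 * Create an internal listener on a single device for an ID that listens
 * on the wildcard address.  The internal ID forwards its events to the
 * original listener through cma_listen_handler().
 */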
2195static void cma_listen_on_dev(struct rdma_id_private *id_priv,
2196			      struct cma_device *cma_dev)
2197{
2198	struct rdma_id_private *dev_id_priv;
2199	struct rdma_cm_id *id;
2200	struct vnet *net = id_priv->id.route.addr.dev_addr.net;
2201	int ret;
2202
2203	if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1))
2204		return;
2205
2206	id = rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps,
2207			    id_priv->id.qp_type);
2208	if (IS_ERR(id))
2209		return;
2210
2211	dev_id_priv = container_of(id, struct rdma_id_private, id);
2212
2213	dev_id_priv->state = RDMA_CM_ADDR_BOUND;
2214	memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv),
2215	       rdma_addr_size(cma_src_addr(id_priv)));
2216
2217	_cma_attach_to_dev(dev_id_priv, cma_dev);
2218	list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
2219	atomic_inc(&id_priv->refcount);
2220	dev_id_priv->internal_id = 1;
2221	dev_id_priv->afonly = id_priv->afonly;
2222
2223	ret = rdma_listen(id, id_priv->backlog);
2224	if (ret)
2225		pr_warn("RDMA CMA: cma_listen_on_dev, error %d, listening on device %s\n",
2226			ret, cma_dev->device->name);
2227}
2228
2229static void cma_listen_on_all(struct rdma_id_private *id_priv)
2230{
2231	struct cma_device *cma_dev;
2232
2233	mutex_lock(&lock);
2234	list_add_tail(&id_priv->list, &listen_any_list);
2235	list_for_each_entry(cma_dev, &dev_list, list)
2236		cma_listen_on_dev(id_priv, cma_dev);
2237	mutex_unlock(&lock);
2238}
2239
2240void rdma_set_service_type(struct rdma_cm_id *id, int tos)
2241{
2242	struct rdma_id_private *id_priv;
2243
2244	id_priv = container_of(id, struct rdma_id_private, id);
2245	id_priv->tos = (u8) tos;
2246}
2247EXPORT_SYMBOL(rdma_set_service_type);
2248
2249static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
2250			      void *context)
2251{
2252	struct cma_work *work = context;
2253	struct rdma_route *route;
2254
2255	route = &work->id->id.route;
2256
2257	if (!status) {
2258		route->num_paths = 1;
2259		*route->path_rec = *path_rec;
2260	} else {
2261		work->old_state = RDMA_CM_ROUTE_QUERY;
2262		work->new_state = RDMA_CM_ADDR_RESOLVED;
2263		work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
2264		work->event.status = status;
2265	}
2266
2267	queue_work(cma_wq, &work->work);
2268}
2269
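/*
 * Build an SA path record query from the resolved addresses and QoS
 * settings and post it; cma_query_handler() completes the route once
 * the SA responds.
 */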
2270static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
2271			      struct cma_work *work)
2272{
2273	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
2274	struct ib_sa_path_rec path_rec;
2275	ib_sa_comp_mask comp_mask;
2276	struct sockaddr_in6 *sin6;
2277	struct sockaddr_ib *sib;
2278
2279	memset(&path_rec, 0, sizeof path_rec);
2280	rdma_addr_get_sgid(dev_addr, &path_rec.sgid);
2281	rdma_addr_get_dgid(dev_addr, &path_rec.dgid);
2282	path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
2283	path_rec.numb_path = 1;
2284	path_rec.reversible = 1;
2285	path_rec.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
2286
2287	comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
2288		    IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH |
2289		    IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID;
2290
2291	switch (cma_family(id_priv)) {
2292	case AF_INET:
2293		path_rec.qos_class = cpu_to_be16((u16) id_priv->tos);
2294		comp_mask |= IB_SA_PATH_REC_QOS_CLASS;
2295		break;
2296	case AF_INET6:
2297		sin6 = (struct sockaddr_in6 *) cma_src_addr(id_priv);
2298		path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) >> 20);
2299		comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS;
2300		break;
2301	case AF_IB:
2302		sib = (struct sockaddr_ib *) cma_src_addr(id_priv);
2303		path_rec.traffic_class = (u8) (be32_to_cpu(sib->sib_flowinfo) >> 20);
2304		comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS;
2305		break;
2306	}
2307
2308	id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device,
2309					       id_priv->id.port_num, &path_rec,
2310					       comp_mask, timeout_ms,
2311					       GFP_KERNEL, cma_query_handler,
2312					       work, &id_priv->query);
2313
2314	return (id_priv->query_id < 0) ? id_priv->query_id : 0;
2315}
2316
2317static void cma_work_handler(struct work_struct *_work)
2318{
2319	struct cma_work *work = container_of(_work, struct cma_work, work);
2320	struct rdma_id_private *id_priv = work->id;
2321	int destroy = 0;
2322
2323	mutex_lock(&id_priv->handler_mutex);
2324	if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
2325		goto out;
2326
2327	if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
2328		cma_exch(id_priv, RDMA_CM_DESTROYING);
2329		destroy = 1;
2330	}
2331out:
2332	mutex_unlock(&id_priv->handler_mutex);
2333	cma_deref_id(id_priv);
2334	if (destroy)
2335		rdma_destroy_id(&id_priv->id);
2336	kfree(work);
2337}
2338
2339static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
2340{
2341	struct rdma_route *route = &id_priv->id.route;
2342	struct cma_work *work;
2343	int ret;
2344
2345	work = kzalloc(sizeof *work, GFP_KERNEL);
2346	if (!work)
2347		return -ENOMEM;
2348
2349	work->id = id_priv;
2350	INIT_WORK(&work->work, cma_work_handler);
2351	work->old_state = RDMA_CM_ROUTE_QUERY;
2352	work->new_state = RDMA_CM_ROUTE_RESOLVED;
2353	work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
2354
2355	route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL);
2356	if (!route->path_rec) {
2357		ret = -ENOMEM;
2358		goto err1;
2359	}
2360
2361	ret = cma_query_ib_route(id_priv, timeout_ms, work);
2362	if (ret)
2363		goto err2;
2364
2365	return 0;
2366err2:
2367	kfree(route->path_rec);
2368	route->path_rec = NULL;
2369err1:
2370	kfree(work);
2371	return ret;
2372}
2373
2374int rdma_set_ib_paths(struct rdma_cm_id *id,
2375		      struct ib_sa_path_rec *path_rec, int num_paths)
2376{
2377	struct rdma_id_private *id_priv;
2378	int ret;
2379
2380	id_priv = container_of(id, struct rdma_id_private, id);
2381	if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED,
2382			   RDMA_CM_ROUTE_RESOLVED))
2383		return -EINVAL;
2384
2385	id->route.path_rec = kmemdup(path_rec, sizeof *path_rec * num_paths,
2386				     GFP_KERNEL);
2387	if (!id->route.path_rec) {
2388		ret = -ENOMEM;
2389		goto err;
2390	}
2391
2392	id->route.num_paths = num_paths;
2393	return 0;
2394err:
2395	cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_ADDR_RESOLVED);
2396	return ret;
2397}
2398EXPORT_SYMBOL(rdma_set_ib_paths);
2399
2400static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
2401{
2402	struct cma_work *work;
2403
2404	work = kzalloc(sizeof *work, GFP_KERNEL);
2405	if (!work)
2406		return -ENOMEM;
2407
2408	work->id = id_priv;
2409	INIT_WORK(&work->work, cma_work_handler);
2410	work->old_state = RDMA_CM_ROUTE_QUERY;
2411	work->new_state = RDMA_CM_ROUTE_RESOLVED;
2412	work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
2413	queue_work(cma_wq, &work->work);
2414	return 0;
2415}
2416
2417static int iboe_tos_to_sl(struct net_device *ndev, int tos)
2418{
2419	/* TODO: implement a ToS-to-SL mapping; for now all traffic maps to SL 0. */
2420	return 0;
2421}
2422
2423static enum ib_gid_type cma_route_gid_type(enum rdma_network_type network_type,
2424					   unsigned long supported_gids,
2425					   enum ib_gid_type default_gid)
2426{
2427	if ((network_type == RDMA_NETWORK_IPV4 ||
2428	     network_type == RDMA_NETWORK_IPV6) &&
2429	    test_bit(IB_GID_TYPE_ROCE_UDP_ENCAP, &supported_gids))
2430		return IB_GID_TYPE_ROCE_UDP_ENCAP;
2431
2432	return default_gid;
2433}
2434
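/*
 * RoCE has no SA to query: construct the single path record locally
 * from the resolved L2 address, the bound net device and the selected
 * GID type, then complete the route through the work queue.
 */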
2435static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
2436{
2437	struct rdma_route *route = &id_priv->id.route;
2438	struct rdma_addr *addr = &route->addr;
2439	struct cma_work *work;
2440	int ret;
2441	struct net_device *ndev = NULL;
2442
2443
2444	work = kzalloc(sizeof *work, GFP_KERNEL);
2445	if (!work)
2446		return -ENOMEM;
2447
2448	work->id = id_priv;
2449	INIT_WORK(&work->work, cma_work_handler);
2450
2451	route->path_rec = kzalloc(sizeof *route->path_rec, GFP_KERNEL);
2452	if (!route->path_rec) {
2453		ret = -ENOMEM;
2454		goto err1;
2455	}
2456
2457	route->num_paths = 1;
2458
2459	if (addr->dev_addr.bound_dev_if) {
2460		unsigned long supported_gids;
2461
2462		ndev = dev_get_by_index(addr->dev_addr.net,
2463					addr->dev_addr.bound_dev_if);
2464		if (!ndev) {
2465			ret = -ENODEV;
2466			goto err2;
2467		}
2468
2469		if (ndev->if_flags & IFF_LOOPBACK) {
2470			dev_put(ndev);
2471			if (!id_priv->id.device->get_netdev) {
2472				ret = -EOPNOTSUPP;
2473				goto err2;
2474			}
2475
2476			ndev = id_priv->id.device->get_netdev(id_priv->id.device,
2477							      id_priv->id.port_num);
2478			if (!ndev) {
2479				ret = -ENODEV;
2480				goto err2;
2481			}
2482		}
2483
2484		route->path_rec->net = ndev->if_vnet;
2485		route->path_rec->ifindex = ndev->if_index;
2486		supported_gids = roce_gid_type_mask_support(id_priv->id.device,
2487							    id_priv->id.port_num);
2488		route->path_rec->gid_type =
2489			cma_route_gid_type(addr->dev_addr.network,
2490					   supported_gids,
2491					   id_priv->gid_type);
2492	}
2493	if (!ndev) {
2494		ret = -ENODEV;
2495		goto err2;
2496	}
2497
2498	memcpy(route->path_rec->dmac, addr->dev_addr.dst_dev_addr, ETH_ALEN);
2499
2500	rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
2501		    &route->path_rec->sgid);
2502	rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr,
2503		    &route->path_rec->dgid);
2504
2505	/* Use the hint from the IP stack to select the GID type. */
2506	if (route->path_rec->gid_type < ib_network_to_gid_type(addr->dev_addr.network))
2507		route->path_rec->gid_type = ib_network_to_gid_type(addr->dev_addr.network);
2508	if (((struct sockaddr *)&id_priv->id.route.addr.dst_addr)->sa_family != AF_IB)
2509		/* TODO: get the hoplimit from the inet/inet6 device */
2510		route->path_rec->hop_limit = addr->dev_addr.hoplimit;
2511	else
2512		route->path_rec->hop_limit = 1;
2513	route->path_rec->reversible = 1;
2514	route->path_rec->pkey = cpu_to_be16(0xffff);
2515	route->path_rec->mtu_selector = IB_SA_EQ;
2516	route->path_rec->sl = iboe_tos_to_sl(ndev, id_priv->tos);
2517	route->path_rec->mtu = iboe_get_mtu(ndev->if_mtu);
2518	route->path_rec->rate_selector = IB_SA_EQ;
2519	route->path_rec->rate = iboe_get_rate(ndev);
2520	dev_put(ndev);
2521	route->path_rec->packet_life_time_selector = IB_SA_EQ;
2522	route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME;
2523	if (!route->path_rec->mtu) {
2524		ret = -EINVAL;
2525		goto err2;
2526	}
2527
2528	work->old_state = RDMA_CM_ROUTE_QUERY;
2529	work->new_state = RDMA_CM_ROUTE_RESOLVED;
2530	work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
2531	work->event.status = 0;
2532
2533	queue_work(cma_wq, &work->work);
2534
2535	return 0;
2536
2537err2:
2538	kfree(route->path_rec);
2539	route->path_rec = NULL;
2540err1:
2541	kfree(work);
2542	return ret;
2543}
2544
2545int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
2546{
2547	struct rdma_id_private *id_priv;
2548	int ret;
2549
2550	id_priv = container_of(id, struct rdma_id_private, id);
2551	if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_QUERY))
2552		return -EINVAL;
2553
2554	atomic_inc(&id_priv->refcount);
2555	if (rdma_cap_ib_sa(id->device, id->port_num))
2556		ret = cma_resolve_ib_route(id_priv, timeout_ms);
2557	else if (rdma_protocol_roce(id->device, id->port_num))
2558		ret = cma_resolve_iboe_route(id_priv);
2559	else if (rdma_protocol_iwarp(id->device, id->port_num))
2560		ret = cma_resolve_iw_route(id_priv, timeout_ms);
2561	else
2562		ret = -ENOSYS;
2563
2564	if (ret)
2565		goto err;
2566
2567	return 0;
2568err:
2569	cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, RDMA_CM_ADDR_RESOLVED);
2570	cma_deref_id(id_priv);
2571	return ret;
2572}
2573EXPORT_SYMBOL(rdma_resolve_route);
2574
2575static void cma_set_loopback(struct sockaddr *addr)
2576{
2577	switch (addr->sa_family) {
2578	case AF_INET:
2579		((struct sockaddr_in *) addr)->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
2580		break;
2581	case AF_INET6:
2582		ipv6_addr_set(&((struct sockaddr_in6 *) addr)->sin6_addr,
2583			      0, 0, 0, htonl(1));
2584		break;
2585	default:
2586		ib_addr_set(&((struct sockaddr_ib *) addr)->sib_addr,
2587			    0, 0, 0, htonl(1));
2588		break;
2589	}
2590}
2591
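/*
 * Bind an unbound ID to a local device for loopback use: prefer the
 * first active port found, fall back to port 1 of the first suitable
 * device, and set the source address to the loopback address.
 */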
2592static int cma_bind_loopback(struct rdma_id_private *id_priv)
2593{
2594	struct cma_device *cma_dev, *cur_dev;
2595	struct ib_port_attr port_attr;
2596	union ib_gid gid;
2597	u16 pkey;
2598	int ret;
2599	u8 p;
2600
2601	cma_dev = NULL;
2602	mutex_lock(&lock);
2603	list_for_each_entry(cur_dev, &dev_list, list) {
2604		if (cma_family(id_priv) == AF_IB &&
2605		    !rdma_cap_ib_cm(cur_dev->device, 1))
2606			continue;
2607
2608		if (!cma_dev)
2609			cma_dev = cur_dev;
2610
2611		for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
2612			if (!ib_query_port(cur_dev->device, p, &port_attr) &&
2613			    port_attr.state == IB_PORT_ACTIVE) {
2614				cma_dev = cur_dev;
2615				goto port_found;
2616			}
2617		}
2618	}
2619
2620	if (!cma_dev) {
2621		ret = -ENODEV;
2622		goto out;
2623	}
2624
2625	p = 1;
2626
2627port_found:
2628	ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid, NULL);
2629	if (ret)
2630		goto out;
2631
2632	ret = ib_get_cached_pkey(cma_dev->device, p, 0, &pkey);
2633	if (ret)
2634		goto out;
2635
2636	id_priv->id.route.addr.dev_addr.dev_type =
2637		(rdma_protocol_ib(cma_dev->device, p)) ?
2638		ARPHRD_INFINIBAND : ARPHRD_ETHER;
2639
2640	rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid);
2641	ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey);
2642	id_priv->id.port_num = p;
2643	cma_attach_to_dev(id_priv, cma_dev);
2644	cma_set_loopback(cma_src_addr(id_priv));
2645out:
2646	mutex_unlock(&lock);
2647	return ret;
2648}
2649
2650static void addr_handler(int status, struct sockaddr *src_addr,
2651			 struct rdma_dev_addr *dev_addr, void *context)
2652{
2653	struct rdma_id_private *id_priv = context;
2654	struct rdma_cm_event event;
2655
2656	memset(&event, 0, sizeof event);
2657	mutex_lock(&id_priv->handler_mutex);
2658	if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY,
2659			   RDMA_CM_ADDR_RESOLVED))
2660		goto out;
2661
2662	memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr));
2663	if (!status && !id_priv->cma_dev)
2664		status = cma_acquire_dev(id_priv, NULL);
2665
2666	if (status) {
2667		if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED,
2668				   RDMA_CM_ADDR_BOUND))
2669			goto out;
2670		event.event = RDMA_CM_EVENT_ADDR_ERROR;
2671		event.status = status;
2672	} else
2673		event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
2674
2675	if (id_priv->id.event_handler(&id_priv->id, &event)) {
2676		cma_exch(id_priv, RDMA_CM_DESTROYING);
2677		mutex_unlock(&id_priv->handler_mutex);
2678		cma_deref_id(id_priv);
2679		rdma_destroy_id(&id_priv->id);
2680		return;
2681	}
2682out:
2683	mutex_unlock(&id_priv->handler_mutex);
2684	cma_deref_id(id_priv);
2685}
2686
2687static int cma_resolve_loopback(struct rdma_id_private *id_priv)
2688{
2689	struct cma_work *work;
2690	union ib_gid gid;
2691	int ret;
2692
2693	work = kzalloc(sizeof *work, GFP_KERNEL);
2694	if (!work)
2695		return -ENOMEM;
2696
2697	if (!id_priv->cma_dev) {
2698		ret = cma_bind_loopback(id_priv);
2699		if (ret)
2700			goto err;
2701	}
2702
2703	rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
2704	rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
2705
2706	work->id = id_priv;
2707	INIT_WORK(&work->work, cma_work_handler);
2708	work->old_state = RDMA_CM_ADDR_QUERY;
2709	work->new_state = RDMA_CM_ADDR_RESOLVED;
2710	work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
2711	queue_work(cma_wq, &work->work);
2712	return 0;
2713err:
2714	kfree(work);
2715	return ret;
2716}
2717
2718static int cma_resolve_ib_addr(struct rdma_id_private *id_priv)
2719{
2720	struct cma_work *work;
2721	int ret;
2722
2723	work = kzalloc(sizeof *work, GFP_KERNEL);
2724	if (!work)
2725		return -ENOMEM;
2726
2727	if (!id_priv->cma_dev) {
2728		ret = cma_resolve_ib_dev(id_priv);
2729		if (ret)
2730			goto err;
2731	}
2732
2733	rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *)
2734		&(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr));
2735
2736	work->id = id_priv;
2737	INIT_WORK(&work->work, cma_work_handler);
2738	work->old_state = RDMA_CM_ADDR_QUERY;
2739	work->new_state = RDMA_CM_ADDR_RESOLVED;
2740	work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
2741	queue_work(cma_wq, &work->work);
2742	return 0;
2743err:
2744	kfree(work);
2745	return ret;
2746}
2747
2748static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
2749			 struct sockaddr *dst_addr)
2750{
2751	if (!src_addr || !src_addr->sa_family) {
2752		src_addr = (struct sockaddr *) &id->route.addr.src_addr;
2753		src_addr->sa_family = dst_addr->sa_family;
2754		if (dst_addr->sa_family == AF_INET6) {
2755			struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *) src_addr;
2756			struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *) dst_addr;
2757			src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id;
2758			if (IN6_IS_SCOPE_LINKLOCAL(&dst_addr6->sin6_addr))
2759				id->route.addr.dev_addr.bound_dev_if = dst_addr6->sin6_scope_id;
2760		} else if (dst_addr->sa_family == AF_IB) {
2761			((struct sockaddr_ib *) src_addr)->sib_pkey =
2762				((struct sockaddr_ib *) dst_addr)->sib_pkey;
2763		}
2764	}
2765	return rdma_bind_addr(id, src_addr);
2766}
2767
2768int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
2769		      struct sockaddr *dst_addr, int timeout_ms)
2770{
2771	struct rdma_id_private *id_priv;
2772	int ret;
2773
2774	id_priv = container_of(id, struct rdma_id_private, id);
2775	if (id_priv->state == RDMA_CM_IDLE) {
2776		ret = cma_bind_addr(id, src_addr, dst_addr);
2777		if (ret)
2778			return ret;
2779	}
2780
2781	if (cma_family(id_priv) != dst_addr->sa_family)
2782		return -EINVAL;
2783
2784	if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY))
2785		return -EINVAL;
2786
2787	atomic_inc(&id_priv->refcount);
2788	memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
2789	if (cma_any_addr(dst_addr)) {
2790		ret = cma_resolve_loopback(id_priv);
2791	} else {
2792		if (dst_addr->sa_family == AF_IB) {
2793			ret = cma_resolve_ib_addr(id_priv);
2794		} else {
2795			ret = rdma_resolve_ip(&addr_client, cma_src_addr(id_priv),
2796					      dst_addr, &id->route.addr.dev_addr,
2797					      timeout_ms, addr_handler, id_priv);
2798		}
2799	}
2800	if (ret)
2801		goto err;
2802
2803	return 0;
2804err:
2805	cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND);
2806	cma_deref_id(id_priv);
2807	return ret;
2808}
2809EXPORT_SYMBOL(rdma_resolve_addr);
2810
2811int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse)
2812{
2813	struct rdma_id_private *id_priv;
2814	unsigned long flags;
2815	int ret;
2816
2817	id_priv = container_of(id, struct rdma_id_private, id);
2818	spin_lock_irqsave(&id_priv->lock, flags);
2819	if (reuse || id_priv->state == RDMA_CM_IDLE) {
2820		id_priv->reuseaddr = reuse;
2821		ret = 0;
2822	} else {
2823		ret = -EINVAL;
2824	}
2825	spin_unlock_irqrestore(&id_priv->lock, flags);
2826	return ret;
2827}
2828EXPORT_SYMBOL(rdma_set_reuseaddr);
2829
2830int rdma_set_afonly(struct rdma_cm_id *id, int afonly)
2831{
2832	struct rdma_id_private *id_priv;
2833	unsigned long flags;
2834	int ret;
2835
2836	id_priv = container_of(id, struct rdma_id_private, id);
2837	spin_lock_irqsave(&id_priv->lock, flags);
2838	if (id_priv->state == RDMA_CM_IDLE || id_priv->state == RDMA_CM_ADDR_BOUND) {
2839		id_priv->options |= (1 << CMA_OPTION_AFONLY);
2840		id_priv->afonly = afonly;
2841		ret = 0;
2842	} else {
2843		ret = -EINVAL;
2844	}
2845	spin_unlock_irqrestore(&id_priv->lock, flags);
2846	return ret;
2847}
2848EXPORT_SYMBOL(rdma_set_afonly);
2849
2850static void cma_bind_port(struct rdma_bind_list *bind_list,
2851			  struct rdma_id_private *id_priv)
2852{
2853	struct sockaddr *addr;
2854	struct sockaddr_ib *sib;
2855	u64 sid, mask;
2856	__be16 port;
2857
2858	addr = cma_src_addr(id_priv);
2859	port = htons(bind_list->port);
2860
2861	switch (addr->sa_family) {
2862	case AF_INET:
2863		((struct sockaddr_in *) addr)->sin_port = port;
2864		break;
2865	case AF_INET6:
2866		((struct sockaddr_in6 *) addr)->sin6_port = port;
2867		break;
2868	case AF_IB:
2869		sib = (struct sockaddr_ib *) addr;
2870		sid = be64_to_cpu(sib->sib_sid);
2871		mask = be64_to_cpu(sib->sib_sid_mask);
2872		sib->sib_sid = cpu_to_be64((sid & mask) | (u64) ntohs(port));
2873		sib->sib_sid_mask = cpu_to_be64(~0ULL);
2874		break;
2875	}
2876	id_priv->bind_list = bind_list;
2877	hlist_add_head(&id_priv->node, &bind_list->owners);
2878}
2879
2880static int cma_alloc_port(enum rdma_port_space ps,
2881			  struct rdma_id_private *id_priv, unsigned short snum)
2882{
2883	struct rdma_bind_list *bind_list;
2884	int ret;
2885
2886	bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
2887	if (!bind_list)
2888		return -ENOMEM;
2889
2890	ret = cma_ps_alloc(id_priv->id.route.addr.dev_addr.net, ps, bind_list,
2891			   snum);
2892	if (ret < 0)
2893		goto err;
2894
2895	bind_list->ps = ps;
2896	bind_list->port = (unsigned short)ret;
2897	cma_bind_port(bind_list, id_priv);
2898	return 0;
2899err:
2900	kfree(bind_list);
2901	return ret == -ENOSPC ? -EADDRNOTAVAIL : ret;
2902}
2903
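/*
 * Pick an ephemeral port: start from a random rover within the local
 * port range and walk forward until a free port is found.
 */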
2904static int cma_alloc_any_port(enum rdma_port_space ps,
2905			      struct rdma_id_private *id_priv)
2906{
2907	static unsigned int last_used_port;
2908	int low, high, remaining;
2909	unsigned int rover;
2910	struct vnet *net = id_priv->id.route.addr.dev_addr.net;
2911	u32 rand;
2912
2913	inet_get_local_port_range(net, &low, &high);
2914	remaining = (high - low) + 1;
2915	get_random_bytes(&rand, sizeof(rand));
2916	rover = rand % remaining + low;
2917retry:
2918	if (last_used_port != rover &&
2919	    !cma_ps_find(net, ps, (unsigned short)rover)) {
2920		int ret = cma_alloc_port(ps, id_priv, rover);
2921		/*
2922		 * Remember the previously used port number in order to avoid
2923		 * re-using the same port immediately after it is closed.
2924		 */
2925		if (!ret)
2926			last_used_port = rover;
2927		if (ret != -EADDRNOTAVAIL)
2928			return ret;
2929	}
2930	if (--remaining) {
2931		rover++;
2932		if ((rover < low) || (rover > high))
2933			rover = low;
2934		goto retry;
2935	}
2936	return -EADDRNOTAVAIL;
2937}
2938
2939/*
2940 * Check that the requested port is available.  This is called when trying to
2941 * bind to a specific port, or when trying to listen on a bound port.  In
2942 * the latter case, the provided id_priv may already be on the bind_list, but
2943 * we still need to check that it's okay to start listening.
2944 */
2945static int cma_check_port(struct rdma_bind_list *bind_list,
2946			  struct rdma_id_private *id_priv, uint8_t reuseaddr)
2947{
2948	struct rdma_id_private *cur_id;
2949	struct sockaddr *addr, *cur_addr;
2950
2951	addr = cma_src_addr(id_priv);
2952	hlist_for_each_entry(cur_id, &bind_list->owners, node) {
2953		if (id_priv == cur_id)
2954			continue;
2955
2956		if ((cur_id->state != RDMA_CM_LISTEN) && reuseaddr &&
2957		    cur_id->reuseaddr)
2958			continue;
2959
2960		cur_addr = cma_src_addr(cur_id);
2961		if (id_priv->afonly && cur_id->afonly &&
2962		    (addr->sa_family != cur_addr->sa_family))
2963			continue;
2964
2965		if (cma_any_addr(addr) || cma_any_addr(cur_addr))
2966			return -EADDRNOTAVAIL;
2967
2968		if (!cma_addr_cmp(addr, cur_addr))
2969			return -EADDRINUSE;
2970	}
2971	return 0;
2972}
2973
2974static int cma_use_port(enum rdma_port_space ps,
2975			struct rdma_id_private *id_priv)
2976{
2977	struct rdma_bind_list *bind_list;
2978	unsigned short snum;
2979	int ret;
2980
2981	snum = ntohs(cma_port(cma_src_addr(id_priv)));
2982	if (snum < IPPORT_RESERVED &&
2983	    priv_check(curthread, PRIV_NETINET_BINDANY) != 0)
2984		return -EACCES;
2985
2986	bind_list = cma_ps_find(id_priv->id.route.addr.dev_addr.net, ps, snum);
2987	if (!bind_list) {
2988		ret = cma_alloc_port(ps, id_priv, snum);
2989	} else {
2990		ret = cma_check_port(bind_list, id_priv, id_priv->reuseaddr);
2991		if (!ret)
2992			cma_bind_port(bind_list, id_priv);
2993	}
2994	return ret;
2995}
2996
2997static int cma_bind_listen(struct rdma_id_private *id_priv)
2998{
2999	struct rdma_bind_list *bind_list = id_priv->bind_list;
3000	int ret = 0;
3001
3002	mutex_lock(&lock);
3003	if (bind_list->owners.first->next)
3004		ret = cma_check_port(bind_list, id_priv, 0);
3005	mutex_unlock(&lock);
3006	return ret;
3007}
3008
3009static enum rdma_port_space cma_select_inet_ps(
3010		struct rdma_id_private *id_priv)
3011{
3012	switch (id_priv->id.ps) {
3013	case RDMA_PS_TCP:
3014	case RDMA_PS_UDP:
3015	case RDMA_PS_IPOIB:
3016	case RDMA_PS_IB:
3017		return id_priv->id.ps;
3018	default:
3019
3020		return 0;
3021	}
3022}
3023
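/*
 * Map an AF_IB service ID onto an RDMA port space (IB, TCP or UDP) and
 * rewrite the sockaddr_ib SID and mask so the port space prefix and
 * port number are encoded in the service ID.
 */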
3024static enum rdma_port_space cma_select_ib_ps(struct rdma_id_private *id_priv)
3025{
3026	enum rdma_port_space ps = 0;
3027	struct sockaddr_ib *sib;
3028	u64 sid_ps, mask, sid;
3029
3030	sib = (struct sockaddr_ib *) cma_src_addr(id_priv);
3031	mask = be64_to_cpu(sib->sib_sid_mask) & RDMA_IB_IP_PS_MASK;
3032	sid = be64_to_cpu(sib->sib_sid) & mask;
3033
3034	if ((id_priv->id.ps == RDMA_PS_IB) && (sid == (RDMA_IB_IP_PS_IB & mask))) {
3035		sid_ps = RDMA_IB_IP_PS_IB;
3036		ps = RDMA_PS_IB;
3037	} else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_TCP)) &&
3038		   (sid == (RDMA_IB_IP_PS_TCP & mask))) {
3039		sid_ps = RDMA_IB_IP_PS_TCP;
3040		ps = RDMA_PS_TCP;
3041	} else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_UDP)) &&
3042		   (sid == (RDMA_IB_IP_PS_UDP & mask))) {
3043		sid_ps = RDMA_IB_IP_PS_UDP;
3044		ps = RDMA_PS_UDP;
3045	}
3046
3047	if (ps) {
3048		sib->sib_sid = cpu_to_be64(sid_ps | ntohs(cma_port((struct sockaddr *) sib)));
3049		sib->sib_sid_mask = cpu_to_be64(RDMA_IB_IP_PS_MASK |
3050						be64_to_cpu(sib->sib_sid_mask));
3051	}
3052	return ps;
3053}
3054
3055static int cma_get_port(struct rdma_id_private *id_priv)
3056{
3057	enum rdma_port_space ps;
3058	int ret;
3059
3060	if (cma_family(id_priv) != AF_IB)
3061		ps = cma_select_inet_ps(id_priv);
3062	else
3063		ps = cma_select_ib_ps(id_priv);
3064	if (!ps)
3065		return -EPROTONOSUPPORT;
3066
3067	mutex_lock(&lock);
3068	if (cma_any_port(cma_src_addr(id_priv)))
3069		ret = cma_alloc_any_port(ps, id_priv);
3070	else
3071		ret = cma_use_port(ps, id_priv);
3072	mutex_unlock(&lock);
3073
3074	return ret;
3075}
3076
3077static int cma_check_linklocal(struct rdma_dev_addr *dev_addr,
3078			       struct sockaddr *addr)
3079{
3080#ifdef INET6
3081	struct sockaddr_in6 sin6;
3082
3083	if (addr->sa_family != AF_INET6)
3084		return 0;
3085
3086	sin6 = *(struct sockaddr_in6 *)addr;
3087
3088	if (!(IN6_IS_SCOPE_LINKLOCAL(&sin6.sin6_addr)))
3089		return 0;
3090
3091	if (sa6_recoverscope(&sin6) || sin6.sin6_scope_id == 0)
3092		return -EINVAL;
3093
3094	dev_addr->bound_dev_if = sin6.sin6_scope_id;
3095#endif
3096	return 0;
3097}
3098
3099int rdma_listen(struct rdma_cm_id *id, int backlog)
3100{
3101	struct rdma_id_private *id_priv;
3102	int ret;
3103
3104	id_priv = container_of(id, struct rdma_id_private, id);
3105	if (id_priv->state == RDMA_CM_IDLE) {
3106		id->route.addr.src_addr.ss_family = AF_INET;
3107		ret = rdma_bind_addr(id, cma_src_addr(id_priv));
3108		if (ret)
3109			return ret;
3110	}
3111
3112	if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN))
3113		return -EINVAL;
3114
3115	if (id_priv->reuseaddr) {
3116		ret = cma_bind_listen(id_priv);
3117		if (ret)
3118			goto err;
3119	}
3120
3121	id_priv->backlog = backlog;
3122	if (id->device) {
3123		if (rdma_cap_ib_cm(id->device, 1)) {
3124			ret = cma_ib_listen(id_priv);
3125			if (ret)
3126				goto err;
3127		} else if (rdma_cap_iw_cm(id->device, 1)) {
3128			ret = cma_iw_listen(id_priv, backlog);
3129			if (ret)
3130				goto err;
3131		} else {
3132			ret = -ENOSYS;
3133			goto err;
3134		}
3135	} else
3136		cma_listen_on_all(id_priv);
3137
3138	return 0;
3139err:
3140	id_priv->backlog = 0;
3141	cma_comp_exch(id_priv, RDMA_CM_LISTEN, RDMA_CM_ADDR_BOUND);
3142	return ret;
3143}
3144EXPORT_SYMBOL(rdma_listen);
3145
3146int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
3147{
3148	struct rdma_id_private *id_priv;
3149	int ret;
3150
3151	if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 &&
3152	    addr->sa_family != AF_IB)
3153		return -EAFNOSUPPORT;
3154
3155	id_priv = container_of(id, struct rdma_id_private, id);
3156	if (!cma_comp_exch(id_priv, RDMA_CM_IDLE, RDMA_CM_ADDR_BOUND))
3157		return -EINVAL;
3158
3159	ret = cma_check_linklocal(&id->route.addr.dev_addr, addr);
3160	if (ret)
3161		goto err1;
3162
3163	memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr));
3164	if (!cma_any_addr(addr)) {
3165		ret = cma_translate_addr(addr, &id->route.addr.dev_addr);
3166		if (ret)
3167			goto err1;
3168
3169		ret = cma_acquire_dev(id_priv, NULL);
3170		if (ret)
3171			goto err1;
3172	}
3173
3174	if (!(id_priv->options & (1 << CMA_OPTION_AFONLY))) {
3175		if (addr->sa_family == AF_INET)
3176			id_priv->afonly = 1;
3177#ifdef INET6
3178		else if (addr->sa_family == AF_INET6) {
3179			CURVNET_SET_QUIET(id_priv->id.route.addr.dev_addr.net);
3180			id_priv->afonly = V_ip6_v6only;
3181			CURVNET_RESTORE();
3182		}
3183#endif
3184	}
3185	ret = cma_get_port(id_priv);
3186	if (ret)
3187		goto err2;
3188
3189	return 0;
3190err2:
3191	if (id_priv->cma_dev)
3192		cma_release_dev(id_priv);
3193err1:
3194	cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE);
3195	return ret;
3196}
3197EXPORT_SYMBOL(rdma_bind_addr);
3198
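/*
 * Fill in the CMA header that precedes user private data in IB CM
 * messages: protocol version, IP version, source and destination
 * addresses, and the source port.
 */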
3199static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv)
3200{
3201	struct cma_hdr *cma_hdr;
3202
3203	cma_hdr = hdr;
3204	cma_hdr->cma_version = CMA_VERSION;
3205	if (cma_family(id_priv) == AF_INET) {
3206		struct sockaddr_in *src4, *dst4;
3207
3208		src4 = (struct sockaddr_in *) cma_src_addr(id_priv);
3209		dst4 = (struct sockaddr_in *) cma_dst_addr(id_priv);
3210
3211		cma_set_ip_ver(cma_hdr, 4);
3212		cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
3213		cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
3214		cma_hdr->port = src4->sin_port;
3215	} else if (cma_family(id_priv) == AF_INET6) {
3216		struct sockaddr_in6 *src6, *dst6;
3217
3218		src6 = (struct sockaddr_in6 *) cma_src_addr(id_priv);
3219		dst6 = (struct sockaddr_in6 *) cma_dst_addr(id_priv);
3220
3221		cma_set_ip_ver(cma_hdr, 6);
3222		cma_hdr->src_addr.ip6 = src6->sin6_addr;
3223		cma_hdr->dst_addr.ip6 = dst6->sin6_addr;
3224		cma_hdr->port = src6->sin6_port;
3225	}
3226	return 0;
3227}
3228
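/*
 * IB CM callback for SIDR replies on UD connections: on success resolve
 * the qkey and build an address handle from the path record, then
 * report RDMA_CM_EVENT_ESTABLISHED (or UNREACHABLE/ADDR_ERROR on
 * failure).
 */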
3229static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
3230				struct ib_cm_event *ib_event)
3231{
3232	struct rdma_id_private *id_priv = cm_id->context;
3233	struct rdma_cm_event event;
3234	struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd;
3235	int ret = 0;
3236
3237	mutex_lock(&id_priv->handler_mutex);
3238	if (id_priv->state != RDMA_CM_CONNECT)
3239		goto out;
3240
3241	memset(&event, 0, sizeof event);
3242	switch (ib_event->event) {
3243	case IB_CM_SIDR_REQ_ERROR:
3244		event.event = RDMA_CM_EVENT_UNREACHABLE;
3245		event.status = -ETIMEDOUT;
3246		break;
3247	case IB_CM_SIDR_REP_RECEIVED:
3248		event.param.ud.private_data = ib_event->private_data;
3249		event.param.ud.private_data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE;
3250		if (rep->status != IB_SIDR_SUCCESS) {
3251			event.event = RDMA_CM_EVENT_UNREACHABLE;
3252			event.status = ib_event->param.sidr_rep_rcvd.status;
3253			break;
3254		}
3255		ret = cma_set_qkey(id_priv, rep->qkey);
3256		if (ret) {
3257			event.event = RDMA_CM_EVENT_ADDR_ERROR;
3258			event.status = ret;
3259			break;
3260		}
3261		ret = ib_init_ah_from_path(id_priv->id.device,
3262					   id_priv->id.port_num,
3263					   id_priv->id.route.path_rec,
3264					   &event.param.ud.ah_attr);
3265		if (ret) {
3266			event.event = RDMA_CM_EVENT_ADDR_ERROR;
3267			event.status = ret;
3268			break;
3269		}
3270		event.param.ud.qp_num = rep->qpn;
3271		event.param.ud.qkey = rep->qkey;
3272		event.event = RDMA_CM_EVENT_ESTABLISHED;
3273		event.status = 0;
3274		break;
3275	default:
3276		pr_err("RDMA CMA: unexpected IB CM event: %d\n",
3277		       ib_event->event);
3278		goto out;
3279	}
3280
3281	ret = id_priv->id.event_handler(&id_priv->id, &event);
3282	if (ret) {
3283		/* Destroy the CM ID by returning a non-zero value. */
3284		id_priv->cm_id.ib = NULL;
3285		cma_exch(id_priv, RDMA_CM_DESTROYING);
3286		mutex_unlock(&id_priv->handler_mutex);
3287		rdma_destroy_id(&id_priv->id);
3288		return ret;
3289	}
3290out:
3291	mutex_unlock(&id_priv->handler_mutex);
3292	return ret;
3293}
3294
3295static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
3296			      struct rdma_conn_param *conn_param)
3297{
3298	struct ib_cm_sidr_req_param req;
3299	struct ib_cm_id	*id;
3300	void *private_data;
3301	int offset, ret;
3302
3303	memset(&req, 0, sizeof req);
3304	offset = cma_user_data_offset(id_priv);
3305	req.private_data_len = offset + conn_param->private_data_len;
3306	if (req.private_data_len < conn_param->private_data_len)
3307		return -EINVAL;
3308
3309	if (req.private_data_len) {
3310		private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
3311		if (!private_data)
3312			return -ENOMEM;
3313	} else {
3314		private_data = NULL;
3315	}
3316
3317	if (conn_param->private_data && conn_param->private_data_len)
3318		memcpy((char *)private_data + offset, conn_param->private_data,
3319		       conn_param->private_data_len);
3320
3321	if (private_data) {
3322		ret = cma_format_hdr(private_data, id_priv);
3323		if (ret)
3324			goto out;
3325		req.private_data = private_data;
3326	}
3327
3328	id = ib_create_cm_id(id_priv->id.device, cma_sidr_rep_handler,
3329			     id_priv);
3330	if (IS_ERR(id)) {
3331		ret = PTR_ERR(id);
3332		goto out;
3333	}
3334	id_priv->cm_id.ib = id;
3335
3336	req.path = id_priv->id.route.path_rec;
3337	req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
3338	req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8);
3339	req.max_cm_retries = CMA_MAX_CM_RETRIES;
3340
3341	ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req);
3342	if (ret) {
3343		ib_destroy_cm_id(id_priv->cm_id.ib);
3344		id_priv->cm_id.ib = NULL;
3345	}
3346out:
3347	kfree(private_data);
3348	return ret;
3349}
3350
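/*
 * Send an IB CM REQ for a connected QP: prepend the CMA header to the
 * user's private data and fill the request from the resolved path
 * records and the caller's connection parameters.
 */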
3351static int cma_connect_ib(struct rdma_id_private *id_priv,
3352			  struct rdma_conn_param *conn_param)
3353{
3354	struct ib_cm_req_param req;
3355	struct rdma_route *route;
3356	void *private_data;
3357	struct ib_cm_id	*id;
3358	int offset, ret;
3359
3360	memset(&req, 0, sizeof req);
3361	offset = cma_user_data_offset(id_priv);
3362	req.private_data_len = offset + conn_param->private_data_len;
3363	if (req.private_data_len < conn_param->private_data_len)
3364		return -EINVAL;
3365
3366	if (req.private_data_len) {
3367		private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
3368		if (!private_data)
3369			return -ENOMEM;
3370	} else {
3371		private_data = NULL;
3372	}
3373
3374	if (conn_param->private_data && conn_param->private_data_len)
3375		memcpy((char *)private_data + offset, conn_param->private_data,
3376		       conn_param->private_data_len);
3377
3378	id = ib_create_cm_id(id_priv->id.device, cma_ib_handler, id_priv);
3379	if (IS_ERR(id)) {
3380		ret = PTR_ERR(id);
3381		goto out;
3382	}
3383	id_priv->cm_id.ib = id;
3384
3385	route = &id_priv->id.route;
3386	if (private_data) {
3387		ret = cma_format_hdr(private_data, id_priv);
3388		if (ret)
3389			goto out;
3390		req.private_data = private_data;
3391	}
3392
3393	req.primary_path = &route->path_rec[0];
3394	if (route->num_paths == 2)
3395		req.alternate_path = &route->path_rec[1];
3396
3397	req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
3398	req.qp_num = id_priv->qp_num;
3399	req.qp_type = id_priv->id.qp_type;
3400	req.starting_psn = id_priv->seq_num;
3401	req.responder_resources = conn_param->responder_resources;
3402	req.initiator_depth = conn_param->initiator_depth;
3403	req.flow_control = conn_param->flow_control;
3404	req.retry_count = min_t(u8, 7, conn_param->retry_count);
3405	req.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count);
3406	req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
3407	req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
3408	req.max_cm_retries = CMA_MAX_CM_RETRIES;
3409	req.srq = id_priv->srq ? 1 : 0;
3410
3411	ret = ib_send_cm_req(id_priv->cm_id.ib, &req);
3412out:
3413	if (ret && !IS_ERR(id)) {
3414		ib_destroy_cm_id(id);
3415		id_priv->cm_id.ib = NULL;
3416	}
3417
3418	kfree(private_data);
3419	return ret;
3420}
3421
3422static int cma_connect_iw(struct rdma_id_private *id_priv,
3423			  struct rdma_conn_param *conn_param)
3424{
3425	struct iw_cm_id *cm_id;
3426	int ret;
3427	struct iw_cm_conn_param iw_param;
3428
3429	cm_id = iw_create_cm_id(id_priv->id.device, cma_iw_handler, id_priv);
3430	if (IS_ERR(cm_id))
3431		return PTR_ERR(cm_id);
3432
3433	cm_id->tos = id_priv->tos;
3434	id_priv->cm_id.iw = cm_id;
3435
3436	memcpy(&cm_id->local_addr, cma_src_addr(id_priv),
3437	       rdma_addr_size(cma_src_addr(id_priv)));
3438	memcpy(&cm_id->remote_addr, cma_dst_addr(id_priv),
3439	       rdma_addr_size(cma_dst_addr(id_priv)));
3440
3441	ret = cma_modify_qp_rtr(id_priv, conn_param);
3442	if (ret)
3443		goto out;
3444
3445	if (conn_param) {
3446		iw_param.ord = conn_param->initiator_depth;
3447		iw_param.ird = conn_param->responder_resources;
3448		iw_param.private_data = conn_param->private_data;
3449		iw_param.private_data_len = conn_param->private_data_len;
3450		iw_param.qpn = id_priv->id.qp ? id_priv->qp_num : conn_param->qp_num;
3451	} else {
3452		memset(&iw_param, 0, sizeof iw_param);
3453		iw_param.qpn = id_priv->qp_num;
3454	}
3455	ret = iw_cm_connect(cm_id, &iw_param);
3456out:
3457	if (ret) {
3458		iw_destroy_cm_id(cm_id);
3459		id_priv->cm_id.iw = NULL;
3460	}
3461	return ret;
3462}
3463
3464int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
3465{
3466	struct rdma_id_private *id_priv;
3467	int ret;
3468
3469	id_priv = container_of(id, struct rdma_id_private, id);
3470	if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT))
3471		return -EINVAL;
3472
3473	if (!id->qp) {
3474		id_priv->qp_num = conn_param->qp_num;
3475		id_priv->srq = conn_param->srq;
3476	}
3477
3478	if (rdma_cap_ib_cm(id->device, id->port_num)) {
3479		if (id->qp_type == IB_QPT_UD)
3480			ret = cma_resolve_ib_udp(id_priv, conn_param);
3481		else
3482			ret = cma_connect_ib(id_priv, conn_param);
3483	} else if (rdma_cap_iw_cm(id->device, id->port_num))
3484		ret = cma_connect_iw(id_priv, conn_param);
3485	else
3486		ret = -ENOSYS;
3487	if (ret)
3488		goto err;
3489
3490	return 0;
3491err:
3492	cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_ROUTE_RESOLVED);
3493	return ret;
3494}
3495EXPORT_SYMBOL(rdma_connect);
3496
3497static int cma_accept_ib(struct rdma_id_private *id_priv,
3498			 struct rdma_conn_param *conn_param)
3499{
3500	struct ib_cm_rep_param rep;
3501	int ret;
3502
3503	ret = cma_modify_qp_rtr(id_priv, conn_param);
3504	if (ret)
3505		goto out;
3506
3507	ret = cma_modify_qp_rts(id_priv, conn_param);
3508	if (ret)
3509		goto out;
3510
3511	memset(&rep, 0, sizeof rep);
3512	rep.qp_num = id_priv->qp_num;
3513	rep.starting_psn = id_priv->seq_num;
3514	rep.private_data = conn_param->private_data;
3515	rep.private_data_len = conn_param->private_data_len;
3516	rep.responder_resources = conn_param->responder_resources;
3517	rep.initiator_depth = conn_param->initiator_depth;
3518	rep.failover_accepted = 0;
3519	rep.flow_control = conn_param->flow_control;
3520	rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count);
3521	rep.srq = id_priv->srq ? 1 : 0;
3522
3523	ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep);
3524out:
3525	return ret;
3526}
3527
3528static int cma_accept_iw(struct rdma_id_private *id_priv,
3529		  struct rdma_conn_param *conn_param)
3530{
3531	struct iw_cm_conn_param iw_param;
3532	int ret;
3533
	/* rdma_accept() may pass a NULL conn_param; iWARP accept requires one. */
	if (!conn_param)
		return -EINVAL;

3534	ret = cma_modify_qp_rtr(id_priv, conn_param);
3535	if (ret)
3536		return ret;
3537
3538	iw_param.ord = conn_param->initiator_depth;
3539	iw_param.ird = conn_param->responder_resources;
3540	iw_param.private_data = conn_param->private_data;
3541	iw_param.private_data_len = conn_param->private_data_len;
3542	if (id_priv->id.qp) {
3543		iw_param.qpn = id_priv->qp_num;
3544	} else
3545		iw_param.qpn = conn_param->qp_num;
3546
3547	return iw_cm_accept(id_priv->cm_id.iw, &iw_param);
3548}
3549
3550static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
3551			     enum ib_cm_sidr_status status, u32 qkey,
3552			     const void *private_data, int private_data_len)
3553{
3554	struct ib_cm_sidr_rep_param rep;
3555	int ret;
3556
3557	memset(&rep, 0, sizeof rep);
3558	rep.status = status;
3559	if (status == IB_SIDR_SUCCESS) {
3560		ret = cma_set_qkey(id_priv, qkey);
3561		if (ret)
3562			return ret;
3563		rep.qp_num = id_priv->qp_num;
3564		rep.qkey = id_priv->qkey;
3565	}
3566	rep.private_data = private_data;
3567	rep.private_data_len = private_data_len;
3568
3569	return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
3570}
3571
3572int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
3573{
3574	struct rdma_id_private *id_priv;
3575	int ret;
3576
3577	id_priv = container_of(id, struct rdma_id_private, id);
3578
3579	id_priv->owner = task_pid_nr(current);
3580
3581	if (!cma_comp(id_priv, RDMA_CM_CONNECT))
3582		return -EINVAL;
3583
3584	if (!id->qp && conn_param) {
3585		id_priv->qp_num = conn_param->qp_num;
3586		id_priv->srq = conn_param->srq;
3587	}
3588
3589	if (rdma_cap_ib_cm(id->device, id->port_num)) {
3590		if (id->qp_type == IB_QPT_UD) {
3591			if (conn_param)
3592				ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
3593							conn_param->qkey,
3594							conn_param->private_data,
3595							conn_param->private_data_len);
3596			else
3597				ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
3598							0, NULL, 0);
3599		} else {
3600			if (conn_param)
3601				ret = cma_accept_ib(id_priv, conn_param);
3602			else
3603				ret = cma_rep_recv(id_priv);
3604		}
3605	} else if (rdma_cap_iw_cm(id->device, id->port_num))
3606		ret = cma_accept_iw(id_priv, conn_param);
3607	else
3608		ret = -ENOSYS;
3609
3610	if (ret)
3611		goto reject;
3612
3613	return 0;
3614reject:
3615	cma_modify_qp_err(id_priv);
3616	rdma_reject(id, NULL, 0);
3617	return ret;
3618}
3619EXPORT_SYMBOL(rdma_accept);
3620
3621int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
3622{
3623	struct rdma_id_private *id_priv;
3624	int ret;
3625
3626	id_priv = container_of(id, struct rdma_id_private, id);
3627	if (!id_priv->cm_id.ib)
3628		return -EINVAL;
3629
3630	switch (id->device->node_type) {
3631	case RDMA_NODE_IB_CA:
3632		ret = ib_cm_notify(id_priv->cm_id.ib, event);
3633		break;
3634	default:
3635		ret = 0;
3636		break;
3637	}
3638	return ret;
3639}
3640EXPORT_SYMBOL(rdma_notify);
3641
3642int rdma_reject(struct rdma_cm_id *id, const void *private_data,
3643		u8 private_data_len)
3644{
3645	struct rdma_id_private *id_priv;
3646	int ret;
3647
3648	id_priv = container_of(id, struct rdma_id_private, id);
3649	if (!id_priv->cm_id.ib)
3650		return -EINVAL;
3651
3652	if (rdma_cap_ib_cm(id->device, id->port_num)) {
3653		if (id->qp_type == IB_QPT_UD)
3654			ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, 0,
3655						private_data, private_data_len);
3656		else
3657			ret = ib_send_cm_rej(id_priv->cm_id.ib,
3658					     IB_CM_REJ_CONSUMER_DEFINED, NULL,
3659					     0, private_data, private_data_len);
3660	} else if (rdma_cap_iw_cm(id->device, id->port_num)) {
3661		ret = iw_cm_reject(id_priv->cm_id.iw,
3662				   private_data, private_data_len);
3663	} else
3664		ret = -ENOSYS;
3665
3666	return ret;
3667}
3668EXPORT_SYMBOL(rdma_reject);
3669
3670int rdma_disconnect(struct rdma_cm_id *id)
3671{
3672	struct rdma_id_private *id_priv;
3673	int ret;
3674
3675	id_priv = container_of(id, struct rdma_id_private, id);
3676	if (!id_priv->cm_id.ib)
3677		return -EINVAL;
3678
3679	if (rdma_cap_ib_cm(id->device, id->port_num)) {
3680		ret = cma_modify_qp_err(id_priv);
3681		if (ret)
3682			goto out;
3683		/* Initiate or respond to a disconnect. */
3684		if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0))
3685			ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0);
3686	} else if (rdma_cap_iw_cm(id->device, id->port_num)) {
3687		ret = iw_cm_disconnect(id_priv->cm_id.iw, 0);
3688	} else
3689		ret = -EINVAL;
3690
3691out:
3692	return ret;
3693}
3694EXPORT_SYMBOL(rdma_disconnect);
3695
3696static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
3697{
3698	struct rdma_id_private *id_priv;
3699	struct cma_multicast *mc = multicast->context;
3700	struct rdma_cm_event event;
3701	int ret = 0;
3702
3703	id_priv = mc->id_priv;
3704	mutex_lock(&id_priv->handler_mutex);
3705	if (id_priv->state != RDMA_CM_ADDR_BOUND &&
3706	    id_priv->state != RDMA_CM_ADDR_RESOLVED)
3707		goto out;
3708
3709	if (!status)
3710		status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey));
3711	mutex_lock(&id_priv->qp_mutex);
3712	if (!status && id_priv->id.qp)
3713		status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
3714					 be16_to_cpu(multicast->rec.mlid));
3715	mutex_unlock(&id_priv->qp_mutex);
3716
3717	memset(&event, 0, sizeof event);
3718	event.status = status;
3719	event.param.ud.private_data = mc->context;
3720	if (!status) {
3721		struct rdma_dev_addr *dev_addr =
3722			&id_priv->id.route.addr.dev_addr;
3723		struct net_device *ndev =
3724			dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
3725		enum ib_gid_type gid_type =
3726			id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
3727			rdma_start_port(id_priv->cma_dev->device)];
3728
3729		event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
3730		ib_init_ah_from_mcmember(id_priv->id.device,
3731					 id_priv->id.port_num, &multicast->rec,
3732					 ndev, gid_type,
3733					 &event.param.ud.ah_attr);
3734		event.param.ud.qp_num = 0xFFFFFF;
3735		event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
3736		if (ndev)
3737			dev_put(ndev);
3738	} else
3739		event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
3740
3741	ret = id_priv->id.event_handler(&id_priv->id, &event);
3742	if (ret) {
3743		cma_exch(id_priv, RDMA_CM_DESTROYING);
3744		mutex_unlock(&id_priv->handler_mutex);
3745		rdma_destroy_id(&id_priv->id);
3746		return 0;
3747	}
3748
3749out:
3750	mutex_unlock(&id_priv->handler_mutex);
3751	return 0;
3752}
3753
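/*
 * Derive the multicast GID for an IB join from the destination address:
 * the wildcard address maps to the zero MGID, SA-assigned IPv6 MGIDs
 * and AF_IB addresses are used directly, and other IPv4/IPv6 addresses
 * are run through the IPoIB multicast mapping (tagged with the RDMA CM
 * signature for RDMA_PS_UDP).
 */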
3754static void cma_set_mgid(struct rdma_id_private *id_priv,
3755			 struct sockaddr *addr, union ib_gid *mgid)
3756{
3757	unsigned char mc_map[MAX_ADDR_LEN];
3758	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
3759	struct sockaddr_in *sin = (struct sockaddr_in *) addr;
3760	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr;
3761
3762	if (cma_any_addr(addr)) {
3763		memset(mgid, 0, sizeof *mgid);
3764	} else if ((addr->sa_family == AF_INET6) &&
3765		   ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFFF0FFFF) ==
3766								 0xFF10A01B)) {
3767		/* The IPv6 address is an SA-assigned MGID. */
3768		memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
3769	} else if (addr->sa_family == AF_IB) {
3770		memcpy(mgid, &((struct sockaddr_ib *) addr)->sib_addr, sizeof *mgid);
3771	} else if (addr->sa_family == AF_INET6) {
3772		ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map);
3773		if (id_priv->id.ps == RDMA_PS_UDP)
3774			mc_map[7] = 0x01;	/* Use RDMA CM signature */
3775		*mgid = *(union ib_gid *) (mc_map + 4);
3776	} else {
3777		ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map);
3778		if (id_priv->id.ps == RDMA_PS_UDP)
3779			mc_map[7] = 0x01;	/* Use RDMA CM signature */
3780		*mgid = *(union ib_gid *) (mc_map + 4);
3781	}
3782}
3783
3784static void cma_query_sa_classport_info_cb(int status,
3785					   struct ib_class_port_info *rec,
3786					   void *context)
3787{
3788	struct class_port_info_context *cb_ctx = context;
3789
3790	WARN_ON(!context);
3791
3792	if (status || !rec) {
3793		pr_debug("RDMA CM: %s port %u failed query ClassPortInfo status: %d\n",
3794			 cb_ctx->device->name, cb_ctx->port_num, status);
3795		goto out;
3796	}
3797
3798	memcpy(cb_ctx->class_port_info, rec, sizeof(struct ib_class_port_info));
3799
3800out:
3801	complete(&cb_ctx->done);
3802}
3803
3804static int cma_query_sa_classport_info(struct ib_device *device, u8 port_num,
3805				       struct ib_class_port_info *class_port_info)
3806{
3807	struct class_port_info_context *cb_ctx;
3808	int ret;
3809
3810	cb_ctx = kmalloc(sizeof(*cb_ctx), GFP_KERNEL);
3811	if (!cb_ctx)
3812		return -ENOMEM;
3813
3814	cb_ctx->device = device;
3815	cb_ctx->class_port_info = class_port_info;
3816	cb_ctx->port_num = port_num;
3817	init_completion(&cb_ctx->done);
3818
3819	ret = ib_sa_classport_info_rec_query(&sa_client, device, port_num,
3820					     CMA_QUERY_CLASSPORT_INFO_TIMEOUT,
3821					     GFP_KERNEL, cma_query_sa_classport_info_cb,
3822					     cb_ctx, &cb_ctx->sa_query);
3823	if (ret < 0) {
3824		pr_err("RDMA CM: %s port %u failed to send ClassPortInfo query, ret: %d\n",
3825		       device->name, port_num, ret);
3826		goto out;
3827	}
3828
3829	wait_for_completion(&cb_ctx->done);
	/* The query call can return a positive query id on success; normalize
	 * so callers that treat any non-zero return as failure are not confused. */
	ret = 0;
3830
3831out:
3832	kfree(cb_ctx);
3833	return ret;
3834}
3835
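/*
 * Join an IB (SA-managed) multicast group: build the MCMember record from
 * the bound device address, verify that the SM supports send-only full-member
 * joins when that join state was requested, and hand the request to
 * ib_sa_join_multicast() with cma_ib_mc_handler() as the callback.
 */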
3836static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
3837				 struct cma_multicast *mc)
3838{
3839	struct ib_sa_mcmember_rec rec;
3840	struct ib_class_port_info class_port_info;
3841	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
3842	ib_sa_comp_mask comp_mask;
3843	int ret;
3844
3845	ib_addr_get_mgid(dev_addr, &rec.mgid);
3846	ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num,
3847				     &rec.mgid, &rec);
3848	if (ret)
3849		return ret;
3850
3851	ret = cma_set_qkey(id_priv, 0);
3852	if (ret)
3853		return ret;
3854
3855	cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid);
3856	rec.qkey = cpu_to_be32(id_priv->qkey);
3857	rdma_addr_get_sgid(dev_addr, &rec.port_gid);
3858	rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
3859	rec.join_state = mc->join_state;
3860
3861	if (rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) {
3862		ret = cma_query_sa_classport_info(id_priv->id.device,
3863						  id_priv->id.port_num,
3864						  &class_port_info);
3865
3866		if (ret)
3867			return ret;
3868
3869		if (!(ib_get_cpi_capmask2(&class_port_info) &
3870		      IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT)) {
3871			pr_warn("RDMA CM: %s port %u Unable to multicast join\n"
3872				"RDMA CM: SM doesn't support Send Only Full Member option\n",
3873				id_priv->id.device->name, id_priv->id.port_num);
3874			return -EOPNOTSUPP;
3875		}
3876	}
3877
3878	comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
3879		    IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE |
3880		    IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL |
3881		    IB_SA_MCMEMBER_REC_FLOW_LABEL |
3882		    IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;
3883
3884	if (id_priv->id.ps == RDMA_PS_IPOIB)
3885		comp_mask |= IB_SA_MCMEMBER_REC_RATE |
3886			     IB_SA_MCMEMBER_REC_RATE_SELECTOR |
3887			     IB_SA_MCMEMBER_REC_MTU_SELECTOR |
3888			     IB_SA_MCMEMBER_REC_MTU |
3889			     IB_SA_MCMEMBER_REC_HOP_LIMIT;
3890
3891	mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
3892						id_priv->id.port_num, &rec,
3893						comp_mask, GFP_KERNEL,
3894						cma_ib_mc_handler, mc);
3895	return PTR_ERR_OR_ZERO(mc->multicast.ib);
3896}
3897
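/*
 * RoCE multicast "joins" need no SA interaction; this deferred work item
 * simply completes the join by invoking the common multicast handler and
 * then drops the work's reference on the multicast entry.
 */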
3898static void iboe_mcast_work_handler(struct work_struct *work)
3899{
3900	struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work);
3901	struct cma_multicast *mc = mw->mc;
3902	struct ib_sa_multicast *m = mc->multicast.ib;
3903
3904	mc->multicast.ib->context = mc;
3905	cma_ib_mc_handler(0, m);
3906	kref_put(&mc->mcref, release_mc);
3907	kfree(mw);
3908}
3909
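/*
 * Derive the MGID for a RoCE join: a wildcard address maps to the zero GID,
 * IPv6 addresses are used as-is, and IPv4 addresses are embedded in the
 * ff0e::ffff:<ipv4> multicast-mapped form.
 */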
3910static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid)
3911{
3912	struct sockaddr_in *sin = (struct sockaddr_in *)addr;
3913	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;
3914
3915	if (cma_any_addr(addr)) {
3916		memset(mgid, 0, sizeof *mgid);
3917	} else if (addr->sa_family == AF_INET6) {
3918		memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
3919	} else {
3920		mgid->raw[0] = 0xff;
3921		mgid->raw[1] = 0x0e;
3922		mgid->raw[2] = 0;
3923		mgid->raw[3] = 0;
3924		mgid->raw[4] = 0;
3925		mgid->raw[5] = 0;
3926		mgid->raw[6] = 0;
3927		mgid->raw[7] = 0;
3928		mgid->raw[8] = 0;
3929		mgid->raw[9] = 0;
3930		mgid->raw[10] = 0xff;
3931		mgid->raw[11] = 0xff;
3932		*(__be32 *)(&mgid->raw[12]) = sin->sin_addr.s_addr;
3933	}
3934}
3935
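/*
 * Emulate a multicast join for RoCE ports.  The MCMember record is filled in
 * locally (MGID, pkey, qkey, rate, MTU and hop limit taken from the bound net
 * device) and completion is reported asynchronously via
 * iboe_mcast_work_handler().  For RoCE v2 (UDP encap) only IPv4 groups are
 * handled here; non-send-only v2 joins are marked igmp_joined.
 */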
3936static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
3937				   struct cma_multicast *mc)
3938{
3939	struct iboe_mcast_work *work;
3940	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
3941	int err = 0;
3942	struct sockaddr *addr = (struct sockaddr *)&mc->addr;
3943	struct net_device *ndev = NULL;
3944	enum ib_gid_type gid_type;
3945	bool send_only;
3946
3947	send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN);
3948
3949	if (cma_zero_addr(addr))
3950		return -EINVAL;
3951
3952	work = kzalloc(sizeof *work, GFP_KERNEL);
3953	if (!work)
3954		return -ENOMEM;
3955
3956	mc->multicast.ib = kzalloc(sizeof(struct ib_sa_multicast), GFP_KERNEL);
3957	if (!mc->multicast.ib) {
3958		err = -ENOMEM;
3959		goto out1;
3960	}
3961
3962	cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid);
3963
3964	mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff);
3965	if (id_priv->id.ps == RDMA_PS_UDP)
3966		mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
3967
3968	if (dev_addr->bound_dev_if)
3969		ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
3970	if (!ndev) {
3971		err = -ENODEV;
3972		goto out2;
3973	}
3974	mc->multicast.ib->rec.rate = iboe_get_rate(ndev);
3975	mc->multicast.ib->rec.hop_limit = 1;
3976	mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->if_mtu);
3977
3978	gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
3979		   rdma_start_port(id_priv->cma_dev->device)];
3980	if (addr->sa_family == AF_INET) {
3981		if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
3982			mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
3983			if (!send_only) {
3984				mc->igmp_joined = true;
3985			}
3986		}
3987	} else {
		/* IPv6 multicast over RoCE v2 (UDP encap) is not supported. */
3988		if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
3989			err = -EOPNOTSUPP;
3990	}
3991	dev_put(ndev);
3992	if (err || !mc->multicast.ib->rec.mtu) {
3993		if (!err)
3994			err = -EINVAL;
3995		goto out2;
3996	}
3997	rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
3998		    &mc->multicast.ib->rec.port_gid);
3999	work->id = id_priv;
4000	work->mc = mc;
4001	INIT_WORK(&work->work, iboe_mcast_work_handler);
4002	kref_get(&mc->mcref);
4003	queue_work(cma_wq, &work->work);
4004
4005	return 0;
4006
4007out2:
4008	kfree(mc->multicast.ib);
4009out1:
4010	kfree(work);
4011	return err;
4012}
4013
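/*
 * rdma_join_multicast - attach the id to a multicast group.  The id must be
 * bound or address-resolved.  The group address is tracked on
 * id_priv->mc_list and the join is dispatched to either the IB (SA) or RoCE
 * path depending on the port's transport.
 */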
4014int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
4015			u8 join_state, void *context)
4016{
4017	struct rdma_id_private *id_priv;
4018	struct cma_multicast *mc;
4019	int ret;
4020
4021	id_priv = container_of(id, struct rdma_id_private, id);
4022	if (!cma_comp(id_priv, RDMA_CM_ADDR_BOUND) &&
4023	    !cma_comp(id_priv, RDMA_CM_ADDR_RESOLVED))
4024		return -EINVAL;
4025
4026	mc = kmalloc(sizeof *mc, GFP_KERNEL);
4027	if (!mc)
4028		return -ENOMEM;
4029
4030	memcpy(&mc->addr, addr, rdma_addr_size(addr));
4031	mc->context = context;
4032	mc->id_priv = id_priv;
4033	mc->igmp_joined = false;
4034	mc->join_state = join_state;
4035	spin_lock(&id_priv->lock);
4036	list_add(&mc->list, &id_priv->mc_list);
4037	spin_unlock(&id_priv->lock);
4038
4039	if (rdma_protocol_roce(id->device, id->port_num)) {
4040		kref_init(&mc->mcref);
4041		ret = cma_iboe_join_multicast(id_priv, mc);
4042	} else if (rdma_cap_ib_mcast(id->device, id->port_num))
4043		ret = cma_join_ib_multicast(id_priv, mc);
4044	else
4045		ret = -ENOSYS;
4046
4047	if (ret) {
4048		spin_lock_irq(&id_priv->lock);
4049		list_del(&mc->list);
4050		spin_unlock_irq(&id_priv->lock);
4051		kfree(mc);
4052	}
4053	return ret;
4054}
4055EXPORT_SYMBOL(rdma_join_multicast);
4056
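/*
 * rdma_leave_multicast - detach the id from a previously joined group.
 * Detaches any attached QP from the MGID, then releases the join either via
 * ib_sa_free_multicast() (IB) or by dropping the RoCE reference.
 */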
4057void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
4058{
4059	struct rdma_id_private *id_priv;
4060	struct cma_multicast *mc;
4061
4062	id_priv = container_of(id, struct rdma_id_private, id);
4063	spin_lock_irq(&id_priv->lock);
4064	list_for_each_entry(mc, &id_priv->mc_list, list) {
4065		if (!memcmp(&mc->addr, addr, rdma_addr_size(addr))) {
4066			list_del(&mc->list);
4067			spin_unlock_irq(&id_priv->lock);
4068
4069			if (id->qp)
4070				ib_detach_mcast(id->qp,
4071						&mc->multicast.ib->rec.mgid,
4072						be16_to_cpu(mc->multicast.ib->rec.mlid));
4073
4074			BUG_ON(id_priv->cma_dev->device != id->device);
4075
4076			if (rdma_cap_ib_mcast(id->device, id->port_num)) {
4077				ib_sa_free_multicast(mc->multicast.ib);
4078				kfree(mc);
4079			} else if (rdma_protocol_roce(id->device, id->port_num)) {
4080				if (mc->igmp_joined) {
4081					struct rdma_dev_addr *dev_addr =
4082						&id->route.addr.dev_addr;
4083					struct net_device *ndev = NULL;
4084
4085					if (dev_addr->bound_dev_if)
4086						ndev = dev_get_by_index(dev_addr->net,
4087									dev_addr->bound_dev_if);
4088					if (ndev) {
4089						dev_put(ndev);
4090					}
4091					mc->igmp_joined = false;
4092				}
4093				kref_put(&mc->mcref, release_mc);
4094			}
4095			return;
4096		}
4097	}
4098	spin_unlock_irq(&id_priv->lock);
4099}
4100EXPORT_SYMBOL(rdma_leave_multicast);
4101
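/*
 * Sysctl handler backing the per-port "default_roce_mode_port%d" nodes
 * created in cma_add_one(): reports the current default GID type as a string
 * and accepts the GID type strings understood by
 * ib_cache_gid_parse_type_str() (see the description string below).
 */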
4102static int
4103sysctl_cma_default_roce_mode(SYSCTL_HANDLER_ARGS)
4104{
4105	struct cma_device *cma_dev = arg1;
4106	const int port = arg2;
4107	char buf[64];
4108	int error;
4109
4110	strlcpy(buf, ib_cache_gid_type_str(
4111	    cma_get_default_gid_type(cma_dev, port)), sizeof(buf));
4112
4113	error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
4114	if (error != 0 || req->newptr == NULL)
4115		goto done;
4116
4117	error = ib_cache_gid_parse_type_str(buf);
4118	if (error < 0) {
4119		error = EINVAL;
4120		goto done;
4121	}
4122
4123	cma_set_default_gid_type(cma_dev, port, error);
4124	error = 0;
4125done:
4126	return (error);
4127}
4128
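/*
 * ib_client add callback: allocate a cma_device for the new ib_device, pick
 * a default GID type per port, replay wildcard listens onto the device and
 * expose the per-port "default_roce_mode" sysctl nodes.
 */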
4129static void cma_add_one(struct ib_device *device)
4130{
4131	struct cma_device *cma_dev;
4132	struct rdma_id_private *id_priv;
4133	unsigned int i;
4134	unsigned long supported_gids = 0;
4135
4136	cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
4137	if (!cma_dev)
4138		return;
4139
4140	sysctl_ctx_init(&cma_dev->sysctl_ctx);
4141
4142	cma_dev->device = device;
4143	cma_dev->default_gid_type = kcalloc(device->phys_port_cnt,
4144					    sizeof(*cma_dev->default_gid_type),
4145					    GFP_KERNEL);
4146	if (!cma_dev->default_gid_type) {
4147		kfree(cma_dev);
4148		return;
4149	}
4150	for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
4151		supported_gids = roce_gid_type_mask_support(device, i);
4152		WARN_ON(!supported_gids);
4153		cma_dev->default_gid_type[i - rdma_start_port(device)] =
4154			find_first_bit(&supported_gids, BITS_PER_LONG);
4155	}
4156
4157	init_completion(&cma_dev->comp);
4158	atomic_set(&cma_dev->refcount, 1);
4159	INIT_LIST_HEAD(&cma_dev->id_list);
4160	ib_set_client_data(device, &cma_client, cma_dev);
4161
4162	mutex_lock(&lock);
4163	list_add_tail(&cma_dev->list, &dev_list);
4164	list_for_each_entry(id_priv, &listen_any_list, list)
4165		cma_listen_on_dev(id_priv, cma_dev);
4166	mutex_unlock(&lock);
4167
4168	for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
4169		char buf[64];
4170
4171		snprintf(buf, sizeof(buf), "default_roce_mode_port%u", i);
4172
4173		(void) SYSCTL_ADD_PROC(&cma_dev->sysctl_ctx,
4174		    SYSCTL_CHILDREN(device->ports_parent->parent->oidp),
4175		    OID_AUTO, buf, CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
4176		    cma_dev, i, &sysctl_cma_default_roce_mode, "A",
4177		    "Default RoCE mode. Valid values: IB/RoCE v1 and RoCE v2");
4178	}
4179}
4180
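/*
 * Deliver RDMA_CM_EVENT_DEVICE_REMOVAL to one id.  A non-zero return from the
 * event handler tells the caller to destroy the id on its behalf.
 */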
4181static int cma_remove_id_dev(struct rdma_id_private *id_priv)
4182{
4183	struct rdma_cm_event event;
4184	enum rdma_cm_state state;
4185	int ret = 0;
4186
4187	/* Record that we want to remove the device */
4188	state = cma_exch(id_priv, RDMA_CM_DEVICE_REMOVAL);
4189	if (state == RDMA_CM_DESTROYING)
4190		return 0;
4191
4192	cma_cancel_operation(id_priv, state);
4193	mutex_lock(&id_priv->handler_mutex);
4194
4195	/* Check for destruction from another callback. */
4196	if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL))
4197		goto out;
4198
4199	memset(&event, 0, sizeof event);
4200	event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
4201	ret = id_priv->id.event_handler(&id_priv->id, &event);
4202out:
4203	mutex_unlock(&id_priv->handler_mutex);
4204	return ret;
4205}
4206
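/*
 * Tear down every id bound to a departing device, then wait for the
 * cma_device reference count to drain before the caller frees it.
 */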
4207static void cma_process_remove(struct cma_device *cma_dev)
4208{
4209	struct rdma_id_private *id_priv;
4210	int ret;
4211
4212	mutex_lock(&lock);
4213	while (!list_empty(&cma_dev->id_list)) {
4214		id_priv = list_entry(cma_dev->id_list.next,
4215				     struct rdma_id_private, list);
4216
4217		list_del(&id_priv->listen_list);
4218		list_del_init(&id_priv->list);
4219		atomic_inc(&id_priv->refcount);
4220		mutex_unlock(&lock);
4221
4222		ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv);
4223		cma_deref_id(id_priv);
4224		if (ret)
4225			rdma_destroy_id(&id_priv->id);
4226
4227		mutex_lock(&lock);
4228	}
4229	mutex_unlock(&lock);
4230
4231	cma_deref_dev(cma_dev);
4232	wait_for_completion(&cma_dev->comp);
4233}
4234
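/*
 * ib_client remove callback: unlink the cma_device, notify and destroy its
 * ids, and release the sysctl context and per-port state.
 */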
4235static void cma_remove_one(struct ib_device *device, void *client_data)
4236{
4237	struct cma_device *cma_dev = client_data;
4238
4239	if (!cma_dev)
4240		return;
4241
4242	mutex_lock(&lock);
4243	list_del(&cma_dev->list);
4244	mutex_unlock(&lock);
4245
4246	cma_process_remove(cma_dev);
4247	sysctl_ctx_free(&cma_dev->sysctl_ctx);
4248	kfree(cma_dev->default_gid_type);
4249	kfree(cma_dev);
4250}
4251
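/*
 * The port-space IDRs are per-VNET on FreeBSD; set them up and tear them
 * down with the vnet rather than at module load/unload time.
 */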
4252static void cma_init_vnet(void *arg)
4253{
4254	struct cma_pernet *pernet = &VNET(cma_pernet);
4255
4256	idr_init(&pernet->tcp_ps);
4257	idr_init(&pernet->udp_ps);
4258	idr_init(&pernet->ipoib_ps);
4259	idr_init(&pernet->ib_ps);
4260}
4261VNET_SYSINIT(cma_init_vnet, SI_SUB_OFED_MODINIT - 1, SI_ORDER_FIRST, cma_init_vnet, NULL);
4262
4263static void cma_destroy_vnet(void *arg)
4264{
4265	struct cma_pernet *pernet = &VNET(cma_pernet);
4266
4267	idr_destroy(&pernet->tcp_ps);
4268	idr_destroy(&pernet->udp_ps);
4269	idr_destroy(&pernet->ipoib_ps);
4270	idr_destroy(&pernet->ib_ps);
4271}
4272VNET_SYSUNINIT(cma_destroy_vnet, SI_SUB_OFED_MODINIT - 1, SI_ORDER_SECOND, cma_destroy_vnet, NULL);
4273
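/*
 * Module load: create the ordered rdma_cm workqueue, register with the SA
 * and address-resolution services, and register the ib_client that drives
 * cma_add_one()/cma_remove_one().
 */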
4274static int __init cma_init(void)
4275{
4276	int ret;
4277
4278	cma_wq = alloc_ordered_workqueue("rdma_cm", WQ_MEM_RECLAIM);
4279	if (!cma_wq)
4280		return -ENOMEM;
4281
4282	ib_sa_register_client(&sa_client);
4283	rdma_addr_register_client(&addr_client);
4284
4285	ret = ib_register_client(&cma_client);
4286	if (ret)
4287		goto err;
4288
4289	cma_configfs_init();
4290
4291	return 0;
4292
4293err:
4294	rdma_addr_unregister_client(&addr_client);
4295	ib_sa_unregister_client(&sa_client);
4296	destroy_workqueue(cma_wq);
4297	return ret;
4298}
4299
4300static void __exit cma_cleanup(void)
4301{
4302	cma_configfs_exit();
4303	ib_unregister_client(&cma_client);
4304	rdma_addr_unregister_client(&addr_client);
4305	ib_sa_unregister_client(&sa_client);
4306	destroy_workqueue(cma_wq);
4307}
4308
4309module_init(cma_init);
4310module_exit(cma_cleanup);
4311