ib_cma.c revision 60042730d62e0f3d377ee60c2437b22fea4fb971
1/*-
2 * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
3 *
4 * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
5 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
6 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
7 * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
8 *
9 * This software is available to you under a choice of one of two
10 * licenses.  You may choose to be licensed under the terms of the GNU
11 * General Public License (GPL) Version 2, available from the file
12 * COPYING in the main directory of this source tree, or the
13 * OpenIB.org BSD license below:
14 *
15 *     Redistribution and use in source and binary forms, with or
16 *     without modification, are permitted provided that the following
17 *     conditions are met:
18 *
19 *      - Redistributions of source code must retain the above
20 *        copyright notice, this list of conditions and the following
21 *        disclaimer.
22 *
23 *      - Redistributions in binary form must reproduce the above
24 *        copyright notice, this list of conditions and the following
25 *        disclaimer in the documentation and/or other materials
26 *        provided with the distribution.
27 *
28 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
29 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
30 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
31 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
32 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
33 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
34 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
35 * SOFTWARE.
36 */
37
38#include <sys/cdefs.h>
39__FBSDID("$FreeBSD$");
40
41#define	LINUXKPI_PARAM_PREFIX ibcore_
42
43#include <linux/completion.h>
44#include <linux/in.h>
45#include <linux/in6.h>
46#include <linux/mutex.h>
47#include <linux/random.h>
48#include <linux/idr.h>
49#include <linux/inetdevice.h>
50#include <linux/slab.h>
51#include <linux/module.h>
52#include <net/route.h>
53#include <net/route/nhop.h>
54
55#include <net/tcp.h>
56#include <net/ipv6.h>
57
58#include <netinet/in_fib.h>
59
60#include <netinet6/in6_fib.h>
61#include <netinet6/scope6_var.h>
62#include <netinet6/ip6_var.h>
63
64#include <rdma/rdma_cm.h>
65#include <rdma/rdma_cm_ib.h>
66#include <rdma/rdma_sdp.h>
67#include <rdma/ib.h>
68#include <rdma/ib_addr.h>
69#include <rdma/ib_cache.h>
70#include <rdma/ib_cm.h>
71#include <rdma/ib_sa.h>
72#include <rdma/iw_cm.h>
73
74#include <sys/priv.h>
75
76#include "core_priv.h"
77
78MODULE_AUTHOR("Sean Hefty");
79MODULE_DESCRIPTION("Generic RDMA CM Agent");
80MODULE_LICENSE("Dual BSD/GPL");
81
82#define CMA_CM_RESPONSE_TIMEOUT 20
83#define CMA_QUERY_CLASSPORT_INFO_TIMEOUT 3000
84#define CMA_MAX_CM_RETRIES 15
85#define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
86#define CMA_IBOE_PACKET_LIFETIME 18
87
88static const char * const cma_events[] = {
89	[RDMA_CM_EVENT_ADDR_RESOLVED]	 = "address resolved",
90	[RDMA_CM_EVENT_ADDR_ERROR]	 = "address error",
91	[RDMA_CM_EVENT_ROUTE_RESOLVED]	 = "route resolved",
92	[RDMA_CM_EVENT_ROUTE_ERROR]	 = "route error",
93	[RDMA_CM_EVENT_CONNECT_REQUEST]	 = "connect request",
94	[RDMA_CM_EVENT_CONNECT_RESPONSE] = "connect response",
95	[RDMA_CM_EVENT_CONNECT_ERROR]	 = "connect error",
96	[RDMA_CM_EVENT_UNREACHABLE]	 = "unreachable",
97	[RDMA_CM_EVENT_REJECTED]	 = "rejected",
98	[RDMA_CM_EVENT_ESTABLISHED]	 = "established",
99	[RDMA_CM_EVENT_DISCONNECTED]	 = "disconnected",
100	[RDMA_CM_EVENT_DEVICE_REMOVAL]	 = "device removal",
101	[RDMA_CM_EVENT_MULTICAST_JOIN]	 = "multicast join",
102	[RDMA_CM_EVENT_MULTICAST_ERROR]	 = "multicast error",
103	[RDMA_CM_EVENT_ADDR_CHANGE]	 = "address change",
104	[RDMA_CM_EVENT_TIMEWAIT_EXIT]	 = "timewait exit",
105};
106
107const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event)
108{
109	size_t index = event;
110
111	return (index < ARRAY_SIZE(cma_events) && cma_events[index]) ?
112			cma_events[index] : "unrecognized event";
113}
114EXPORT_SYMBOL(rdma_event_msg);
115
116static int cma_check_linklocal(struct rdma_dev_addr *, struct sockaddr *);
117static void cma_add_one(struct ib_device *device);
118static void cma_remove_one(struct ib_device *device, void *client_data);
119static enum rdma_port_space rdma_ps_from_service_id(__be64 service_id);
120
121static struct ib_client cma_client = {
122	.name   = "cma",
123	.add    = cma_add_one,
124	.remove = cma_remove_one
125};
126
127static struct ib_sa_client sa_client;
128static struct rdma_addr_client addr_client;
129static LIST_HEAD(dev_list);
130static LIST_HEAD(listen_any_list);
131static DEFINE_MUTEX(lock);
132static struct workqueue_struct *cma_wq;
133
134struct cma_pernet {
135	struct idr tcp_ps;
136	struct idr udp_ps;
137	struct idr ipoib_ps;
138	struct idr ib_ps;
139	struct idr sdp_ps;
140};
141
142VNET_DEFINE(struct cma_pernet, cma_pernet);
143
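/*
 * Return the per-VNET instance of the CMA port space tables.
 */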
144static struct cma_pernet *cma_pernet_ptr(struct vnet *vnet)
145{
146	struct cma_pernet *retval;
147
148	CURVNET_SET_QUIET(vnet);
149	retval = &VNET(cma_pernet);
150	CURVNET_RESTORE();
151
152	return (retval);
153}
154
155static struct idr *cma_pernet_idr(struct vnet *net, enum rdma_port_space ps)
156{
157	struct cma_pernet *pernet = cma_pernet_ptr(net);
158
159	switch (ps) {
160	case RDMA_PS_TCP:
161		return &pernet->tcp_ps;
162	case RDMA_PS_UDP:
163		return &pernet->udp_ps;
164	case RDMA_PS_IPOIB:
165		return &pernet->ipoib_ps;
166	case RDMA_PS_IB:
167		return &pernet->ib_ps;
168	case RDMA_PS_SDP:
169		return &pernet->sdp_ps;
170	default:
171		return NULL;
172	}
173}
174
175struct cma_device {
176	struct list_head	list;
177	struct ib_device	*device;
178	struct completion	comp;
179	atomic_t		refcount;
180	struct list_head	id_list;
181	struct sysctl_ctx_list	sysctl_ctx;
182	enum ib_gid_type	*default_gid_type;
183};
184
185struct rdma_bind_list {
186	enum rdma_port_space	ps;
187	struct hlist_head	owners;
188	unsigned short		port;
189};
190
191struct class_port_info_context {
192	struct ib_class_port_info	*class_port_info;
193	struct ib_device		*device;
194	struct completion		done;
195	struct ib_sa_query		*sa_query;
196	u8				port_num;
197};
198
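/*
 * Associate bind_list with the exact port number snum in the given
 * port space.  Returns snum on success or a negative errno.
 */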
199static int cma_ps_alloc(struct vnet *vnet, enum rdma_port_space ps,
200			struct rdma_bind_list *bind_list, int snum)
201{
202	struct idr *idr = cma_pernet_idr(vnet, ps);
203
204	return idr_alloc(idr, bind_list, snum, snum + 1, GFP_KERNEL);
205}
206
207static struct rdma_bind_list *cma_ps_find(struct vnet *net,
208					  enum rdma_port_space ps, int snum)
209{
210	struct idr *idr = cma_pernet_idr(net, ps);
211
212	return idr_find(idr, snum);
213}
214
215static void cma_ps_remove(struct vnet *net, enum rdma_port_space ps, int snum)
216{
217	struct idr *idr = cma_pernet_idr(net, ps);
218
219	idr_remove(idr, snum);
220}
221
222enum {
223	CMA_OPTION_AFONLY,
224};
225
226void cma_ref_dev(struct cma_device *cma_dev)
227{
228	atomic_inc(&cma_dev->refcount);
229}
230
231struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter	filter,
232					     void		*cookie)
233{
234	struct cma_device *cma_dev;
235	struct cma_device *found_cma_dev = NULL;
236
237	mutex_lock(&lock);
238
239	list_for_each_entry(cma_dev, &dev_list, list)
240		if (filter(cma_dev->device, cookie)) {
241			found_cma_dev = cma_dev;
242			break;
243		}
244
245	if (found_cma_dev)
246		cma_ref_dev(found_cma_dev);
247	mutex_unlock(&lock);
248	return found_cma_dev;
249}
250
251int cma_get_default_gid_type(struct cma_device *cma_dev,
252			     unsigned int port)
253{
254	if (port < rdma_start_port(cma_dev->device) ||
255	    port > rdma_end_port(cma_dev->device))
256		return -EINVAL;
257
258	return cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)];
259}
260
261int cma_set_default_gid_type(struct cma_device *cma_dev,
262			     unsigned int port,
263			     enum ib_gid_type default_gid_type)
264{
265	unsigned long supported_gids;
266
267	if (port < rdma_start_port(cma_dev->device) ||
268	    port > rdma_end_port(cma_dev->device))
269		return -EINVAL;
270
271	supported_gids = roce_gid_type_mask_support(cma_dev->device, port);
272
273	if (!(supported_gids & (1 << default_gid_type)))
274		return -EINVAL;
275
276	cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)] =
277		default_gid_type;
278
279	return 0;
280}
281
282struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev)
283{
284	return cma_dev->device;
285}
286
287/*
288 * Device removal can occur at any time, so we need extra handling to
289 * serialize notifying the user of device removal with other callbacks.
290 * We do this by disabling removal notification while a callback is in process,
291 * and reporting it after the callback completes.
292 */
293struct rdma_id_private {
294	struct rdma_cm_id	id;
295
296	struct rdma_bind_list	*bind_list;
297	struct hlist_node	node;
298	struct list_head	list; /* listen_any_list or cma_device.list */
299	struct list_head	listen_list; /* per device listens */
300	struct cma_device	*cma_dev;
301	struct list_head	mc_list;
302
303	int			internal_id;
304	enum rdma_cm_state	state;
305	spinlock_t		lock;
306	struct mutex		qp_mutex;
307
308	struct completion	comp;
309	atomic_t		refcount;
310	struct mutex		handler_mutex;
311
312	int			backlog;
313	int			timeout_ms;
314	struct ib_sa_query	*query;
315	int			query_id;
316	union {
317		struct ib_cm_id	*ib;
318		struct iw_cm_id	*iw;
319	} cm_id;
320
321	u32			seq_num;
322	u32			qkey;
323	u32			qp_num;
324	pid_t			owner;
325	u32			options;
326	u8			srq;
327	u8			tos;
328	u8			reuseaddr;
329	u8			afonly;
330	enum ib_gid_type	gid_type;
331};
332
333struct cma_multicast {
334	struct rdma_id_private *id_priv;
335	union {
336		struct ib_sa_multicast *ib;
337	} multicast;
338	struct list_head	list;
339	void			*context;
340	struct sockaddr_storage	addr;
341	struct kref		mcref;
342	bool			igmp_joined;
343	u8			join_state;
344};
345
346struct cma_work {
347	struct work_struct	work;
348	struct rdma_id_private	*id;
349	enum rdma_cm_state	old_state;
350	enum rdma_cm_state	new_state;
351	struct rdma_cm_event	event;
352};
353
354struct cma_ndev_work {
355	struct work_struct	work;
356	struct rdma_id_private	*id;
357	struct rdma_cm_event	event;
358};
359
360struct iboe_mcast_work {
361	struct work_struct	 work;
362	struct rdma_id_private	*id;
363	struct cma_multicast	*mc;
364};
365
366struct cma_hdr {
367	u8 cma_version;
368	u8 ip_version;	/* IP version in bits 7:4 */
369	__be16 port;
370	union cma_ip_addr src_addr;
371	union cma_ip_addr dst_addr;
372};
373
374#define CMA_VERSION 0x00
375#define SDP_MAJ_VERSION 0x2
376
377struct cma_req_info {
378	struct ib_device *device;
379	int port;
380	union ib_gid local_gid;
381	__be64 service_id;
382	u16 pkey;
383	bool has_gid:1;
384};
385
386static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp)
387{
388	unsigned long flags;
389	int ret;
390
391	spin_lock_irqsave(&id_priv->lock, flags);
392	ret = (id_priv->state == comp);
393	spin_unlock_irqrestore(&id_priv->lock, flags);
394	return ret;
395}
396
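/*
 * Move the id to state "exch" only if it is currently in state "comp".
 * Returns non-zero when the transition was made.
 */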
397static int cma_comp_exch(struct rdma_id_private *id_priv,
398			 enum rdma_cm_state comp, enum rdma_cm_state exch)
399{
400	unsigned long flags;
401	int ret;
402
403	spin_lock_irqsave(&id_priv->lock, flags);
404	if ((ret = (id_priv->state == comp)))
405		id_priv->state = exch;
406	spin_unlock_irqrestore(&id_priv->lock, flags);
407	return ret;
408}
409
410static enum rdma_cm_state cma_exch(struct rdma_id_private *id_priv,
411				   enum rdma_cm_state exch)
412{
413	unsigned long flags;
414	enum rdma_cm_state old;
415
416	spin_lock_irqsave(&id_priv->lock, flags);
417	old = id_priv->state;
418	id_priv->state = exch;
419	spin_unlock_irqrestore(&id_priv->lock, flags);
420	return old;
421}
422
423static inline u8 cma_get_ip_ver(const struct cma_hdr *hdr)
424{
425	return hdr->ip_version >> 4;
426}
427
428static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
429{
430	hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
431}
432
433static inline u8 sdp_get_majv(u8 sdp_version)
434{
435	return sdp_version >> 4;
436}
437
438static inline u8 sdp_get_ip_ver(const struct sdp_hh *hh)
439{
440	return hh->ipv_cap >> 4;
441}
442
443static inline void sdp_set_ip_ver(struct sdp_hh *hh, u8 ip_ver)
444{
445	hh->ipv_cap = (ip_ver << 4) | (hh->ipv_cap & 0xF);
446}
447
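/*
 * Join or leave the IP multicast group derived from the given MGID on
 * the network interface ndev.
 */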
448static int cma_igmp_send(struct net_device *ndev, const union ib_gid *mgid, bool join)
449{
450	int retval;
451
452	if (ndev) {
453		union {
454			struct sockaddr sock;
455			struct sockaddr_storage storage;
456		} addr;
457
458		rdma_gid2ip(&addr.sock, mgid);
459
460		CURVNET_SET_QUIET(ndev->if_vnet);
461		if (join)
462			retval = -if_addmulti(ndev, &addr.sock, NULL);
463		else
464			retval = -if_delmulti(ndev, &addr.sock);
465		CURVNET_RESTORE();
466	} else {
467		retval = -ENODEV;
468	}
469	return retval;
470}
471
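/*
 * Attach the id to cma_dev: take a device reference, record the device
 * and transport type, and add the id to the device's id list.
 */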
472static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
473			       struct cma_device *cma_dev)
474{
475	cma_ref_dev(cma_dev);
476	id_priv->cma_dev = cma_dev;
477	id_priv->gid_type = 0;
478	id_priv->id.device = cma_dev->device;
479	id_priv->id.route.addr.dev_addr.transport =
480		rdma_node_get_transport(cma_dev->device->node_type);
481	list_add_tail(&id_priv->list, &cma_dev->id_list);
482}
483
484static void cma_attach_to_dev(struct rdma_id_private *id_priv,
485			      struct cma_device *cma_dev)
486{
487	_cma_attach_to_dev(id_priv, cma_dev);
488	id_priv->gid_type =
489		cma_dev->default_gid_type[id_priv->id.port_num -
490					  rdma_start_port(cma_dev->device)];
491}
492
493void cma_deref_dev(struct cma_device *cma_dev)
494{
495	if (atomic_dec_and_test(&cma_dev->refcount))
496		complete(&cma_dev->comp);
497}
498
499static inline void release_mc(struct kref *kref)
500{
501	struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref);
502
503	kfree(mc->multicast.ib);
504	kfree(mc);
505}
506
507static void cma_release_dev(struct rdma_id_private *id_priv)
508{
509	mutex_lock(&lock);
510	list_del(&id_priv->list);
511	cma_deref_dev(id_priv->cma_dev);
512	id_priv->cma_dev = NULL;
513	mutex_unlock(&lock);
514}
515
516static inline struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv)
517{
518	return (struct sockaddr *) &id_priv->id.route.addr.src_addr;
519}
520
521static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv)
522{
523	return (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
524}
525
526static inline unsigned short cma_family(struct rdma_id_private *id_priv)
527{
528	return id_priv->id.route.addr.src_addr.ss_family;
529}
530
531static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey)
532{
533	struct ib_sa_mcmember_rec rec;
534	int ret = 0;
535
536	if (id_priv->qkey) {
537		if (qkey && id_priv->qkey != qkey)
538			return -EINVAL;
539		return 0;
540	}
541
542	if (qkey) {
543		id_priv->qkey = qkey;
544		return 0;
545	}
546
547	switch (id_priv->id.ps) {
548	case RDMA_PS_UDP:
549	case RDMA_PS_IB:
550		id_priv->qkey = RDMA_UDP_QKEY;
551		break;
552	case RDMA_PS_IPOIB:
553		ib_addr_get_mgid(&id_priv->id.route.addr.dev_addr, &rec.mgid);
554		ret = ib_sa_get_mcmember_rec(id_priv->id.device,
555					     id_priv->id.port_num, &rec.mgid,
556					     &rec);
557		if (!ret)
558			id_priv->qkey = be32_to_cpu(rec.qkey);
559		break;
560	default:
561		break;
562	}
563	return ret;
564}
565
566static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr)
567{
568	dev_addr->dev_type = ARPHRD_INFINIBAND;
569	rdma_addr_set_sgid(dev_addr, (union ib_gid *) &sib->sib_addr);
570	ib_addr_set_pkey(dev_addr, ntohs(sib->sib_pkey));
571}
572
573static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
574{
575	int ret;
576
577	if (addr->sa_family != AF_IB) {
578		ret = rdma_translate_ip(addr, dev_addr);
579	} else {
580		cma_translate_ib((struct sockaddr_ib *) addr, dev_addr);
581		ret = 0;
582	}
583
584	return ret;
585}
586
587static inline int cma_validate_port(struct ib_device *device, u8 port,
588				    enum ib_gid_type gid_type,
589				    union ib_gid *gid,
590				    const struct rdma_dev_addr *dev_addr)
591{
592	const int dev_type = dev_addr->dev_type;
593	struct net_device *ndev;
594	int ret = -ENODEV;
595
596	if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port))
597		return ret;
598
599	if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port))
600		return ret;
601
602	if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
603		ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
604	} else {
605		ndev = NULL;
606		gid_type = IB_GID_TYPE_IB;
607	}
608
609	ret = ib_find_cached_gid_by_port(device, gid, gid_type, port,
610					 ndev, NULL);
611
612	if (ndev)
613		dev_put(ndev);
614
615	return ret;
616}
617
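/*
 * Find an RDMA device and port whose cached GID table matches the id's
 * source GID, preferring the device and port of the listening id when
 * one is supplied, and attach the id to it.
 */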
618static int cma_acquire_dev(struct rdma_id_private *id_priv,
619			   struct rdma_id_private *listen_id_priv)
620{
621	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
622	struct cma_device *cma_dev;
623	union ib_gid gid, iboe_gid, *gidp;
624	int ret = -ENODEV;
625	u8 port;
626
627	if (dev_addr->dev_type != ARPHRD_INFINIBAND &&
628	    id_priv->id.ps == RDMA_PS_IPOIB)
629		return -EINVAL;
630
631	mutex_lock(&lock);
632	rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
633		    &iboe_gid);
634
635	memcpy(&gid, dev_addr->src_dev_addr +
636	       rdma_addr_gid_offset(dev_addr), sizeof gid);
637
638	if (listen_id_priv) {
639		cma_dev = listen_id_priv->cma_dev;
640		port = listen_id_priv->id.port_num;
641
642		if (rdma_is_port_valid(cma_dev->device, port)) {
643			gidp = rdma_protocol_roce(cma_dev->device, port) ?
644			       &iboe_gid : &gid;
645
646			ret = cma_validate_port(cma_dev->device, port,
647				rdma_protocol_ib(cma_dev->device, port) ?
648				IB_GID_TYPE_IB :
649				listen_id_priv->gid_type, gidp, dev_addr);
650			if (!ret) {
651				id_priv->id.port_num = port;
652				goto out;
653			}
654		}
655	}
656
657	list_for_each_entry(cma_dev, &dev_list, list) {
658		for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) {
659			if (listen_id_priv &&
660			    listen_id_priv->cma_dev == cma_dev &&
661			    listen_id_priv->id.port_num == port)
662				continue;
663
664			gidp = rdma_protocol_roce(cma_dev->device, port) ?
665			       &iboe_gid : &gid;
666
667			ret = cma_validate_port(cma_dev->device, port,
668						rdma_protocol_ib(cma_dev->device, port) ?
669						IB_GID_TYPE_IB :
670						cma_dev->default_gid_type[port - 1],
671						gidp, dev_addr);
672			if (!ret) {
673				id_priv->id.port_num = port;
674				goto out;
675			}
676		}
677	}
678
679out:
680	if (!ret)
681		cma_attach_to_dev(id_priv, cma_dev);
682
683	mutex_unlock(&lock);
684	return ret;
685}
686
687/*
688 * Select the source IB device and address to reach the destination IB address.
689 */
690static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
691{
692	struct cma_device *cma_dev, *cur_dev;
693	struct sockaddr_ib *addr;
694	union ib_gid gid, sgid, *dgid;
695	u16 pkey, index;
696	u8 p;
697	int i;
698
699	cma_dev = NULL;
700	addr = (struct sockaddr_ib *) cma_dst_addr(id_priv);
701	dgid = (union ib_gid *) &addr->sib_addr;
702	pkey = ntohs(addr->sib_pkey);
703
704	list_for_each_entry(cur_dev, &dev_list, list) {
705		for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
706			if (!rdma_cap_af_ib(cur_dev->device, p))
707				continue;
708
709			if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index))
710				continue;
711
712			for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i,
713						       &gid, NULL);
714			     i++) {
715				if (!memcmp(&gid, dgid, sizeof(gid))) {
716					cma_dev = cur_dev;
717					sgid = gid;
718					id_priv->id.port_num = p;
719					goto found;
720				}
721
722				if (!cma_dev && (gid.global.subnet_prefix ==
723						 dgid->global.subnet_prefix)) {
724					cma_dev = cur_dev;
725					sgid = gid;
726					id_priv->id.port_num = p;
727				}
728			}
729		}
730	}
731
732	if (!cma_dev)
733		return -ENODEV;
734
735found:
736	cma_attach_to_dev(id_priv, cma_dev);
737	addr = (struct sockaddr_ib *) cma_src_addr(id_priv);
738	memcpy(&addr->sib_addr, &sgid, sizeof sgid);
739	cma_translate_ib(addr, &id_priv->id.route.addr.dev_addr);
740	return 0;
741}
742
743static void cma_deref_id(struct rdma_id_private *id_priv)
744{
745	if (atomic_dec_and_test(&id_priv->refcount))
746		complete(&id_priv->comp);
747}
748
749struct rdma_cm_id *rdma_create_id(struct vnet *net,
750				  rdma_cm_event_handler event_handler,
751				  void *context, enum rdma_port_space ps,
752				  enum ib_qp_type qp_type)
753{
754	struct rdma_id_private *id_priv;
755
756#ifdef VIMAGE
757	if (net == NULL)
758		return ERR_PTR(-EINVAL);
759#endif
760	id_priv = kzalloc(sizeof *id_priv, GFP_KERNEL);
761	if (!id_priv)
762		return ERR_PTR(-ENOMEM);
763
764	id_priv->owner = task_pid_nr(current);
765	id_priv->state = RDMA_CM_IDLE;
766	id_priv->id.context = context;
767	id_priv->id.event_handler = event_handler;
768	id_priv->id.ps = ps;
769	id_priv->id.qp_type = qp_type;
770	spin_lock_init(&id_priv->lock);
771	mutex_init(&id_priv->qp_mutex);
772	init_completion(&id_priv->comp);
773	atomic_set(&id_priv->refcount, 1);
774	mutex_init(&id_priv->handler_mutex);
775	INIT_LIST_HEAD(&id_priv->listen_list);
776	INIT_LIST_HEAD(&id_priv->mc_list);
777	get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
778	id_priv->id.route.addr.dev_addr.net = net;
779
780	return &id_priv->id;
781}
782EXPORT_SYMBOL(rdma_create_id);
783
784static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
785{
786	struct ib_qp_attr qp_attr;
787	int qp_attr_mask, ret;
788
789	qp_attr.qp_state = IB_QPS_INIT;
790	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
791	if (ret)
792		return ret;
793
794	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
795	if (ret)
796		return ret;
797
798	qp_attr.qp_state = IB_QPS_RTR;
799	ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
800	if (ret)
801		return ret;
802
803	qp_attr.qp_state = IB_QPS_RTS;
804	qp_attr.sq_psn = 0;
805	ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN);
806
807	return ret;
808}
809
810static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
811{
812	struct ib_qp_attr qp_attr;
813	int qp_attr_mask, ret;
814
815	qp_attr.qp_state = IB_QPS_INIT;
816	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
817	if (ret)
818		return ret;
819
820	return ib_modify_qp(qp, &qp_attr, qp_attr_mask);
821}
822
823int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
824		   struct ib_qp_init_attr *qp_init_attr)
825{
826	struct rdma_id_private *id_priv;
827	struct ib_qp *qp;
828	int ret;
829
830	id_priv = container_of(id, struct rdma_id_private, id);
831	if (id->device != pd->device)
832		return -EINVAL;
833
834	qp_init_attr->port_num = id->port_num;
835	qp = ib_create_qp(pd, qp_init_attr);
836	if (IS_ERR(qp))
837		return PTR_ERR(qp);
838
839	if (id->qp_type == IB_QPT_UD)
840		ret = cma_init_ud_qp(id_priv, qp);
841	else
842		ret = cma_init_conn_qp(id_priv, qp);
843	if (ret)
844		goto err;
845
846	id->qp = qp;
847	id_priv->qp_num = qp->qp_num;
848	id_priv->srq = (qp->srq != NULL);
849	return 0;
850err:
851	ib_destroy_qp(qp);
852	return ret;
853}
854EXPORT_SYMBOL(rdma_create_qp);
855
856void rdma_destroy_qp(struct rdma_cm_id *id)
857{
858	struct rdma_id_private *id_priv;
859
860	id_priv = container_of(id, struct rdma_id_private, id);
861	mutex_lock(&id_priv->qp_mutex);
862	ib_destroy_qp(id_priv->id.qp);
863	id_priv->id.qp = NULL;
864	mutex_unlock(&id_priv->qp_mutex);
865}
866EXPORT_SYMBOL(rdma_destroy_qp);
867
868static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
869			     struct rdma_conn_param *conn_param)
870{
871	struct ib_qp_attr qp_attr;
872	int qp_attr_mask, ret;
873	union ib_gid sgid;
874
875	mutex_lock(&id_priv->qp_mutex);
876	if (!id_priv->id.qp) {
877		ret = 0;
878		goto out;
879	}
880
881	/* Need to update QP attributes from default values. */
882	qp_attr.qp_state = IB_QPS_INIT;
883	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
884	if (ret)
885		goto out;
886
887	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
888	if (ret)
889		goto out;
890
891	qp_attr.qp_state = IB_QPS_RTR;
892	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
893	if (ret)
894		goto out;
895
896	ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num,
897			   qp_attr.ah_attr.grh.sgid_index, &sgid, NULL);
898	if (ret)
899		goto out;
900
901	BUG_ON(id_priv->cma_dev->device != id_priv->id.device);
902
903	if (conn_param)
904		qp_attr.max_dest_rd_atomic = conn_param->responder_resources;
905	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
906out:
907	mutex_unlock(&id_priv->qp_mutex);
908	return ret;
909}
910
911static int cma_modify_qp_rts(struct rdma_id_private *id_priv,
912			     struct rdma_conn_param *conn_param)
913{
914	struct ib_qp_attr qp_attr;
915	int qp_attr_mask, ret;
916
917	mutex_lock(&id_priv->qp_mutex);
918	if (!id_priv->id.qp) {
919		ret = 0;
920		goto out;
921	}
922
923	qp_attr.qp_state = IB_QPS_RTS;
924	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
925	if (ret)
926		goto out;
927
928	if (conn_param)
929		qp_attr.max_rd_atomic = conn_param->initiator_depth;
930	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
931out:
932	mutex_unlock(&id_priv->qp_mutex);
933	return ret;
934}
935
936static int cma_modify_qp_err(struct rdma_id_private *id_priv)
937{
938	struct ib_qp_attr qp_attr;
939	int ret;
940
941	mutex_lock(&id_priv->qp_mutex);
942	if (!id_priv->id.qp) {
943		ret = 0;
944		goto out;
945	}
946
947	qp_attr.qp_state = IB_QPS_ERR;
948	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, IB_QP_STATE);
949out:
950	mutex_unlock(&id_priv->qp_mutex);
951	return ret;
952}
953
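/*
 * Fill in the P_Key index and port number for a CM-managed QP, plus the
 * Q_Key for UD services or the access flags for connected services.
 */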
954static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
955			       struct ib_qp_attr *qp_attr, int *qp_attr_mask)
956{
957	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
958	int ret;
959	u16 pkey;
960
961	if (rdma_cap_eth_ah(id_priv->id.device, id_priv->id.port_num))
962		pkey = 0xffff;
963	else
964		pkey = ib_addr_get_pkey(dev_addr);
965
966	ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num,
967				  pkey, &qp_attr->pkey_index);
968	if (ret)
969		return ret;
970
971	qp_attr->port_num = id_priv->id.port_num;
972	*qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT;
973
974	if (id_priv->id.qp_type == IB_QPT_UD) {
975		ret = cma_set_qkey(id_priv, 0);
976		if (ret)
977			return ret;
978
979		qp_attr->qkey = id_priv->qkey;
980		*qp_attr_mask |= IB_QP_QKEY;
981	} else {
982		qp_attr->qp_access_flags = 0;
983		*qp_attr_mask |= IB_QP_ACCESS_FLAGS;
984	}
985	return 0;
986}
987
988int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
989		       int *qp_attr_mask)
990{
991	struct rdma_id_private *id_priv;
992	int ret = 0;
993
994	id_priv = container_of(id, struct rdma_id_private, id);
995	if (rdma_cap_ib_cm(id->device, id->port_num)) {
996		if (!id_priv->cm_id.ib || (id_priv->id.qp_type == IB_QPT_UD))
997			ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask);
998		else
999			ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
1000						 qp_attr_mask);
1001
1002		if (qp_attr->qp_state == IB_QPS_RTR)
1003			qp_attr->rq_psn = id_priv->seq_num;
1004	} else if (rdma_cap_iw_cm(id->device, id->port_num)) {
1005		if (!id_priv->cm_id.iw) {
1006			qp_attr->qp_access_flags = 0;
1007			*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
1008		} else
1009			ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr,
1010						 qp_attr_mask);
1011		qp_attr->port_num = id_priv->id.port_num;
1012		*qp_attr_mask |= IB_QP_PORT;
1013	} else
1014		ret = -ENOSYS;
1015
1016	return ret;
1017}
1018EXPORT_SYMBOL(rdma_init_qp_attr);
1019
1020static inline int cma_zero_addr(struct sockaddr *addr)
1021{
1022	switch (addr->sa_family) {
1023	case AF_INET:
1024		return ipv4_is_zeronet(((struct sockaddr_in *)addr)->sin_addr.s_addr);
1025	case AF_INET6:
1026		return ipv6_addr_any(&((struct sockaddr_in6 *) addr)->sin6_addr);
1027	case AF_IB:
1028		return ib_addr_any(&((struct sockaddr_ib *) addr)->sib_addr);
1029	default:
1030		return 0;
1031	}
1032}
1033
1034static inline int cma_loopback_addr(struct sockaddr *addr)
1035{
1036	switch (addr->sa_family) {
1037	case AF_INET:
1038		return ipv4_is_loopback(((struct sockaddr_in *) addr)->sin_addr.s_addr);
1039	case AF_INET6:
1040		return ipv6_addr_loopback(&((struct sockaddr_in6 *) addr)->sin6_addr);
1041	case AF_IB:
1042		return ib_addr_loopback(&((struct sockaddr_ib *) addr)->sib_addr);
1043	default:
1044		return 0;
1045	}
1046}
1047
1048static inline int cma_any_addr(struct sockaddr *addr)
1049{
1050	return cma_zero_addr(addr) || cma_loopback_addr(addr);
1051}
1052
1053static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst)
1054{
1055	if (src->sa_family != dst->sa_family)
1056		return -1;
1057
1058	switch (src->sa_family) {
1059	case AF_INET:
1060		return ((struct sockaddr_in *) src)->sin_addr.s_addr !=
1061		       ((struct sockaddr_in *) dst)->sin_addr.s_addr;
1062	case AF_INET6:
1063		return ipv6_addr_cmp(&((struct sockaddr_in6 *) src)->sin6_addr,
1064				     &((struct sockaddr_in6 *) dst)->sin6_addr);
1065	default:
1066		return ib_addr_cmp(&((struct sockaddr_ib *) src)->sib_addr,
1067				   &((struct sockaddr_ib *) dst)->sib_addr);
1068	}
1069}
1070
1071static __be16 cma_port(struct sockaddr *addr)
1072{
1073	struct sockaddr_ib *sib;
1074
1075	switch (addr->sa_family) {
1076	case AF_INET:
1077		return ((struct sockaddr_in *) addr)->sin_port;
1078	case AF_INET6:
1079		return ((struct sockaddr_in6 *) addr)->sin6_port;
1080	case AF_IB:
1081		sib = (struct sockaddr_ib *) addr;
1082		return htons((u16) (be64_to_cpu(sib->sib_sid) &
1083				    be64_to_cpu(sib->sib_sid_mask)));
1084	default:
1085		return 0;
1086	}
1087}
1088
1089static inline int cma_any_port(struct sockaddr *addr)
1090{
1091	return !cma_port(addr);
1092}
1093
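/*
 * Fill in the AF_IB source and destination addresses for a new
 * connection, using the path record when one is available and the
 * listening id's address otherwise.
 */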
1094static void cma_save_ib_info(struct sockaddr *src_addr,
1095			     struct sockaddr *dst_addr,
1096			     struct rdma_cm_id *listen_id,
1097			     struct ib_sa_path_rec *path)
1098{
1099	struct sockaddr_ib *listen_ib, *ib;
1100
1101	listen_ib = (struct sockaddr_ib *) &listen_id->route.addr.src_addr;
1102	if (src_addr) {
1103		ib = (struct sockaddr_ib *)src_addr;
1104		ib->sib_family = AF_IB;
1105		if (path) {
1106			ib->sib_pkey = path->pkey;
1107			ib->sib_flowinfo = path->flow_label;
1108			memcpy(&ib->sib_addr, &path->sgid, 16);
1109			ib->sib_sid = path->service_id;
1110			ib->sib_scope_id = 0;
1111		} else {
1112			ib->sib_pkey = listen_ib->sib_pkey;
1113			ib->sib_flowinfo = listen_ib->sib_flowinfo;
1114			ib->sib_addr = listen_ib->sib_addr;
1115			ib->sib_sid = listen_ib->sib_sid;
1116			ib->sib_scope_id = listen_ib->sib_scope_id;
1117		}
1118		ib->sib_sid_mask = cpu_to_be64(0xffffffffffffffffULL);
1119	}
1120	if (dst_addr) {
1121		ib = (struct sockaddr_ib *)dst_addr;
1122		ib->sib_family = AF_IB;
1123		if (path) {
1124			ib->sib_pkey = path->pkey;
1125			ib->sib_flowinfo = path->flow_label;
1126			memcpy(&ib->sib_addr, &path->dgid, 16);
1127		}
1128	}
1129}
1130
1131static void cma_save_ip4_info(struct sockaddr_in *src_addr,
1132			      struct sockaddr_in *dst_addr,
1133			      struct cma_hdr *hdr,
1134			      __be16 local_port)
1135{
1136	if (src_addr) {
1137		*src_addr = (struct sockaddr_in) {
1138			.sin_len = sizeof(struct sockaddr_in),
1139			.sin_family = AF_INET,
1140			.sin_addr.s_addr = hdr->dst_addr.ip4.addr,
1141			.sin_port = local_port,
1142		};
1143	}
1144
1145	if (dst_addr) {
1146		*dst_addr = (struct sockaddr_in) {
1147			.sin_len = sizeof(struct sockaddr_in),
1148			.sin_family = AF_INET,
1149			.sin_addr.s_addr = hdr->src_addr.ip4.addr,
1150			.sin_port = hdr->port,
1151		};
1152	}
1153}
1154
1155static void cma_ip6_clear_scope_id(struct in6_addr *addr)
1156{
1157	/* make sure link local scope ID gets zeroed */
1158	if (IN6_IS_SCOPE_LINKLOCAL(addr) ||
1159	    IN6_IS_ADDR_MC_INTFACELOCAL(addr)) {
1160		/* use byte-access to be alignment safe */
1161		addr->s6_addr[2] = 0;
1162		addr->s6_addr[3] = 0;
1163	}
1164}
1165
1166static void cma_save_ip6_info(struct sockaddr_in6 *src_addr,
1167			      struct sockaddr_in6 *dst_addr,
1168			      struct cma_hdr *hdr,
1169			      __be16 local_port)
1170{
1171	if (src_addr) {
1172		*src_addr = (struct sockaddr_in6) {
1173			.sin6_len = sizeof(struct sockaddr_in6),
1174			.sin6_family = AF_INET6,
1175			.sin6_addr = hdr->dst_addr.ip6,
1176			.sin6_port = local_port,
1177		};
1178		cma_ip6_clear_scope_id(&src_addr->sin6_addr);
1179	}
1180
1181	if (dst_addr) {
1182		*dst_addr = (struct sockaddr_in6) {
1183			.sin6_len = sizeof(struct sockaddr_in6),
1184			.sin6_family = AF_INET6,
1185			.sin6_addr = hdr->src_addr.ip6,
1186			.sin6_port = hdr->port,
1187		};
1188		cma_ip6_clear_scope_id(&dst_addr->sin6_addr);
1189	}
1190}
1191
1192static u16 cma_port_from_service_id(__be64 service_id)
1193{
1194	return (u16)be64_to_cpu(service_id);
1195}
1196
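/*
 * Extract the source and destination IP addresses and ports from an SDP
 * hello header, after checking its major protocol version.
 */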
1197static int sdp_save_ip_info(struct sockaddr *src_addr,
1198			    struct sockaddr *dst_addr,
1199			    const struct sdp_hh *hdr,
1200			    __be64 service_id)
1201{
1202	__be16 local_port;
1203
1204	BUG_ON(src_addr == NULL || dst_addr == NULL);
1205
1206	if (sdp_get_majv(hdr->majv_minv) != SDP_MAJ_VERSION)
1207		return -EINVAL;
1208
1209	local_port = htons(cma_port_from_service_id(service_id));
1210
1211	switch (sdp_get_ip_ver(hdr)) {
1212	case 4: {
1213		struct sockaddr_in *s4, *d4;
1214
1215		s4 = (void *)src_addr;
1216		d4 = (void *)dst_addr;
1217
1218		*s4 = (struct sockaddr_in) {
1219			.sin_len = sizeof(*s4),
1220			.sin_family = AF_INET,
1221			.sin_addr.s_addr = hdr->dst_addr.ip4.addr,
1222			.sin_port = local_port,
1223		};
1224		*d4 = (struct sockaddr_in) {
1225			.sin_len = sizeof(*d4),
1226			.sin_family = AF_INET,
1227			.sin_addr.s_addr = hdr->src_addr.ip4.addr,
1228			.sin_port = hdr->port,
1229		};
1230		break;
1231	}
1232	case 6: {
1233		struct sockaddr_in6 *s6, *d6;
1234
1235		s6 = (void *)src_addr;
1236		d6 = (void *)dst_addr;
1237
1238		*s6 = (struct sockaddr_in6) {
1239			.sin6_len = sizeof(*s6),
1240			.sin6_family = AF_INET6,
1241			.sin6_addr = hdr->dst_addr.ip6,
1242			.sin6_port = local_port,
1243		};
1244		*d6 = (struct sockaddr_in6) {
1245			.sin6_len = sizeof(*d6),
1246			.sin6_family = AF_INET6,
1247			.sin6_addr = hdr->src_addr.ip6,
1248			.sin6_port = hdr->port,
1249		};
1250		cma_ip6_clear_scope_id(&s6->sin6_addr);
1251		cma_ip6_clear_scope_id(&d6->sin6_addr);
1252		break;
1253	}
1254	default:
1255		return -EAFNOSUPPORT;
1256	}
1257
1258	return 0;
1259}
1260
1261static int cma_save_ip_info(struct sockaddr *src_addr,
1262			    struct sockaddr *dst_addr,
1263			    struct ib_cm_event *ib_event,
1264			    __be64 service_id)
1265{
1266	struct cma_hdr *hdr;
1267	__be16 port;
1268
1269	if (rdma_ps_from_service_id(service_id) == RDMA_PS_SDP)
1270		return sdp_save_ip_info(src_addr, dst_addr,
1271		    ib_event->private_data, service_id);
1272
1273	hdr = ib_event->private_data;
1274	if (hdr->cma_version != CMA_VERSION)
1275		return -EINVAL;
1276
1277	port = htons(cma_port_from_service_id(service_id));
1278
1279	switch (cma_get_ip_ver(hdr)) {
1280	case 4:
1281		cma_save_ip4_info((struct sockaddr_in *)src_addr,
1282				  (struct sockaddr_in *)dst_addr, hdr, port);
1283		break;
1284	case 6:
1285		cma_save_ip6_info((struct sockaddr_in6 *)src_addr,
1286				  (struct sockaddr_in6 *)dst_addr, hdr, port);
1287		break;
1288	default:
1289		return -EAFNOSUPPORT;
1290	}
1291
1292	return 0;
1293}
1294
1295static int cma_save_net_info(struct sockaddr *src_addr,
1296			     struct sockaddr *dst_addr,
1297			     struct rdma_cm_id *listen_id,
1298			     struct ib_cm_event *ib_event,
1299			     sa_family_t sa_family, __be64 service_id)
1300{
1301	if (sa_family == AF_IB) {
1302		if (ib_event->event == IB_CM_REQ_RECEIVED)
1303			cma_save_ib_info(src_addr, dst_addr, listen_id,
1304					 ib_event->param.req_rcvd.primary_path);
1305		else if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED)
1306			cma_save_ib_info(src_addr, dst_addr, listen_id, NULL);
1307		return 0;
1308	}
1309
1310	return cma_save_ip_info(src_addr, dst_addr, ib_event, service_id);
1311}
1312
1313static int cma_save_req_info(const struct ib_cm_event *ib_event,
1314			     struct cma_req_info *req)
1315{
1316	const struct ib_cm_req_event_param *req_param =
1317		&ib_event->param.req_rcvd;
1318	const struct ib_cm_sidr_req_event_param *sidr_param =
1319		&ib_event->param.sidr_req_rcvd;
1320
1321	switch (ib_event->event) {
1322	case IB_CM_REQ_RECEIVED:
1323		req->device	= req_param->listen_id->device;
1324		req->port	= req_param->port;
1325		memcpy(&req->local_gid, &req_param->primary_path->sgid,
1326		       sizeof(req->local_gid));
1327		req->has_gid	= true;
1328		req->service_id	= req_param->primary_path->service_id;
1329		req->pkey	= be16_to_cpu(req_param->primary_path->pkey);
1330		if (req->pkey != req_param->bth_pkey)
1331			pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and primary path P_Key (0x%x)\n"
1332					    "RDMA CMA: in the future this may cause the request to be dropped\n",
1333					    req_param->bth_pkey, req->pkey);
1334		break;
1335	case IB_CM_SIDR_REQ_RECEIVED:
1336		req->device	= sidr_param->listen_id->device;
1337		req->port	= sidr_param->port;
1338		req->has_gid	= false;
1339		req->service_id	= sidr_param->service_id;
1340		req->pkey	= sidr_param->pkey;
1341		if (req->pkey != sidr_param->bth_pkey)
1342			pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and SIDR request payload P_Key (0x%x)\n"
1343					    "RDMA CMA: in the future this may cause the request to be dropped\n",
1344					    sidr_param->bth_pkey, req->pkey);
1345		break;
1346	default:
1347		return -EINVAL;
1348	}
1349
1350	return 0;
1351}
1352
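/*
 * Check that the IPv4 destination address is configured on net_dev and
 * that the route back to the source address uses the same interface.
 */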
1353static bool validate_ipv4_net_dev(struct net_device *net_dev,
1354				  const struct sockaddr_in *dst_addr,
1355				  const struct sockaddr_in *src_addr)
1356{
1357#ifdef INET
1358	__be32 daddr = dst_addr->sin_addr.s_addr,
1359	       saddr = src_addr->sin_addr.s_addr;
1360	struct net_device *dst_dev;
1361	struct nhop_object *nh;
1362	bool ret;
1363
1364	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
1365	    ipv4_is_lbcast(daddr) || ipv4_is_zeronet(saddr) ||
1366	    ipv4_is_zeronet(daddr) || ipv4_is_loopback(daddr) ||
1367	    ipv4_is_loopback(saddr))
1368		return false;
1369
1370	dst_dev = ip_dev_find(net_dev->if_vnet, daddr);
1371	if (dst_dev != net_dev) {
1372		if (dst_dev != NULL)
1373			dev_put(dst_dev);
1374		return false;
1375	}
1376	dev_put(dst_dev);
1377
1378	/*
1379	 * Check for loopback.
1380	 */
1381	if (saddr == daddr)
1382		return true;
1383
1384	CURVNET_SET(net_dev->if_vnet);
1385	nh = fib4_lookup(RT_DEFAULT_FIB, src_addr->sin_addr, 0, NHR_NONE, 0);
1386	if (nh != NULL)
1387		ret = (nh->nh_ifp == net_dev);
1388	else
1389		ret = false;
1390	CURVNET_RESTORE();
1391	return ret;
1392#else
1393	return false;
1394#endif
1395}
1396
1397static bool validate_ipv6_net_dev(struct net_device *net_dev,
1398				  const struct sockaddr_in6 *dst_addr,
1399				  const struct sockaddr_in6 *src_addr)
1400{
1401#ifdef INET6
1402	struct sockaddr_in6 src_tmp = *src_addr;
1403	struct sockaddr_in6 dst_tmp = *dst_addr;
1404	struct net_device *dst_dev;
1405	struct nhop_object *nh;
1406	bool ret;
1407
1408	dst_dev = ip6_dev_find(net_dev->if_vnet, dst_tmp.sin6_addr,
1409	    net_dev->if_index);
1410	if (dst_dev != net_dev) {
1411		if (dst_dev != NULL)
1412			dev_put(dst_dev);
1413		return false;
1414	}
1415	dev_put(dst_dev);
1416
1417	CURVNET_SET(net_dev->if_vnet);
1418
1419	/*
1420	 * Make sure the scope ID gets embedded.
1421	 */
1422	src_tmp.sin6_scope_id = net_dev->if_index;
1423	sa6_embedscope(&src_tmp, 0);
1424
1425	dst_tmp.sin6_scope_id = net_dev->if_index;
1426	sa6_embedscope(&dst_tmp, 0);
1427
1428	/*
1429	 * Check for loopback after scope ID
1430	 * has been embedded:
1431	 */
1432	if (memcmp(&src_tmp.sin6_addr, &dst_tmp.sin6_addr,
1433	    sizeof(dst_tmp.sin6_addr)) == 0) {
1434		ret = true;
1435	} else {
1436		/* non-loopback case */
1437		nh = fib6_lookup(RT_DEFAULT_FIB, &src_addr->sin6_addr,
1438		    net_dev->if_index, NHR_NONE, 0);
1439		if (nh != NULL)
1440			ret = (nh->nh_ifp == net_dev);
1441		else
1442			ret = false;
1443	}
1444	CURVNET_RESTORE();
1445	return ret;
1446#else
1447	return false;
1448#endif
1449}
1450
1451static bool validate_net_dev(struct net_device *net_dev,
1452			     const struct sockaddr *daddr,
1453			     const struct sockaddr *saddr)
1454{
1455	const struct sockaddr_in *daddr4 = (const struct sockaddr_in *)daddr;
1456	const struct sockaddr_in *saddr4 = (const struct sockaddr_in *)saddr;
1457	const struct sockaddr_in6 *daddr6 = (const struct sockaddr_in6 *)daddr;
1458	const struct sockaddr_in6 *saddr6 = (const struct sockaddr_in6 *)saddr;
1459
1460	switch (daddr->sa_family) {
1461	case AF_INET:
1462		return saddr->sa_family == AF_INET &&
1463		       validate_ipv4_net_dev(net_dev, daddr4, saddr4);
1464
1465	case AF_INET6:
1466		return saddr->sa_family == AF_INET6 &&
1467		       validate_ipv6_net_dev(net_dev, daddr6, saddr6);
1468
1469	default:
1470		return false;
1471	}
1472}
1473
1474static struct net_device *
1475roce_get_net_dev_by_cm_event(struct ib_device *device, u8 port_num,
1476    const struct ib_cm_event *ib_event)
1477{
1478	struct ib_gid_attr sgid_attr;
1479	union ib_gid sgid;
1480	int err = -EINVAL;
1481
1482	if (ib_event->event == IB_CM_REQ_RECEIVED) {
1483		err = ib_get_cached_gid(device, port_num,
1484		    ib_event->param.req_rcvd.ppath_sgid_index, &sgid, &sgid_attr);
1485	} else if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) {
1486		err = ib_get_cached_gid(device, port_num,
1487		    ib_event->param.sidr_req_rcvd.sgid_index, &sgid, &sgid_attr);
1488	}
1489	if (err)
1490		return (NULL);
1491	return (sgid_attr.ndev);
1492}
1493
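/*
 * Resolve the network interface that an incoming CM request arrived on,
 * either from the cached GID entry (RoCE) or from the listen parameters,
 * and validate it against the addresses carried in the request.
 */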
1494static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event,
1495					  const struct cma_req_info *req)
1496{
1497	struct sockaddr_storage listen_addr_storage, src_addr_storage;
1498	struct sockaddr *listen_addr = (struct sockaddr *)&listen_addr_storage,
1499			*src_addr = (struct sockaddr *)&src_addr_storage;
1500	struct net_device *net_dev;
1501	const union ib_gid *gid = req->has_gid ? &req->local_gid : NULL;
1502	struct epoch_tracker et;
1503	int err;
1504
1505	err = cma_save_ip_info(listen_addr, src_addr, ib_event,
1506			       req->service_id);
1507	if (err)
1508		return ERR_PTR(err);
1509
1510	if (rdma_protocol_roce(req->device, req->port)) {
1511		net_dev = roce_get_net_dev_by_cm_event(req->device, req->port,
1512						       ib_event);
1513	} else {
1514		net_dev = ib_get_net_dev_by_params(req->device, req->port,
1515						   req->pkey,
1516						   gid, listen_addr);
1517	}
1518	if (!net_dev)
1519		return ERR_PTR(-ENODEV);
1520
1521	NET_EPOCH_ENTER(et);
1522	if (!validate_net_dev(net_dev, listen_addr, src_addr)) {
1523		NET_EPOCH_EXIT(et);
1524		dev_put(net_dev);
1525		return ERR_PTR(-EHOSTUNREACH);
1526	}
1527	NET_EPOCH_EXIT(et);
1528
1529	return net_dev;
1530}
1531
1532static enum rdma_port_space rdma_ps_from_service_id(__be64 service_id)
1533{
1534	return (be64_to_cpu(service_id) >> 16) & 0xffff;
1535}
1536
1537static bool sdp_match_private_data(struct rdma_id_private *id_priv,
1538				   const struct sdp_hh *hdr,
1539				   struct sockaddr *addr)
1540{
1541	__be32 ip4_addr;
1542	struct in6_addr ip6_addr;
1543
1544	switch (addr->sa_family) {
1545	case AF_INET:
1546		ip4_addr = ((struct sockaddr_in *)addr)->sin_addr.s_addr;
1547		if (sdp_get_ip_ver(hdr) != 4)
1548			return false;
1549		if (!cma_any_addr(addr) &&
1550		    hdr->dst_addr.ip4.addr != ip4_addr)
1551			return false;
1552		break;
1553	case AF_INET6:
1554		ip6_addr = ((struct sockaddr_in6 *)addr)->sin6_addr;
1555		if (sdp_get_ip_ver(hdr) != 6)
1556			return false;
1557		cma_ip6_clear_scope_id(&ip6_addr);
1558		if (!cma_any_addr(addr) &&
1559		    memcmp(&hdr->dst_addr.ip6, &ip6_addr, sizeof(ip6_addr)))
1560			return false;
1561		break;
1562	case AF_IB:
1563		return true;
1564	default:
1565		return false;
1566	}
1567
1568	return true;
1569}
1570
1571static bool cma_match_private_data(struct rdma_id_private *id_priv,
1572				   const void *vhdr)
1573{
1574	const struct cma_hdr *hdr = vhdr;
1575	struct sockaddr *addr = cma_src_addr(id_priv);
1576	__be32 ip4_addr;
1577	struct in6_addr ip6_addr;
1578
1579	if (cma_any_addr(addr) && !id_priv->afonly)
1580		return true;
1581
1582	if (id_priv->id.ps == RDMA_PS_SDP)
1583		return sdp_match_private_data(id_priv, vhdr, addr);
1584
1585	switch (addr->sa_family) {
1586	case AF_INET:
1587		ip4_addr = ((struct sockaddr_in *)addr)->sin_addr.s_addr;
1588		if (cma_get_ip_ver(hdr) != 4)
1589			return false;
1590		if (!cma_any_addr(addr) &&
1591		    hdr->dst_addr.ip4.addr != ip4_addr)
1592			return false;
1593		break;
1594	case AF_INET6:
1595		ip6_addr = ((struct sockaddr_in6 *)addr)->sin6_addr;
1596		if (cma_get_ip_ver(hdr) != 6)
1597			return false;
1598		cma_ip6_clear_scope_id(&ip6_addr);
1599		if (!cma_any_addr(addr) &&
1600		    memcmp(&hdr->dst_addr.ip6, &ip6_addr, sizeof(ip6_addr)))
1601			return false;
1602		break;
1603	case AF_IB:
1604		return true;
1605	default:
1606		return false;
1607	}
1608
1609	return true;
1610}
1611
1612static bool cma_protocol_roce_dev_port(struct ib_device *device, int port_num)
1613{
1614	enum rdma_link_layer ll = rdma_port_get_link_layer(device, port_num);
1615	enum rdma_transport_type transport =
1616		rdma_node_get_transport(device->node_type);
1617
1618	return ll == IB_LINK_LAYER_ETHERNET && transport == RDMA_TRANSPORT_IB;
1619}
1620
1621static bool cma_protocol_roce(const struct rdma_cm_id *id)
1622{
1623	struct ib_device *device = id->device;
1624	const int port_num = id->port_num ?: rdma_start_port(device);
1625
1626	return cma_protocol_roce_dev_port(device, port_num);
1627}
1628
1629static bool cma_match_net_dev(const struct rdma_cm_id *id,
1630			      const struct net_device *net_dev,
1631			      u8 port_num)
1632{
1633	const struct rdma_addr *addr = &id->route.addr;
1634
1635	if (!net_dev) {
1636		if (id->port_num && id->port_num != port_num)
1637			return false;
1638
1639		if (id->ps == RDMA_PS_SDP) {
1640			if (addr->src_addr.ss_family == AF_INET ||
1641			    addr->src_addr.ss_family == AF_INET6)
1642				return true;
1643			return false;
1644		}
1645		/* This request is an AF_IB request or a RoCE request */
1646		return addr->src_addr.ss_family == AF_IB ||
1647		    cma_protocol_roce_dev_port(id->device, port_num);
1648	}
1649
1650	return !addr->dev_addr.bound_dev_if ||
1651	       (net_eq(dev_net(net_dev), addr->dev_addr.net) &&
1652		addr->dev_addr.bound_dev_if == net_dev->if_index);
1653}
1654
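/*
 * Walk the owners of the bind list and return the listener whose private
 * data, device and bound interface match the incoming request, including
 * per-device child listeners.
 */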
1655static struct rdma_id_private *cma_find_listener(
1656		const struct rdma_bind_list *bind_list,
1657		const struct ib_cm_id *cm_id,
1658		const struct ib_cm_event *ib_event,
1659		const struct cma_req_info *req,
1660		const struct net_device *net_dev)
1661{
1662	struct rdma_id_private *id_priv, *id_priv_dev;
1663
1664	if (!bind_list)
1665		return ERR_PTR(-EINVAL);
1666
1667	hlist_for_each_entry(id_priv, &bind_list->owners, node) {
1668		if (cma_match_private_data(id_priv, ib_event->private_data)) {
1669			if (id_priv->id.device == cm_id->device &&
1670			    cma_match_net_dev(&id_priv->id, net_dev, req->port))
1671				return id_priv;
1672			list_for_each_entry(id_priv_dev,
1673					    &id_priv->listen_list,
1674					    listen_list) {
1675				if (id_priv_dev->id.device == cm_id->device &&
1676				    cma_match_net_dev(&id_priv_dev->id, net_dev, req->port))
1677					return id_priv_dev;
1678			}
1679		}
1680	}
1681
1682	return ERR_PTR(-EINVAL);
1683}
1684
1685static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id,
1686						 struct ib_cm_event *ib_event,
1687						 struct net_device **net_dev)
1688{
1689	struct cma_req_info req;
1690	struct rdma_bind_list *bind_list;
1691	struct rdma_id_private *id_priv;
1692	int err;
1693
1694	err = cma_save_req_info(ib_event, &req);
1695	if (err)
1696		return ERR_PTR(err);
1697
1698	if (rdma_ps_from_service_id(cm_id->service_id) == RDMA_PS_SDP) {
1699		*net_dev = NULL;
1700		goto there_is_no_net_dev;
1701	}
1702
1703	*net_dev = cma_get_net_dev(ib_event, &req);
1704	if (IS_ERR(*net_dev)) {
1705		if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) {
1706			/* Assuming the protocol is AF_IB */
1707			*net_dev = NULL;
1708		} else {
1709			return ERR_CAST(*net_dev);
1710		}
1711	}
1712
1713there_is_no_net_dev:
1714	bind_list = cma_ps_find(*net_dev ? dev_net(*net_dev) : &init_net,
1715				rdma_ps_from_service_id(req.service_id),
1716				cma_port_from_service_id(req.service_id));
1717	id_priv = cma_find_listener(bind_list, cm_id, ib_event, &req, *net_dev);
1718	if (IS_ERR(id_priv) && *net_dev) {
1719		dev_put(*net_dev);
1720		*net_dev = NULL;
1721	}
1722
1723	return id_priv;
1724}
1725
1726static inline int cma_user_data_offset(struct rdma_id_private *id_priv)
1727{
1728	if (cma_family(id_priv) == AF_IB)
1729		return 0;
1730	if (id_priv->id.ps == RDMA_PS_SDP)
1731		return 0;
1732	return sizeof(struct cma_hdr);
1733}
1734
1735static void cma_cancel_route(struct rdma_id_private *id_priv)
1736{
1737	if (rdma_cap_ib_sa(id_priv->id.device, id_priv->id.port_num)) {
1738		if (id_priv->query)
1739			ib_sa_cancel_query(id_priv->query_id, id_priv->query);
1740	}
1741}
1742
1743static void cma_cancel_listens(struct rdma_id_private *id_priv)
1744{
1745	struct rdma_id_private *dev_id_priv;
1746
1747	/*
1748	 * Remove from listen_any_list to prevent added devices from spawning
1749	 * additional listen requests.
1750	 */
1751	mutex_lock(&lock);
1752	list_del(&id_priv->list);
1753
1754	while (!list_empty(&id_priv->listen_list)) {
1755		dev_id_priv = list_entry(id_priv->listen_list.next,
1756					 struct rdma_id_private, listen_list);
1757		/* sync with device removal to avoid duplicate destruction */
1758		list_del_init(&dev_id_priv->list);
1759		list_del(&dev_id_priv->listen_list);
1760		mutex_unlock(&lock);
1761
1762		rdma_destroy_id(&dev_id_priv->id);
1763		mutex_lock(&lock);
1764	}
1765	mutex_unlock(&lock);
1766}
1767
1768static void cma_cancel_operation(struct rdma_id_private *id_priv,
1769				 enum rdma_cm_state state)
1770{
1771	switch (state) {
1772	case RDMA_CM_ADDR_QUERY:
1773		rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
1774		break;
1775	case RDMA_CM_ROUTE_QUERY:
1776		cma_cancel_route(id_priv);
1777		break;
1778	case RDMA_CM_LISTEN:
1779		if (cma_any_addr(cma_src_addr(id_priv)) && !id_priv->cma_dev)
1780			cma_cancel_listens(id_priv);
1781		break;
1782	default:
1783		break;
1784	}
1785}
1786
1787static void cma_release_port(struct rdma_id_private *id_priv)
1788{
1789	struct rdma_bind_list *bind_list = id_priv->bind_list;
1790	struct vnet *net = id_priv->id.route.addr.dev_addr.net;
1791
1792	if (!bind_list)
1793		return;
1794
1795	mutex_lock(&lock);
1796	hlist_del(&id_priv->node);
1797	if (hlist_empty(&bind_list->owners)) {
1798		cma_ps_remove(net, bind_list->ps, bind_list->port);
1799		kfree(bind_list);
1800	}
1801	mutex_unlock(&lock);
1802}
1803
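/*
 * Release every multicast membership held by the id: free SA-managed
 * joins, and leave any IP-level group that was joined for RoCE.
 */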
1804static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
1805{
1806	struct cma_multicast *mc;
1807
1808	while (!list_empty(&id_priv->mc_list)) {
1809		mc = container_of(id_priv->mc_list.next,
1810				  struct cma_multicast, list);
1811		list_del(&mc->list);
1812		if (rdma_cap_ib_mcast(id_priv->cma_dev->device,
1813				      id_priv->id.port_num)) {
1814			ib_sa_free_multicast(mc->multicast.ib);
1815			kfree(mc);
1816		} else {
1817			if (mc->igmp_joined) {
1818				struct rdma_dev_addr *dev_addr =
1819					&id_priv->id.route.addr.dev_addr;
1820				struct net_device *ndev = NULL;
1821
1822				if (dev_addr->bound_dev_if)
1823					ndev = dev_get_by_index(dev_addr->net,
1824								dev_addr->bound_dev_if);
1825				if (ndev) {
1826					cma_igmp_send(ndev,
1827						      &mc->multicast.ib->rec.mgid,
1828						      false);
1829					dev_put(ndev);
1830				}
1831			}
1832			kref_put(&mc->mcref, release_mc);
1833		}
1834	}
1835}
1836
1837void rdma_destroy_id(struct rdma_cm_id *id)
1838{
1839	struct rdma_id_private *id_priv;
1840	enum rdma_cm_state state;
1841
1842	id_priv = container_of(id, struct rdma_id_private, id);
1843	state = cma_exch(id_priv, RDMA_CM_DESTROYING);
1844	cma_cancel_operation(id_priv, state);
1845
1846	/*
1847	 * Wait for any active callback to finish.  New callbacks will find
1848	 * the id_priv state set to destroying and abort.
1849	 */
1850	mutex_lock(&id_priv->handler_mutex);
1851	mutex_unlock(&id_priv->handler_mutex);
1852
1853	if (id_priv->cma_dev) {
1854		if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
1855			if (id_priv->cm_id.ib)
1856				ib_destroy_cm_id(id_priv->cm_id.ib);
1857		} else if (rdma_cap_iw_cm(id_priv->id.device, 1)) {
1858			if (id_priv->cm_id.iw)
1859				iw_destroy_cm_id(id_priv->cm_id.iw);
1860		}
1861		cma_leave_mc_groups(id_priv);
1862		cma_release_dev(id_priv);
1863	}
1864
1865	cma_release_port(id_priv);
1866	cma_deref_id(id_priv);
1867	wait_for_completion(&id_priv->comp);
1868
1869	if (id_priv->internal_id)
1870		cma_deref_id(id_priv->id.context);
1871
1872	kfree(id_priv->id.route.path_rec);
1873	kfree(id_priv);
1874}
1875EXPORT_SYMBOL(rdma_destroy_id);
1876
1877static int cma_rep_recv(struct rdma_id_private *id_priv)
1878{
1879	int ret;
1880
1881	ret = cma_modify_qp_rtr(id_priv, NULL);
1882	if (ret)
1883		goto reject;
1884
1885	ret = cma_modify_qp_rts(id_priv, NULL);
1886	if (ret)
1887		goto reject;
1888
1889	ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0);
1890	if (ret)
1891		goto reject;
1892
1893	return 0;
1894reject:
1895	cma_modify_qp_err(id_priv);
1896	ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
1897		       NULL, 0, NULL, 0);
1898	return ret;
1899}
1900
1901static int sdp_verify_rep(const struct sdp_hah *data)
1902{
1903	if (sdp_get_majv(data->majv_minv) != SDP_MAJ_VERSION)
1904		return -EINVAL;
1905	return 0;
1906}
1907
1908static void cma_set_rep_event_data(struct rdma_cm_event *event,
1909				   struct ib_cm_rep_event_param *rep_data,
1910				   void *private_data)
1911{
1912	event->param.conn.private_data = private_data;
1913	event->param.conn.private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE;
1914	event->param.conn.responder_resources = rep_data->responder_resources;
1915	event->param.conn.initiator_depth = rep_data->initiator_depth;
1916	event->param.conn.flow_control = rep_data->flow_control;
1917	event->param.conn.rnr_retry_count = rep_data->rnr_retry_count;
1918	event->param.conn.srq = rep_data->srq;
1919	event->param.conn.qp_num = rep_data->remote_qpn;
1920}
1921
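/*
 * IB CM event handler for the active (connecting) side.  Translates IB
 * CM events into RDMA CM events, delivers them to the user's handler,
 * and destroys the id when the handler returns non-zero.
 */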
1922static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
1923{
1924	struct rdma_id_private *id_priv = cm_id->context;
1925	struct rdma_cm_event event;
1926	int ret = 0;
1927
1928	mutex_lock(&id_priv->handler_mutex);
1929	if ((ib_event->event != IB_CM_TIMEWAIT_EXIT &&
1930	     id_priv->state != RDMA_CM_CONNECT) ||
1931	    (ib_event->event == IB_CM_TIMEWAIT_EXIT &&
1932	     id_priv->state != RDMA_CM_DISCONNECT))
1933		goto out;
1934
1935	memset(&event, 0, sizeof event);
1936	switch (ib_event->event) {
1937	case IB_CM_REQ_ERROR:
1938	case IB_CM_REP_ERROR:
1939		event.event = RDMA_CM_EVENT_UNREACHABLE;
1940		event.status = -ETIMEDOUT;
1941		break;
1942	case IB_CM_REP_RECEIVED:
1943		if (id_priv->id.ps == RDMA_PS_SDP) {
1944			event.status = sdp_verify_rep(ib_event->private_data);
1945			if (event.status)
1946				event.event = RDMA_CM_EVENT_CONNECT_ERROR;
1947			else
1948				event.event = RDMA_CM_EVENT_CONNECT_RESPONSE;
1949		} else {
1950			if (id_priv->id.qp) {
1951				event.status = cma_rep_recv(id_priv);
1952				event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR :
1953							    RDMA_CM_EVENT_ESTABLISHED;
1954			} else {
1955				event.event = RDMA_CM_EVENT_CONNECT_RESPONSE;
1956			}
1957		}
1958		cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd,
1959				       ib_event->private_data);
1960		break;
1961	case IB_CM_RTU_RECEIVED:
1962	case IB_CM_USER_ESTABLISHED:
1963		event.event = RDMA_CM_EVENT_ESTABLISHED;
1964		break;
1965	case IB_CM_DREQ_ERROR:
1966		event.status = -ETIMEDOUT; /* fall through */
1967	case IB_CM_DREQ_RECEIVED:
1968	case IB_CM_DREP_RECEIVED:
1969		if (!cma_comp_exch(id_priv, RDMA_CM_CONNECT,
1970				   RDMA_CM_DISCONNECT))
1971			goto out;
1972		event.event = RDMA_CM_EVENT_DISCONNECTED;
1973		break;
1974	case IB_CM_TIMEWAIT_EXIT:
1975		event.event = RDMA_CM_EVENT_TIMEWAIT_EXIT;
1976		break;
1977	case IB_CM_MRA_RECEIVED:
1978		/* ignore event */
1979		goto out;
1980	case IB_CM_REJ_RECEIVED:
1981		cma_modify_qp_err(id_priv);
1982		event.status = ib_event->param.rej_rcvd.reason;
1983		event.event = RDMA_CM_EVENT_REJECTED;
1984		event.param.conn.private_data = ib_event->private_data;
1985		event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE;
1986		break;
1987	default:
1988		pr_err("RDMA CMA: unexpected IB CM event: %d\n",
1989		       ib_event->event);
1990		goto out;
1991	}
1992
1993	ret = id_priv->id.event_handler(&id_priv->id, &event);
1994	if (ret) {
1995		/* Destroy the CM ID by returning a non-zero value. */
1996		id_priv->cm_id.ib = NULL;
1997		cma_exch(id_priv, RDMA_CM_DESTROYING);
1998		mutex_unlock(&id_priv->handler_mutex);
1999		rdma_destroy_id(&id_priv->id);
2000		return ret;
2001	}
2002out:
2003	mutex_unlock(&id_priv->handler_mutex);
2004	return ret;
2005}
2006
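/*
 * Build a child rdma_cm_id for an incoming connection request on a
 * listener: copy the addressing information from the REQ, duplicate the
 * primary (and optional alternate) path record, and fill in the local
 * device address.  The new ID starts out in the RDMA_CM_CONNECT state.
 */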
2007static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
2008					       struct ib_cm_event *ib_event,
2009					       struct net_device *net_dev)
2010{
2011	struct rdma_id_private *id_priv;
2012	struct rdma_cm_id *id;
2013	struct rdma_route *rt;
2014	const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family;
2015	const __be64 service_id =
2016		      ib_event->param.req_rcvd.primary_path->service_id;
2017	int ret;
2018
2019	id = rdma_create_id(listen_id->route.addr.dev_addr.net,
2020			    listen_id->event_handler, listen_id->context,
2021			    listen_id->ps, ib_event->param.req_rcvd.qp_type);
2022	if (IS_ERR(id))
2023		return NULL;
2024
2025	id_priv = container_of(id, struct rdma_id_private, id);
2026	if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr,
2027			      (struct sockaddr *)&id->route.addr.dst_addr,
2028			      listen_id, ib_event, ss_family, service_id))
2029		goto err;
2030
2031	rt = &id->route;
2032	rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;
2033	rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths,
2034			       GFP_KERNEL);
2035	if (!rt->path_rec)
2036		goto err;
2037
2038	rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path;
2039	if (rt->num_paths == 2)
2040		rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
2041
2042	if (net_dev) {
2043		ret = rdma_copy_addr(&rt->addr.dev_addr, net_dev, NULL);
2044		if (ret)
2045			goto err;
2046	} else {
2047		if (!cma_protocol_roce(listen_id) &&
2048		    cma_any_addr(cma_src_addr(id_priv))) {
2049			rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND;
2050			rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
2051			ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey));
2052		} else if (!cma_any_addr(cma_src_addr(id_priv))) {
2053			ret = cma_translate_addr(cma_src_addr(id_priv), &rt->addr.dev_addr);
2054			if (ret)
2055				goto err;
2056		}
2057	}
2058	rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
2059
2060	id_priv->state = RDMA_CM_CONNECT;
2061	return id_priv;
2062
2063err:
2064	rdma_destroy_id(id);
2065	return NULL;
2066}
2067
2068static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
2069					      struct ib_cm_event *ib_event,
2070					      struct net_device *net_dev)
2071{
2072	struct rdma_id_private *id_priv;
2073	struct rdma_cm_id *id;
2074	const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family;
2075	struct vnet *net = listen_id->route.addr.dev_addr.net;
2076	int ret;
2077
2078	id = rdma_create_id(net, listen_id->event_handler, listen_id->context,
2079			    listen_id->ps, IB_QPT_UD);
2080	if (IS_ERR(id))
2081		return NULL;
2082
2083	id_priv = container_of(id, struct rdma_id_private, id);
2084	if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr,
2085			      (struct sockaddr *)&id->route.addr.dst_addr,
2086			      listen_id, ib_event, ss_family,
2087			      ib_event->param.sidr_req_rcvd.service_id))
2088		goto err;
2089
2090	if (net_dev) {
2091		ret = rdma_copy_addr(&id->route.addr.dev_addr, net_dev, NULL);
2092		if (ret)
2093			goto err;
2094	} else {
2095		if (!cma_any_addr(cma_src_addr(id_priv))) {
2096			ret = cma_translate_addr(cma_src_addr(id_priv),
2097						 &id->route.addr.dev_addr);
2098			if (ret)
2099				goto err;
2100		}
2101	}
2102
2103	id_priv->state = RDMA_CM_CONNECT;
2104	return id_priv;
2105err:
2106	rdma_destroy_id(id);
2107	return NULL;
2108}
2109
2110static void cma_set_req_event_data(struct rdma_cm_event *event,
2111				   struct ib_cm_req_event_param *req_data,
2112				   void *private_data, int offset)
2113{
2114	event->param.conn.private_data = (char *)private_data + offset;
2115	event->param.conn.private_data_len = IB_CM_REQ_PRIVATE_DATA_SIZE - offset;
2116	event->param.conn.responder_resources = req_data->responder_resources;
2117	event->param.conn.initiator_depth = req_data->initiator_depth;
2118	event->param.conn.flow_control = req_data->flow_control;
2119	event->param.conn.retry_count = req_data->retry_count;
2120	event->param.conn.rnr_retry_count = req_data->rnr_retry_count;
2121	event->param.conn.srq = req_data->srq;
2122	event->param.conn.qp_num = req_data->remote_qpn;
2123}
2124
2125static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_event)
2126{
2127	return (((ib_event->event == IB_CM_REQ_RECEIVED) &&
2128		 (ib_event->param.req_rcvd.qp_type == id->qp_type)) ||
2129		((ib_event->event == IB_CM_SIDR_REQ_RECEIVED) &&
2130		 (id->qp_type == IB_QPT_UD)) ||
2131		(!id->qp_type));
2132}
2133
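/*
 * IB CM callback for connection (REQ) and SIDR requests arriving on a
 * listener.  Creates the child ID, binds it to a device, reports
 * RDMA_CM_EVENT_CONNECT_REQUEST to the consumer and, for RC connections
 * still pending after the handler returns, sends an MRA so the peer keeps
 * waiting while the consumer processes the request.
 */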
2134static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
2135{
2136	struct rdma_id_private *listen_id, *conn_id = NULL;
2137	struct rdma_cm_event event;
2138	struct net_device *net_dev;
2139	int offset, ret;
2140
2141	listen_id = cma_id_from_event(cm_id, ib_event, &net_dev);
2142	if (IS_ERR(listen_id))
2143		return PTR_ERR(listen_id);
2144
2145	if (!cma_check_req_qp_type(&listen_id->id, ib_event)) {
2146		ret = -EINVAL;
2147		goto net_dev_put;
2148	}
2149
2150	mutex_lock(&listen_id->handler_mutex);
2151	if (listen_id->state != RDMA_CM_LISTEN) {
2152		ret = -ECONNABORTED;
2153		goto err1;
2154	}
2155
2156	memset(&event, 0, sizeof event);
2157	offset = cma_user_data_offset(listen_id);
2158	event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
2159	if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) {
2160		conn_id = cma_new_udp_id(&listen_id->id, ib_event, net_dev);
2161		event.param.ud.private_data = (char *)ib_event->private_data + offset;
2162		event.param.ud.private_data_len =
2163				IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset;
2164	} else {
2165		conn_id = cma_new_conn_id(&listen_id->id, ib_event, net_dev);
2166		cma_set_req_event_data(&event, &ib_event->param.req_rcvd,
2167				       ib_event->private_data, offset);
2168	}
2169	if (!conn_id) {
2170		ret = -ENOMEM;
2171		goto err1;
2172	}
2173
2174	mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
2175	ret = cma_acquire_dev(conn_id, listen_id);
2176	if (ret)
2177		goto err2;
2178
2179	conn_id->cm_id.ib = cm_id;
2180	cm_id->context = conn_id;
2181	cm_id->cm_handler = cma_ib_handler;
2182
2183	/*
2184	 * Protect against the user destroying conn_id from another thread
2185	 * until we're done accessing it.
2186	 */
2187	atomic_inc(&conn_id->refcount);
2188	ret = conn_id->id.event_handler(&conn_id->id, &event);
2189	if (ret)
2190		goto err3;
2191	/*
2192	 * Acquire mutex to prevent user executing rdma_destroy_id()
2193	 * while we're accessing the cm_id.
2194	 */
2195	mutex_lock(&lock);
2196	if (cma_comp(conn_id, RDMA_CM_CONNECT) &&
2197	    (conn_id->id.qp_type != IB_QPT_UD))
2198		ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
2199	mutex_unlock(&lock);
2200	mutex_unlock(&conn_id->handler_mutex);
2201	mutex_unlock(&listen_id->handler_mutex);
2202	cma_deref_id(conn_id);
2203	if (net_dev)
2204		dev_put(net_dev);
2205	return 0;
2206
2207err3:
2208	cma_deref_id(conn_id);
2209	/* Destroy the CM ID by returning a non-zero value. */
2210	conn_id->cm_id.ib = NULL;
2211err2:
2212	cma_exch(conn_id, RDMA_CM_DESTROYING);
2213	mutex_unlock(&conn_id->handler_mutex);
2214err1:
2215	mutex_unlock(&listen_id->handler_mutex);
2216	if (conn_id)
2217		rdma_destroy_id(&conn_id->id);
2218
2219net_dev_put:
2220	if (net_dev)
2221		dev_put(net_dev);
2222
2223	return ret;
2224}
2225
2226__be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr)
2227{
2228	if (addr->sa_family == AF_IB)
2229		return ((struct sockaddr_ib *) addr)->sib_sid;
2230
2231	return cpu_to_be64(((u64)id->ps << 16) + be16_to_cpu(cma_port(addr)));
2232}
2233EXPORT_SYMBOL(rdma_get_service_id);
2234
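/*
 * iWARP CM callback for connected IDs: maps connect replies, establish and
 * close events onto the corresponding RDMA CM events.  As with the IB path,
 * a non-zero return from the consumer destroys the CM ID.
 */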
2235static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
2236{
2237	struct rdma_id_private *id_priv = iw_id->context;
2238	struct rdma_cm_event event;
2239	int ret = 0;
2240	struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr;
2241	struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
2242
2243	mutex_lock(&id_priv->handler_mutex);
2244	if (id_priv->state != RDMA_CM_CONNECT)
2245		goto out;
2246
2247	memset(&event, 0, sizeof event);
2248	switch (iw_event->event) {
2249	case IW_CM_EVENT_CLOSE:
2250		event.event = RDMA_CM_EVENT_DISCONNECTED;
2251		break;
2252	case IW_CM_EVENT_CONNECT_REPLY:
2253		memcpy(cma_src_addr(id_priv), laddr,
2254		       rdma_addr_size(laddr));
2255		memcpy(cma_dst_addr(id_priv), raddr,
2256		       rdma_addr_size(raddr));
2257		switch (iw_event->status) {
2258		case 0:
2259			event.event = RDMA_CM_EVENT_ESTABLISHED;
2260			event.param.conn.initiator_depth = iw_event->ird;
2261			event.param.conn.responder_resources = iw_event->ord;
2262			break;
2263		case -ECONNRESET:
2264		case -ECONNREFUSED:
2265			event.event = RDMA_CM_EVENT_REJECTED;
2266			break;
2267		case -ETIMEDOUT:
2268			event.event = RDMA_CM_EVENT_UNREACHABLE;
2269			break;
2270		default:
2271			event.event = RDMA_CM_EVENT_CONNECT_ERROR;
2272			break;
2273		}
2274		break;
2275	case IW_CM_EVENT_ESTABLISHED:
2276		event.event = RDMA_CM_EVENT_ESTABLISHED;
2277		event.param.conn.initiator_depth = iw_event->ird;
2278		event.param.conn.responder_resources = iw_event->ord;
2279		break;
2280	default:
2281		BUG_ON(1);
2282	}
2283
2284	event.status = iw_event->status;
2285	event.param.conn.private_data = iw_event->private_data;
2286	event.param.conn.private_data_len = iw_event->private_data_len;
2287	ret = id_priv->id.event_handler(&id_priv->id, &event);
2288	if (ret) {
2289		/* Destroy the CM ID by returning a non-zero value. */
2290		id_priv->cm_id.iw = NULL;
2291		cma_exch(id_priv, RDMA_CM_DESTROYING);
2292		mutex_unlock(&id_priv->handler_mutex);
2293		rdma_destroy_id(&id_priv->id);
2294		return ret;
2295	}
2296
2297out:
2298	mutex_unlock(&id_priv->handler_mutex);
2299	return ret;
2300}
2301
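/*
 * iWARP connection request handler: creates a new RDMA_PS_TCP/IB_QPT_RC
 * rdma_cm_id for the incoming iw_cm_id, resolves and acquires the local
 * device, and hands a CONNECT_REQUEST event to the listener's consumer.
 */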
2302static int iw_conn_req_handler(struct iw_cm_id *cm_id,
2303			       struct iw_cm_event *iw_event)
2304{
2305	struct rdma_cm_id *new_cm_id;
2306	struct rdma_id_private *listen_id, *conn_id;
2307	struct rdma_cm_event event;
2308	int ret = -ECONNABORTED;
2309	struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr;
2310	struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
2311
2312	listen_id = cm_id->context;
2313
2314	mutex_lock(&listen_id->handler_mutex);
2315	if (listen_id->state != RDMA_CM_LISTEN)
2316		goto out;
2317
	/* Create a new RDMA ID for the new IW CM ID */
2319	new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net,
2320				   listen_id->id.event_handler,
2321				   listen_id->id.context,
2322				   RDMA_PS_TCP, IB_QPT_RC);
2323	if (IS_ERR(new_cm_id)) {
2324		ret = -ENOMEM;
2325		goto out;
2326	}
2327	conn_id = container_of(new_cm_id, struct rdma_id_private, id);
2328	mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
2329	conn_id->state = RDMA_CM_CONNECT;
2330
2331	ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr);
2332	if (ret) {
2333		mutex_unlock(&conn_id->handler_mutex);
2334		rdma_destroy_id(new_cm_id);
2335		goto out;
2336	}
2337
2338	ret = cma_acquire_dev(conn_id, listen_id);
2339	if (ret) {
2340		mutex_unlock(&conn_id->handler_mutex);
2341		rdma_destroy_id(new_cm_id);
2342		goto out;
2343	}
2344
2345	conn_id->cm_id.iw = cm_id;
2346	cm_id->context = conn_id;
2347	cm_id->cm_handler = cma_iw_handler;
2348
2349	memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr));
2350	memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr));
2351
2352	memset(&event, 0, sizeof event);
2353	event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
2354	event.param.conn.private_data = iw_event->private_data;
2355	event.param.conn.private_data_len = iw_event->private_data_len;
2356	event.param.conn.initiator_depth = iw_event->ird;
2357	event.param.conn.responder_resources = iw_event->ord;
2358
2359	/*
2360	 * Protect against the user destroying conn_id from another thread
2361	 * until we're done accessing it.
2362	 */
2363	atomic_inc(&conn_id->refcount);
2364	ret = conn_id->id.event_handler(&conn_id->id, &event);
2365	if (ret) {
2366		/* User wants to destroy the CM ID */
2367		conn_id->cm_id.iw = NULL;
2368		cma_exch(conn_id, RDMA_CM_DESTROYING);
2369		mutex_unlock(&conn_id->handler_mutex);
2370		cma_deref_id(conn_id);
2371		rdma_destroy_id(&conn_id->id);
2372		goto out;
2373	}
2374
2375	mutex_unlock(&conn_id->handler_mutex);
2376	cma_deref_id(conn_id);
2377
2378out:
2379	mutex_unlock(&listen_id->handler_mutex);
2380	return ret;
2381}
2382
2383static int cma_ib_listen(struct rdma_id_private *id_priv)
2384{
2385	struct sockaddr *addr;
2386	struct ib_cm_id	*id;
2387	__be64 svc_id;
2388
2389	addr = cma_src_addr(id_priv);
2390	svc_id = rdma_get_service_id(&id_priv->id, addr);
2391	id = ib_cm_insert_listen(id_priv->id.device, cma_req_handler, svc_id);
2392	if (IS_ERR(id))
2393		return PTR_ERR(id);
2394	id_priv->cm_id.ib = id;
2395
2396	return 0;
2397}
2398
2399static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
2400{
2401	int ret;
2402	struct iw_cm_id	*id;
2403
2404	id = iw_create_cm_id(id_priv->id.device,
2405			     iw_conn_req_handler,
2406			     id_priv);
2407	if (IS_ERR(id))
2408		return PTR_ERR(id);
2409
2410	id->tos = id_priv->tos;
2411	id_priv->cm_id.iw = id;
2412
2413	memcpy(&id_priv->cm_id.iw->local_addr, cma_src_addr(id_priv),
2414	       rdma_addr_size(cma_src_addr(id_priv)));
2415
2416	ret = iw_cm_listen(id_priv->cm_id.iw, backlog);
2417
2418	if (ret) {
2419		iw_destroy_cm_id(id_priv->cm_id.iw);
2420		id_priv->cm_id.iw = NULL;
2421	}
2422
2423	return ret;
2424}
2425
2426static int cma_listen_handler(struct rdma_cm_id *id,
2427			      struct rdma_cm_event *event)
2428{
2429	struct rdma_id_private *id_priv = id->context;
2430
2431	id->context = id_priv->id.context;
2432	id->event_handler = id_priv->id.event_handler;
2433	return id_priv->id.event_handler(id, event);
2434}
2435
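/*
 * Clone a wildcard listener onto a single cma_device.  The clone is marked
 * internal, linked to the parent's listen_list and forwards events to the
 * parent's handler via cma_listen_handler().
 */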
2436static void cma_listen_on_dev(struct rdma_id_private *id_priv,
2437			      struct cma_device *cma_dev)
2438{
2439	struct rdma_id_private *dev_id_priv;
2440	struct rdma_cm_id *id;
2441	struct vnet *net = id_priv->id.route.addr.dev_addr.net;
2442	int ret;
2443
2444	if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1))
2445		return;
2446
2447	id = rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps,
2448			    id_priv->id.qp_type);
2449	if (IS_ERR(id))
2450		return;
2451
2452	dev_id_priv = container_of(id, struct rdma_id_private, id);
2453
2454	dev_id_priv->state = RDMA_CM_ADDR_BOUND;
2455	memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv),
2456	       rdma_addr_size(cma_src_addr(id_priv)));
2457
2458	_cma_attach_to_dev(dev_id_priv, cma_dev);
2459	list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
2460	atomic_inc(&id_priv->refcount);
2461	dev_id_priv->internal_id = 1;
2462	dev_id_priv->afonly = id_priv->afonly;
2463
2464	ret = rdma_listen(id, id_priv->backlog);
2465	if (ret)
2466		pr_warn("RDMA CMA: cma_listen_on_dev, error %d, listening on device %s\n",
2467			ret, cma_dev->device->name);
2468}
2469
2470static void cma_listen_on_all(struct rdma_id_private *id_priv)
2471{
2472	struct cma_device *cma_dev;
2473
2474	mutex_lock(&lock);
2475	list_add_tail(&id_priv->list, &listen_any_list);
2476	list_for_each_entry(cma_dev, &dev_list, list)
2477		cma_listen_on_dev(id_priv, cma_dev);
2478	mutex_unlock(&lock);
2479}
2480
2481void rdma_set_service_type(struct rdma_cm_id *id, int tos)
2482{
2483	struct rdma_id_private *id_priv;
2484
2485	id_priv = container_of(id, struct rdma_id_private, id);
2486	id_priv->tos = (u8) tos;
2487}
2488EXPORT_SYMBOL(rdma_set_service_type);
2489
2490static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
2491			      void *context)
2492{
2493	struct cma_work *work = context;
2494	struct rdma_route *route;
2495
2496	route = &work->id->id.route;
2497
2498	if (!status) {
2499		route->num_paths = 1;
2500		*route->path_rec = *path_rec;
2501	} else {
2502		work->old_state = RDMA_CM_ROUTE_QUERY;
2503		work->new_state = RDMA_CM_ADDR_RESOLVED;
2504		work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
2505		work->event.status = status;
2506	}
2507
2508	queue_work(cma_wq, &work->work);
2509}
2510
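/*
 * Issue an SA path record query for the bound source/destination GIDs.
 * The QoS class (IPv4) is taken from the consumer-supplied TOS and the
 * traffic class (IPv6/AF_IB) from the flow info; completion is reported
 * through cma_query_handler().
 */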
2511static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
2512			      struct cma_work *work)
2513{
2514	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
2515	struct ib_sa_path_rec path_rec;
2516	ib_sa_comp_mask comp_mask;
2517	struct sockaddr_in6 *sin6;
2518	struct sockaddr_ib *sib;
2519
2520	memset(&path_rec, 0, sizeof path_rec);
2521	rdma_addr_get_sgid(dev_addr, &path_rec.sgid);
2522	rdma_addr_get_dgid(dev_addr, &path_rec.dgid);
2523	path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
2524	path_rec.numb_path = 1;
2525	path_rec.reversible = 1;
2526	path_rec.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
2527
2528	comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
2529		    IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH |
2530		    IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID;
2531
2532	switch (cma_family(id_priv)) {
2533	case AF_INET:
2534		path_rec.qos_class = cpu_to_be16((u16) id_priv->tos);
2535		comp_mask |= IB_SA_PATH_REC_QOS_CLASS;
2536		break;
2537	case AF_INET6:
2538		sin6 = (struct sockaddr_in6 *) cma_src_addr(id_priv);
2539		path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) >> 20);
2540		comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS;
2541		break;
2542	case AF_IB:
2543		sib = (struct sockaddr_ib *) cma_src_addr(id_priv);
2544		path_rec.traffic_class = (u8) (be32_to_cpu(sib->sib_flowinfo) >> 20);
2545		comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS;
2546		break;
2547	}
2548
2549	id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device,
2550					       id_priv->id.port_num, &path_rec,
2551					       comp_mask, timeout_ms,
2552					       GFP_KERNEL, cma_query_handler,
2553					       work, &id_priv->query);
2554
2555	return (id_priv->query_id < 0) ? id_priv->query_id : 0;
2556}
2557
2558static void cma_work_handler(struct work_struct *_work)
2559{
2560	struct cma_work *work = container_of(_work, struct cma_work, work);
2561	struct rdma_id_private *id_priv = work->id;
2562	int destroy = 0;
2563
2564	mutex_lock(&id_priv->handler_mutex);
2565	if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
2566		goto out;
2567
2568	if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
2569		cma_exch(id_priv, RDMA_CM_DESTROYING);
2570		destroy = 1;
2571	}
2572out:
2573	mutex_unlock(&id_priv->handler_mutex);
2574	cma_deref_id(id_priv);
2575	if (destroy)
2576		rdma_destroy_id(&id_priv->id);
2577	kfree(work);
2578}
2579
2580static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
2581{
2582	struct rdma_route *route = &id_priv->id.route;
2583	struct cma_work *work;
2584	int ret;
2585
2586	work = kzalloc(sizeof *work, GFP_KERNEL);
2587	if (!work)
2588		return -ENOMEM;
2589
2590	work->id = id_priv;
2591	INIT_WORK(&work->work, cma_work_handler);
2592	work->old_state = RDMA_CM_ROUTE_QUERY;
2593	work->new_state = RDMA_CM_ROUTE_RESOLVED;
2594	work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
2595
2596	route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL);
2597	if (!route->path_rec) {
2598		ret = -ENOMEM;
2599		goto err1;
2600	}
2601
2602	ret = cma_query_ib_route(id_priv, timeout_ms, work);
2603	if (ret)
2604		goto err2;
2605
2606	return 0;
2607err2:
2608	kfree(route->path_rec);
2609	route->path_rec = NULL;
2610err1:
2611	kfree(work);
2612	return ret;
2613}
2614
2615int rdma_set_ib_paths(struct rdma_cm_id *id,
2616		      struct ib_sa_path_rec *path_rec, int num_paths)
2617{
2618	struct rdma_id_private *id_priv;
2619	int ret;
2620
2621	id_priv = container_of(id, struct rdma_id_private, id);
2622	if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED,
2623			   RDMA_CM_ROUTE_RESOLVED))
2624		return -EINVAL;
2625
2626	id->route.path_rec = kmemdup(path_rec, sizeof *path_rec * num_paths,
2627				     GFP_KERNEL);
2628	if (!id->route.path_rec) {
2629		ret = -ENOMEM;
2630		goto err;
2631	}
2632
2633	id->route.num_paths = num_paths;
2634	return 0;
2635err:
2636	cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_ADDR_RESOLVED);
2637	return ret;
2638}
2639EXPORT_SYMBOL(rdma_set_ib_paths);
2640
2641static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
2642{
2643	struct cma_work *work;
2644
2645	work = kzalloc(sizeof *work, GFP_KERNEL);
2646	if (!work)
2647		return -ENOMEM;
2648
2649	work->id = id_priv;
2650	INIT_WORK(&work->work, cma_work_handler);
2651	work->old_state = RDMA_CM_ROUTE_QUERY;
2652	work->new_state = RDMA_CM_ROUTE_RESOLVED;
2653	work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
2654	queue_work(cma_wq, &work->work);
2655	return 0;
2656}
2657
2658static int iboe_tos_to_sl(struct net_device *ndev, int tos)
2659{
2660	/* derive the service level (SL) from the IPv4 type of service (TOS) */
2661	int sl = (tos >> 5) & 0x7;
2662
2663	/* final mappings are done by the vendor specific drivers */
2664	return sl;
2665}
2666
2667static enum ib_gid_type cma_route_gid_type(enum rdma_network_type network_type,
2668					   unsigned long supported_gids,
2669					   enum ib_gid_type default_gid)
2670{
2671	if ((network_type == RDMA_NETWORK_IPV4 ||
2672	     network_type == RDMA_NETWORK_IPV6) &&
2673	    test_bit(IB_GID_TYPE_ROCE_UDP_ENCAP, &supported_gids))
2674		return IB_GID_TYPE_ROCE_UDP_ENCAP;
2675
2676	return default_gid;
2677}
2678
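/*
 * RoCE route resolution: no SA query is involved, so the single path record
 * is synthesized locally from the resolved L2 address, the bound net device
 * (MTU, rate, GID type) and the consumer's TOS, and the ROUTE_RESOLVED
 * event is then queued to the CMA work queue.
 */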
2679static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
2680{
2681	struct rdma_route *route = &id_priv->id.route;
2682	struct rdma_addr *addr = &route->addr;
2683	struct cma_work *work;
2684	int ret;
2685	struct net_device *ndev = NULL;
2686
2688	work = kzalloc(sizeof *work, GFP_KERNEL);
2689	if (!work)
2690		return -ENOMEM;
2691
2692	work->id = id_priv;
2693	INIT_WORK(&work->work, cma_work_handler);
2694
2695	route->path_rec = kzalloc(sizeof *route->path_rec, GFP_KERNEL);
2696	if (!route->path_rec) {
2697		ret = -ENOMEM;
2698		goto err1;
2699	}
2700
2701	route->num_paths = 1;
2702
2703	if (addr->dev_addr.bound_dev_if) {
2704		unsigned long supported_gids;
2705
2706		ndev = dev_get_by_index(addr->dev_addr.net,
2707					addr->dev_addr.bound_dev_if);
2708		if (!ndev) {
2709			ret = -ENODEV;
2710			goto err2;
2711		}
2712
2713		route->path_rec->net = ndev->if_vnet;
2714		route->path_rec->ifindex = ndev->if_index;
2715		supported_gids = roce_gid_type_mask_support(id_priv->id.device,
2716							    id_priv->id.port_num);
2717		route->path_rec->gid_type =
2718			cma_route_gid_type(addr->dev_addr.network,
2719					   supported_gids,
2720					   id_priv->gid_type);
2721	}
2722	if (!ndev) {
2723		ret = -ENODEV;
2724		goto err2;
2725	}
2726
2727	memcpy(route->path_rec->dmac, addr->dev_addr.dst_dev_addr, ETH_ALEN);
2728
2729	rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
2730		    &route->path_rec->sgid);
2731	rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr,
2732		    &route->path_rec->dgid);
2733
2734	/* Use the hint from the IP stack to select the GID type */
2735	if (route->path_rec->gid_type < ib_network_to_gid_type(addr->dev_addr.network))
2736		route->path_rec->gid_type = ib_network_to_gid_type(addr->dev_addr.network);
2737	if (((struct sockaddr *)&id_priv->id.route.addr.dst_addr)->sa_family != AF_IB)
2738		/* TODO: get the hoplimit from the inet/inet6 device */
2739		route->path_rec->hop_limit = addr->dev_addr.hoplimit;
2740	else
2741		route->path_rec->hop_limit = 1;
2742	route->path_rec->reversible = 1;
2743	route->path_rec->pkey = cpu_to_be16(0xffff);
2744	route->path_rec->mtu_selector = IB_SA_EQ;
2745	route->path_rec->sl = iboe_tos_to_sl(ndev, id_priv->tos);
2746	route->path_rec->traffic_class = id_priv->tos;
2747	route->path_rec->mtu = iboe_get_mtu(ndev->if_mtu);
2748	route->path_rec->rate_selector = IB_SA_EQ;
2749	route->path_rec->rate = iboe_get_rate(ndev);
2750	dev_put(ndev);
2751	route->path_rec->packet_life_time_selector = IB_SA_EQ;
2752	route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME;
2753	if (!route->path_rec->mtu) {
2754		ret = -EINVAL;
2755		goto err2;
2756	}
2757
2758	work->old_state = RDMA_CM_ROUTE_QUERY;
2759	work->new_state = RDMA_CM_ROUTE_RESOLVED;
2760	work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
2761	work->event.status = 0;
2762
2763	queue_work(cma_wq, &work->work);
2764
2765	return 0;
2766
2767err2:
2768	kfree(route->path_rec);
2769	route->path_rec = NULL;
2770err1:
2771	kfree(work);
2772	return ret;
2773}
2774
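/*
 * Dispatch route resolution according to the port's transport: SA path
 * queries for IB, locally synthesized path records for RoCE, and a no-op
 * work item for iWARP.  A typical active-side sequence (sketch only) is:
 *
 *	rdma_resolve_addr(id, NULL, dst_addr, timeout_ms);
 *	(wait for RDMA_CM_EVENT_ADDR_RESOLVED)
 *	rdma_resolve_route(id, timeout_ms);
 *	(wait for RDMA_CM_EVENT_ROUTE_RESOLVED, then rdma_connect())
 */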
2775int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
2776{
2777	struct rdma_id_private *id_priv;
2778	int ret;
2779
2780	id_priv = container_of(id, struct rdma_id_private, id);
2781	if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_QUERY))
2782		return -EINVAL;
2783
2784	atomic_inc(&id_priv->refcount);
2785	if (rdma_cap_ib_sa(id->device, id->port_num))
2786		ret = cma_resolve_ib_route(id_priv, timeout_ms);
2787	else if (rdma_protocol_roce(id->device, id->port_num))
2788		ret = cma_resolve_iboe_route(id_priv);
2789	else if (rdma_protocol_iwarp(id->device, id->port_num))
2790		ret = cma_resolve_iw_route(id_priv, timeout_ms);
2791	else
2792		ret = -ENOSYS;
2793
2794	if (ret)
2795		goto err;
2796
2797	return 0;
2798err:
2799	cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, RDMA_CM_ADDR_RESOLVED);
2800	cma_deref_id(id_priv);
2801	return ret;
2802}
2803EXPORT_SYMBOL(rdma_resolve_route);
2804
2805static void cma_set_loopback(struct sockaddr *addr)
2806{
2807	switch (addr->sa_family) {
2808	case AF_INET:
2809		((struct sockaddr_in *) addr)->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
2810		break;
2811	case AF_INET6:
2812		ipv6_addr_set(&((struct sockaddr_in6 *) addr)->sin6_addr,
2813			      0, 0, 0, htonl(1));
2814		break;
2815	default:
2816		ib_addr_set(&((struct sockaddr_ib *) addr)->sib_addr,
2817			    0, 0, 0, htonl(1));
2818		break;
2819	}
2820}
2821
2822static int cma_bind_loopback(struct rdma_id_private *id_priv)
2823{
2824	struct cma_device *cma_dev, *cur_dev;
2825	struct ib_port_attr port_attr;
2826	union ib_gid gid;
2827	u16 pkey;
2828	int ret;
2829	u8 p;
2830
2831	cma_dev = NULL;
2832	mutex_lock(&lock);
2833	list_for_each_entry(cur_dev, &dev_list, list) {
2834		if (cma_family(id_priv) == AF_IB &&
2835		    !rdma_cap_ib_cm(cur_dev->device, 1))
2836			continue;
2837
2838		if (!cma_dev)
2839			cma_dev = cur_dev;
2840
2841		for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
2842			if (!ib_query_port(cur_dev->device, p, &port_attr) &&
2843			    port_attr.state == IB_PORT_ACTIVE) {
2844				cma_dev = cur_dev;
2845				goto port_found;
2846			}
2847		}
2848	}
2849
2850	if (!cma_dev) {
2851		ret = -ENODEV;
2852		goto out;
2853	}
2854
2855	p = 1;
2856
2857port_found:
2858	ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid, NULL);
2859	if (ret)
2860		goto out;
2861
2862	ret = ib_get_cached_pkey(cma_dev->device, p, 0, &pkey);
2863	if (ret)
2864		goto out;
2865
2866	id_priv->id.route.addr.dev_addr.dev_type =
2867		(rdma_protocol_ib(cma_dev->device, p)) ?
2868		ARPHRD_INFINIBAND : ARPHRD_ETHER;
2869
2870	rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid);
2871	ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey);
2872	id_priv->id.port_num = p;
2873	cma_attach_to_dev(id_priv, cma_dev);
2874	cma_set_loopback(cma_src_addr(id_priv));
2875out:
2876	mutex_unlock(&lock);
2877	return ret;
2878}
2879
2880static void addr_handler(int status, struct sockaddr *src_addr,
2881			 struct rdma_dev_addr *dev_addr, void *context)
2882{
2883	struct rdma_id_private *id_priv = context;
2884	struct rdma_cm_event event;
2885
2886	memset(&event, 0, sizeof event);
2887	mutex_lock(&id_priv->handler_mutex);
2888	if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY,
2889			   RDMA_CM_ADDR_RESOLVED))
2890		goto out;
2891
2892	memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr));
2893	if (!status && !id_priv->cma_dev)
2894		status = cma_acquire_dev(id_priv, NULL);
2895
2896	if (status) {
2897		if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED,
2898				   RDMA_CM_ADDR_BOUND))
2899			goto out;
2900		event.event = RDMA_CM_EVENT_ADDR_ERROR;
2901		event.status = status;
2902	} else
2903		event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
2904
2905	if (id_priv->id.event_handler(&id_priv->id, &event)) {
2906		cma_exch(id_priv, RDMA_CM_DESTROYING);
2907		mutex_unlock(&id_priv->handler_mutex);
2908		cma_deref_id(id_priv);
2909		rdma_destroy_id(&id_priv->id);
2910		return;
2911	}
2912out:
2913	mutex_unlock(&id_priv->handler_mutex);
2914	cma_deref_id(id_priv);
2915}
2916
2917static int cma_resolve_loopback(struct rdma_id_private *id_priv)
2918{
2919	struct cma_work *work;
2920	union ib_gid gid;
2921	int ret;
2922
2923	work = kzalloc(sizeof *work, GFP_KERNEL);
2924	if (!work)
2925		return -ENOMEM;
2926
2927	if (!id_priv->cma_dev) {
2928		ret = cma_bind_loopback(id_priv);
2929		if (ret)
2930			goto err;
2931	}
2932
2933	rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
2934	rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
2935
2936	work->id = id_priv;
2937	INIT_WORK(&work->work, cma_work_handler);
2938	work->old_state = RDMA_CM_ADDR_QUERY;
2939	work->new_state = RDMA_CM_ADDR_RESOLVED;
2940	work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
2941	queue_work(cma_wq, &work->work);
2942	return 0;
2943err:
2944	kfree(work);
2945	return ret;
2946}
2947
2948static int cma_resolve_ib_addr(struct rdma_id_private *id_priv)
2949{
2950	struct cma_work *work;
2951	int ret;
2952
2953	work = kzalloc(sizeof *work, GFP_KERNEL);
2954	if (!work)
2955		return -ENOMEM;
2956
2957	if (!id_priv->cma_dev) {
2958		ret = cma_resolve_ib_dev(id_priv);
2959		if (ret)
2960			goto err;
2961	}
2962
2963	rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *)
2964		&(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr));
2965
2966	work->id = id_priv;
2967	INIT_WORK(&work->work, cma_work_handler);
2968	work->old_state = RDMA_CM_ADDR_QUERY;
2969	work->new_state = RDMA_CM_ADDR_RESOLVED;
2970	work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
2971	queue_work(cma_wq, &work->work);
2972	return 0;
2973err:
2974	kfree(work);
2975	return ret;
2976}
2977
2978static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
2979			 struct sockaddr *dst_addr)
2980{
2981	if (!src_addr || !src_addr->sa_family) {
2982		src_addr = (struct sockaddr *) &id->route.addr.src_addr;
2983		src_addr->sa_family = dst_addr->sa_family;
2984		if (dst_addr->sa_family == AF_INET6) {
2985			struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *) src_addr;
2986			struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *) dst_addr;
2987			src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id;
2988			if (IN6_IS_SCOPE_LINKLOCAL(&dst_addr6->sin6_addr) ||
2989			    IN6_IS_ADDR_MC_INTFACELOCAL(&dst_addr6->sin6_addr))
2990				id->route.addr.dev_addr.bound_dev_if = dst_addr6->sin6_scope_id;
2991		} else if (dst_addr->sa_family == AF_IB) {
2992			((struct sockaddr_ib *) src_addr)->sib_pkey =
2993				((struct sockaddr_ib *) dst_addr)->sib_pkey;
2994		}
2995	}
2996	return rdma_bind_addr(id, src_addr);
2997}
2998
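/*
 * Resolve the destination address to a device address.  Loopback and AF_IB
 * destinations are completed through the work queue; IP destinations go
 * through rdma_resolve_ip(), with completion delivered by addr_handler().
 * If the ID is still idle it is implicitly bound to a source address first.
 */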
2999int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
3000		      struct sockaddr *dst_addr, int timeout_ms)
3001{
3002	struct rdma_id_private *id_priv;
3003	int ret;
3004
3005	id_priv = container_of(id, struct rdma_id_private, id);
3006	if (id_priv->state == RDMA_CM_IDLE) {
3007		ret = cma_bind_addr(id, src_addr, dst_addr);
3008		if (ret)
3009			return ret;
3010	}
3011
3012	if (cma_family(id_priv) != dst_addr->sa_family)
3013		return -EINVAL;
3014
3015	if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY))
3016		return -EINVAL;
3017
3018	atomic_inc(&id_priv->refcount);
3019	memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
3020	if (cma_any_addr(dst_addr)) {
3021		ret = cma_resolve_loopback(id_priv);
3022	} else {
3023		if (dst_addr->sa_family == AF_IB) {
3024			ret = cma_resolve_ib_addr(id_priv);
3025		} else {
3026			ret = cma_check_linklocal(&id->route.addr.dev_addr, dst_addr);
3027			if (ret)
3028				goto err;
3029
3030			ret = rdma_resolve_ip(&addr_client, cma_src_addr(id_priv),
3031					      dst_addr, &id->route.addr.dev_addr,
3032					      timeout_ms, addr_handler, id_priv);
3033		}
3034	}
3035	if (ret)
3036		goto err;
3037
3038	return 0;
3039err:
3040	cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND);
3041	cma_deref_id(id_priv);
3042	return ret;
3043}
3044EXPORT_SYMBOL(rdma_resolve_addr);
3045
3046int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse)
3047{
3048	struct rdma_id_private *id_priv;
3049	unsigned long flags;
3050	int ret;
3051
3052	id_priv = container_of(id, struct rdma_id_private, id);
3053	spin_lock_irqsave(&id_priv->lock, flags);
3054	if (reuse || id_priv->state == RDMA_CM_IDLE) {
3055		id_priv->reuseaddr = reuse;
3056		ret = 0;
3057	} else {
3058		ret = -EINVAL;
3059	}
3060	spin_unlock_irqrestore(&id_priv->lock, flags);
3061	return ret;
3062}
3063EXPORT_SYMBOL(rdma_set_reuseaddr);
3064
3065int rdma_set_afonly(struct rdma_cm_id *id, int afonly)
3066{
3067	struct rdma_id_private *id_priv;
3068	unsigned long flags;
3069	int ret;
3070
3071	id_priv = container_of(id, struct rdma_id_private, id);
3072	spin_lock_irqsave(&id_priv->lock, flags);
3073	if (id_priv->state == RDMA_CM_IDLE || id_priv->state == RDMA_CM_ADDR_BOUND) {
3074		id_priv->options |= (1 << CMA_OPTION_AFONLY);
3075		id_priv->afonly = afonly;
3076		ret = 0;
3077	} else {
3078		ret = -EINVAL;
3079	}
3080	spin_unlock_irqrestore(&id_priv->lock, flags);
3081	return ret;
3082}
3083EXPORT_SYMBOL(rdma_set_afonly);
3084
3085static void cma_bind_port(struct rdma_bind_list *bind_list,
3086			  struct rdma_id_private *id_priv)
3087{
3088	struct sockaddr *addr;
3089	struct sockaddr_ib *sib;
3090	u64 sid, mask;
3091	__be16 port;
3092
3093	addr = cma_src_addr(id_priv);
3094	port = htons(bind_list->port);
3095
3096	switch (addr->sa_family) {
3097	case AF_INET:
3098		((struct sockaddr_in *) addr)->sin_port = port;
3099		break;
3100	case AF_INET6:
3101		((struct sockaddr_in6 *) addr)->sin6_port = port;
3102		break;
3103	case AF_IB:
3104		sib = (struct sockaddr_ib *) addr;
3105		sid = be64_to_cpu(sib->sib_sid);
3106		mask = be64_to_cpu(sib->sib_sid_mask);
3107		sib->sib_sid = cpu_to_be64((sid & mask) | (u64) ntohs(port));
3108		sib->sib_sid_mask = cpu_to_be64(~0ULL);
3109		break;
3110	}
3111	id_priv->bind_list = bind_list;
3112	hlist_add_head(&id_priv->node, &bind_list->owners);
3113}
3114
3115static int cma_alloc_port(enum rdma_port_space ps,
3116			  struct rdma_id_private *id_priv, unsigned short snum)
3117{
3118	struct rdma_bind_list *bind_list;
3119	int ret;
3120
3121	bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
3122	if (!bind_list)
3123		return -ENOMEM;
3124
3125	ret = cma_ps_alloc(id_priv->id.route.addr.dev_addr.net, ps, bind_list,
3126			   snum);
3127	if (ret < 0)
3128		goto err;
3129
3130	bind_list->ps = ps;
3131	bind_list->port = (unsigned short)ret;
3132	cma_bind_port(bind_list, id_priv);
3133	return 0;
3134err:
3135	kfree(bind_list);
3136	return ret == -ENOSPC ? -EADDRNOTAVAIL : ret;
3137}
3138
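/*
 * Pick an ephemeral port: start from a random offset inside the local port
 * range and walk the range once, skipping the most recently used port and
 * any port that already has a bind list in this port space.
 */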
3139static int cma_alloc_any_port(enum rdma_port_space ps,
3140			      struct rdma_id_private *id_priv)
3141{
3142	static unsigned int last_used_port;
3143	int low, high, remaining;
3144	unsigned int rover;
3145	struct vnet *net = id_priv->id.route.addr.dev_addr.net;
3146	u32 rand;
3147
3148	inet_get_local_port_range(net, &low, &high);
3149	remaining = (high - low) + 1;
3150	get_random_bytes(&rand, sizeof(rand));
3151	rover = rand % remaining + low;
3152retry:
3153	if (last_used_port != rover &&
3154	    !cma_ps_find(net, ps, (unsigned short)rover)) {
3155		int ret = cma_alloc_port(ps, id_priv, rover);
3156		/*
3157		 * Remember the previously used port number in order to avoid
3158		 * re-using the same port immediately after it is closed.
3159		 */
3160		if (!ret)
3161			last_used_port = rover;
3162		if (ret != -EADDRNOTAVAIL)
3163			return ret;
3164	}
3165	if (--remaining) {
3166		rover++;
3167		if ((rover < low) || (rover > high))
3168			rover = low;
3169		goto retry;
3170	}
3171	return -EADDRNOTAVAIL;
3172}
3173
3174/*
3175 * Check that the requested port is available.  This is called when trying to
3176 * bind to a specific port, or when trying to listen on a bound port.  In
3177 * the latter case, the provided id_priv may already be on the bind_list, but
3178 * we still need to check that it's okay to start listening.
3179 */
3180static int cma_check_port(struct rdma_bind_list *bind_list,
3181			  struct rdma_id_private *id_priv, uint8_t reuseaddr)
3182{
3183	struct rdma_id_private *cur_id;
3184	struct sockaddr *addr, *cur_addr;
3185
3186	addr = cma_src_addr(id_priv);
3187	hlist_for_each_entry(cur_id, &bind_list->owners, node) {
3188		if (id_priv == cur_id)
3189			continue;
3190
3191		if ((cur_id->state != RDMA_CM_LISTEN) && reuseaddr &&
3192		    cur_id->reuseaddr)
3193			continue;
3194
3195		cur_addr = cma_src_addr(cur_id);
3196		if (id_priv->afonly && cur_id->afonly &&
3197		    (addr->sa_family != cur_addr->sa_family))
3198			continue;
3199
3200		if (cma_any_addr(addr) || cma_any_addr(cur_addr))
3201			return -EADDRNOTAVAIL;
3202
3203		if (!cma_addr_cmp(addr, cur_addr))
3204			return -EADDRINUSE;
3205	}
3206	return 0;
3207}
3208
3209static int cma_use_port(enum rdma_port_space ps,
3210			struct rdma_id_private *id_priv)
3211{
3212	struct rdma_bind_list *bind_list;
3213	unsigned short snum;
3214	int ret;
3215
3216	snum = ntohs(cma_port(cma_src_addr(id_priv)));
3217	if (snum < IPPORT_RESERVED &&
3218	    priv_check(curthread, PRIV_NETINET_BINDANY) != 0)
3219		return -EACCES;
3220
3221	bind_list = cma_ps_find(id_priv->id.route.addr.dev_addr.net, ps, snum);
3222	if (!bind_list) {
3223		ret = cma_alloc_port(ps, id_priv, snum);
3224	} else {
3225		ret = cma_check_port(bind_list, id_priv, id_priv->reuseaddr);
3226		if (!ret)
3227			cma_bind_port(bind_list, id_priv);
3228	}
3229	return ret;
3230}
3231
3232static int cma_bind_listen(struct rdma_id_private *id_priv)
3233{
3234	struct rdma_bind_list *bind_list = id_priv->bind_list;
3235	int ret = 0;
3236
3237	mutex_lock(&lock);
3238	if (bind_list->owners.first->next)
3239		ret = cma_check_port(bind_list, id_priv, 0);
3240	mutex_unlock(&lock);
3241	return ret;
3242}
3243
3244static enum rdma_port_space cma_select_inet_ps(
3245		struct rdma_id_private *id_priv)
3246{
3247	switch (id_priv->id.ps) {
3248	case RDMA_PS_TCP:
3249	case RDMA_PS_UDP:
3250	case RDMA_PS_IPOIB:
3251	case RDMA_PS_IB:
3252	case RDMA_PS_SDP:
3253		return id_priv->id.ps;
3254	default:
3256		return 0;
3257	}
3258}
3259
3260static enum rdma_port_space cma_select_ib_ps(struct rdma_id_private *id_priv)
3261{
3262	enum rdma_port_space ps = 0;
3263	struct sockaddr_ib *sib;
3264	u64 sid_ps, mask, sid;
3265
3266	sib = (struct sockaddr_ib *) cma_src_addr(id_priv);
3267	mask = be64_to_cpu(sib->sib_sid_mask) & RDMA_IB_IP_PS_MASK;
3268	sid = be64_to_cpu(sib->sib_sid) & mask;
3269
3270	if ((id_priv->id.ps == RDMA_PS_IB) && (sid == (RDMA_IB_IP_PS_IB & mask))) {
3271		sid_ps = RDMA_IB_IP_PS_IB;
3272		ps = RDMA_PS_IB;
3273	} else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_TCP)) &&
3274		   (sid == (RDMA_IB_IP_PS_TCP & mask))) {
3275		sid_ps = RDMA_IB_IP_PS_TCP;
3276		ps = RDMA_PS_TCP;
3277	} else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_UDP)) &&
3278		   (sid == (RDMA_IB_IP_PS_UDP & mask))) {
3279		sid_ps = RDMA_IB_IP_PS_UDP;
3280		ps = RDMA_PS_UDP;
3281	}
3282
3283	if (ps) {
3284		sib->sib_sid = cpu_to_be64(sid_ps | ntohs(cma_port((struct sockaddr *) sib)));
3285		sib->sib_sid_mask = cpu_to_be64(RDMA_IB_IP_PS_MASK |
3286						be64_to_cpu(sib->sib_sid_mask));
3287	}
3288	return ps;
3289}
3290
3291static int cma_get_port(struct rdma_id_private *id_priv)
3292{
3293	enum rdma_port_space ps;
3294	int ret;
3295
3296	if (cma_family(id_priv) != AF_IB)
3297		ps = cma_select_inet_ps(id_priv);
3298	else
3299		ps = cma_select_ib_ps(id_priv);
3300	if (!ps)
3301		return -EPROTONOSUPPORT;
3302
3303	mutex_lock(&lock);
3304	if (cma_any_port(cma_src_addr(id_priv)))
3305		ret = cma_alloc_any_port(ps, id_priv);
3306	else
3307		ret = cma_use_port(ps, id_priv);
3308	mutex_unlock(&lock);
3309
3310	return ret;
3311}
3312
3313static int cma_check_linklocal(struct rdma_dev_addr *dev_addr,
3314			       struct sockaddr *addr)
3315{
3316#ifdef INET6
3317	struct sockaddr_in6 sin6;
3318
3319	if (addr->sa_family != AF_INET6)
3320		return 0;
3321
3322	sin6 = *(struct sockaddr_in6 *)addr;
3323
3324	if (IN6_IS_SCOPE_LINKLOCAL(&sin6.sin6_addr) ||
3325	    IN6_IS_ADDR_MC_INTFACELOCAL(&sin6.sin6_addr)) {
3326		bool failure;
3327
3328		CURVNET_SET_QUIET(dev_addr->net);
3329		failure = sa6_recoverscope(&sin6) || sin6.sin6_scope_id == 0;
3330		CURVNET_RESTORE();
3331
3332		/* check if IPv6 scope ID is not set */
3333		/* fail if the IPv6 scope ID could not be recovered or is unset */
3334			return -EINVAL;
3335		dev_addr->bound_dev_if = sin6.sin6_scope_id;
3336	}
3337#endif
3338	return 0;
3339}
3340
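/*
 * Move a bound ID into the listening state.  An idle ID is first bound to
 * the IPv4 wildcard address.  IDs bound to a device listen through the IB
 * or iWARP CM directly; wildcard IDs are instead replicated onto every
 * known cma_device via cma_listen_on_all().
 */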
3341int rdma_listen(struct rdma_cm_id *id, int backlog)
3342{
3343	struct rdma_id_private *id_priv;
3344	int ret;
3345
3346	id_priv = container_of(id, struct rdma_id_private, id);
3347	if (id_priv->state == RDMA_CM_IDLE) {
3348		id->route.addr.src_addr.ss_family = AF_INET;
3349		ret = rdma_bind_addr(id, cma_src_addr(id_priv));
3350		if (ret)
3351			return ret;
3352	}
3353
3354	if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN))
3355		return -EINVAL;
3356
3357	if (id_priv->reuseaddr) {
3358		ret = cma_bind_listen(id_priv);
3359		if (ret)
3360			goto err;
3361	}
3362
3363	id_priv->backlog = backlog;
3364	if (id->device) {
3365		if (rdma_cap_ib_cm(id->device, 1)) {
3366			ret = cma_ib_listen(id_priv);
3367			if (ret)
3368				goto err;
3369		} else if (rdma_cap_iw_cm(id->device, 1)) {
3370			ret = cma_iw_listen(id_priv, backlog);
3371			if (ret)
3372				goto err;
3373		} else {
3374			ret = -ENOSYS;
3375			goto err;
3376		}
3377	} else
3378		cma_listen_on_all(id_priv);
3379
3380	return 0;
3381err:
3382	id_priv->backlog = 0;
3383	cma_comp_exch(id_priv, RDMA_CM_LISTEN, RDMA_CM_ADDR_BOUND);
3384	return ret;
3385}
3386EXPORT_SYMBOL(rdma_listen);
3387
3388int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
3389{
3390	struct rdma_id_private *id_priv;
3391	int ret;
3392
3393	if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 &&
3394	    addr->sa_family != AF_IB)
3395		return -EAFNOSUPPORT;
3396
3397	id_priv = container_of(id, struct rdma_id_private, id);
3398	if (!cma_comp_exch(id_priv, RDMA_CM_IDLE, RDMA_CM_ADDR_BOUND))
3399		return -EINVAL;
3400
3401	ret = cma_check_linklocal(&id->route.addr.dev_addr, addr);
3402	if (ret)
3403		goto err1;
3404
3405	memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr));
3406	if (!cma_any_addr(addr)) {
3407		ret = cma_translate_addr(addr, &id->route.addr.dev_addr);
3408		if (ret)
3409			goto err1;
3410
3411		ret = cma_acquire_dev(id_priv, NULL);
3412		if (ret)
3413			goto err1;
3414	}
3415
3416	if (!(id_priv->options & (1 << CMA_OPTION_AFONLY))) {
3417		if (addr->sa_family == AF_INET)
3418			id_priv->afonly = 1;
3419#ifdef INET6
3420		else if (addr->sa_family == AF_INET6) {
3421			CURVNET_SET_QUIET(id_priv->id.route.addr.dev_addr.net);
3422			id_priv->afonly = V_ip6_v6only;
3423			CURVNET_RESTORE();
3424		}
3425#endif
3426	}
3427	ret = cma_get_port(id_priv);
3428	if (ret)
3429		goto err2;
3430
3431	return 0;
3432err2:
3433	if (id_priv->cma_dev)
3434		cma_release_dev(id_priv);
3435err1:
3436	cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE);
3437	return ret;
3438}
3439EXPORT_SYMBOL(rdma_bind_addr);
3440
3441static int sdp_format_hdr(struct sdp_hh *sdp_hdr, struct rdma_id_private *id_priv)
3442{
3443	/*
3444	 * XXXCEM: CMA just sets the version itself rather than relying on
3445	 * the passed-in header to have the major version set.  Should we?
3446	 */
3447	if (sdp_get_majv(sdp_hdr->majv_minv) != SDP_MAJ_VERSION)
3448		return -EINVAL;
3449
3450	if (cma_family(id_priv) == AF_INET) {
3451		struct sockaddr_in *src4, *dst4;
3452
3453		src4 = (struct sockaddr_in *) cma_src_addr(id_priv);
3454		dst4 = (struct sockaddr_in *) cma_dst_addr(id_priv);
3455
3456		sdp_set_ip_ver(sdp_hdr, 4);
3457		sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
3458		sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
3459		sdp_hdr->port = src4->sin_port;
3460	} else if (cma_family(id_priv) == AF_INET6) {
3461		struct sockaddr_in6 *src6, *dst6;
3462
3463		src6 = (struct sockaddr_in6 *) cma_src_addr(id_priv);
3464		dst6 = (struct sockaddr_in6 *) cma_dst_addr(id_priv);
3465
3466		sdp_set_ip_ver(sdp_hdr, 6);
3467		sdp_hdr->src_addr.ip6 = src6->sin6_addr;
3468		sdp_hdr->dst_addr.ip6 = dst6->sin6_addr;
3469		sdp_hdr->port = src6->sin6_port;
3470		cma_ip6_clear_scope_id(&sdp_hdr->src_addr.ip6);
3471		cma_ip6_clear_scope_id(&sdp_hdr->dst_addr.ip6);
3472	} else
3473		return -EAFNOSUPPORT;
3474	return 0;
3475}
3476
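/*
 * Build the private data header (CMA or SDP) that prefixes the consumer's
 * private data in REQ/SIDR_REQ messages: version and IP version fields plus
 * the source and destination addresses and the source port.
 */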
3477static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv)
3478{
3479	struct cma_hdr *cma_hdr;
3480
3481	if (id_priv->id.ps == RDMA_PS_SDP)
3482		return sdp_format_hdr(hdr, id_priv);
3483
3484	cma_hdr = hdr;
3485	cma_hdr->cma_version = CMA_VERSION;
3486	if (cma_family(id_priv) == AF_INET) {
3487		struct sockaddr_in *src4, *dst4;
3488
3489		src4 = (struct sockaddr_in *) cma_src_addr(id_priv);
3490		dst4 = (struct sockaddr_in *) cma_dst_addr(id_priv);
3491
3492		cma_set_ip_ver(cma_hdr, 4);
3493		cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
3494		cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
3495		cma_hdr->port = src4->sin_port;
3496	} else if (cma_family(id_priv) == AF_INET6) {
3497		struct sockaddr_in6 *src6, *dst6;
3498
3499		src6 = (struct sockaddr_in6 *) cma_src_addr(id_priv);
3500		dst6 = (struct sockaddr_in6 *) cma_dst_addr(id_priv);
3501
3502		cma_set_ip_ver(cma_hdr, 6);
3503		cma_hdr->src_addr.ip6 = src6->sin6_addr;
3504		cma_hdr->dst_addr.ip6 = dst6->sin6_addr;
3505		cma_hdr->port = src6->sin6_port;
3506		cma_ip6_clear_scope_id(&cma_hdr->src_addr.ip6);
3507		cma_ip6_clear_scope_id(&cma_hdr->dst_addr.ip6);
3508	}
3509	return 0;
3510}
3511
3512static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
3513				struct ib_cm_event *ib_event)
3514{
3515	struct rdma_id_private *id_priv = cm_id->context;
3516	struct rdma_cm_event event;
3517	struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd;
3518	int ret = 0;
3519
3520	mutex_lock(&id_priv->handler_mutex);
3521	if (id_priv->state != RDMA_CM_CONNECT)
3522		goto out;
3523
3524	memset(&event, 0, sizeof event);
3525	switch (ib_event->event) {
3526	case IB_CM_SIDR_REQ_ERROR:
3527		event.event = RDMA_CM_EVENT_UNREACHABLE;
3528		event.status = -ETIMEDOUT;
3529		break;
3530	case IB_CM_SIDR_REP_RECEIVED:
3531		event.param.ud.private_data = ib_event->private_data;
3532		event.param.ud.private_data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE;
3533		if (rep->status != IB_SIDR_SUCCESS) {
3534			event.event = RDMA_CM_EVENT_UNREACHABLE;
3535			event.status = ib_event->param.sidr_rep_rcvd.status;
3536			break;
3537		}
3538		ret = cma_set_qkey(id_priv, rep->qkey);
3539		if (ret) {
3540			event.event = RDMA_CM_EVENT_ADDR_ERROR;
3541			event.status = ret;
3542			break;
3543		}
3544		ret = ib_init_ah_from_path(id_priv->id.device,
3545					   id_priv->id.port_num,
3546					   id_priv->id.route.path_rec,
3547					   &event.param.ud.ah_attr);
3548		if (ret) {
3549			event.event = RDMA_CM_EVENT_ADDR_ERROR;
3550			event.status = ret;
3551			break;
3552		}
3553		event.param.ud.qp_num = rep->qpn;
3554		event.param.ud.qkey = rep->qkey;
3555		event.event = RDMA_CM_EVENT_ESTABLISHED;
3556		event.status = 0;
3557		break;
3558	default:
3559		pr_err("RDMA CMA: unexpected IB CM event: %d\n",
3560		       ib_event->event);
3561		goto out;
3562	}
3563
3564	ret = id_priv->id.event_handler(&id_priv->id, &event);
3565	if (ret) {
3566		/* Destroy the CM ID by returning a non-zero value. */
3567		id_priv->cm_id.ib = NULL;
3568		cma_exch(id_priv, RDMA_CM_DESTROYING);
3569		mutex_unlock(&id_priv->handler_mutex);
3570		rdma_destroy_id(&id_priv->id);
3571		return ret;
3572	}
3573out:
3574	mutex_unlock(&id_priv->handler_mutex);
3575	return ret;
3576}
3577
3578static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
3579			      struct rdma_conn_param *conn_param)
3580{
3581	struct ib_cm_sidr_req_param req;
3582	struct ib_cm_id	*id;
3583	void *private_data;
3584	int offset, ret;
3585
3586	memset(&req, 0, sizeof req);
3587	offset = cma_user_data_offset(id_priv);
3588	req.private_data_len = offset + conn_param->private_data_len;
3589	if (req.private_data_len < conn_param->private_data_len)
3590		return -EINVAL;
3591
3592	if (req.private_data_len) {
3593		private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
3594		if (!private_data)
3595			return -ENOMEM;
3596	} else {
3597		private_data = NULL;
3598	}
3599
3600	if (conn_param->private_data && conn_param->private_data_len)
3601		memcpy((char *)private_data + offset, conn_param->private_data,
3602		       conn_param->private_data_len);
3603
3604	if (private_data) {
3605		ret = cma_format_hdr(private_data, id_priv);
3606		if (ret)
3607			goto out;
3608		req.private_data = private_data;
3609	}
3610
3611	id = ib_create_cm_id(id_priv->id.device, cma_sidr_rep_handler,
3612			     id_priv);
3613	if (IS_ERR(id)) {
3614		ret = PTR_ERR(id);
3615		goto out;
3616	}
3617	id_priv->cm_id.ib = id;
3618
3619	req.path = id_priv->id.route.path_rec;
3620	req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
3621	req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8);
3622	req.max_cm_retries = CMA_MAX_CM_RETRIES;
3623
3624	ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req);
3625	if (ret) {
3626		ib_destroy_cm_id(id_priv->cm_id.ib);
3627		id_priv->cm_id.ib = NULL;
3628	}
3629out:
3630	kfree(private_data);
3631	return ret;
3632}
3633
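/*
 * Active-side RC connect over the IB CM: allocate a cm_id, prepend the CMA
 * header to the consumer's private data and send a REQ built from the
 * resolved path record(s) and the rdma_conn_param limits.
 */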
3634static int cma_connect_ib(struct rdma_id_private *id_priv,
3635			  struct rdma_conn_param *conn_param)
3636{
3637	struct ib_cm_req_param req;
3638	struct rdma_route *route;
3639	void *private_data;
3640	struct ib_cm_id	*id;
3641	int offset, ret;
3642
3643	memset(&req, 0, sizeof req);
3644	offset = cma_user_data_offset(id_priv);
3645	req.private_data_len = offset + conn_param->private_data_len;
3646	if (req.private_data_len < conn_param->private_data_len)
3647		return -EINVAL;
3648
3649	if (req.private_data_len) {
3650		private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
3651		if (!private_data)
3652			return -ENOMEM;
3653	} else {
3654		private_data = NULL;
3655	}
3656
3657	if (conn_param->private_data && conn_param->private_data_len)
3658		memcpy((char *)private_data + offset, conn_param->private_data,
3659		       conn_param->private_data_len);
3660
3661	id = ib_create_cm_id(id_priv->id.device, cma_ib_handler, id_priv);
3662	if (IS_ERR(id)) {
3663		ret = PTR_ERR(id);
3664		goto out;
3665	}
3666	id_priv->cm_id.ib = id;
3667
3668	route = &id_priv->id.route;
3669	if (private_data) {
3670		ret = cma_format_hdr(private_data, id_priv);
3671		if (ret)
3672			goto out;
3673		req.private_data = private_data;
3674	}
3675
3676	req.primary_path = &route->path_rec[0];
3677	if (route->num_paths == 2)
3678		req.alternate_path = &route->path_rec[1];
3679
3680	req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
3681	req.qp_num = id_priv->qp_num;
3682	req.qp_type = id_priv->id.qp_type;
3683	req.starting_psn = id_priv->seq_num;
3684	req.responder_resources = conn_param->responder_resources;
3685	req.initiator_depth = conn_param->initiator_depth;
3686	req.flow_control = conn_param->flow_control;
3687	req.retry_count = min_t(u8, 7, conn_param->retry_count);
3688	req.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count);
3689	req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
3690	req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
3691	req.max_cm_retries = CMA_MAX_CM_RETRIES;
3692	req.srq = id_priv->srq ? 1 : 0;
3693
3694	ret = ib_send_cm_req(id_priv->cm_id.ib, &req);
3695out:
3696	if (ret && !IS_ERR(id)) {
3697		ib_destroy_cm_id(id);
3698		id_priv->cm_id.ib = NULL;
3699	}
3700
3701	kfree(private_data);
3702	return ret;
3703}
3704
3705static int cma_connect_iw(struct rdma_id_private *id_priv,
3706			  struct rdma_conn_param *conn_param)
3707{
3708	struct iw_cm_id *cm_id;
3709	int ret;
3710	struct iw_cm_conn_param iw_param;
3711
3712	cm_id = iw_create_cm_id(id_priv->id.device, cma_iw_handler, id_priv);
3713	if (IS_ERR(cm_id))
3714		return PTR_ERR(cm_id);
3715
3716	cm_id->tos = id_priv->tos;
3717	id_priv->cm_id.iw = cm_id;
3718
3719	memcpy(&cm_id->local_addr, cma_src_addr(id_priv),
3720	       rdma_addr_size(cma_src_addr(id_priv)));
3721	memcpy(&cm_id->remote_addr, cma_dst_addr(id_priv),
3722	       rdma_addr_size(cma_dst_addr(id_priv)));
3723
3724	ret = cma_modify_qp_rtr(id_priv, conn_param);
3725	if (ret)
3726		goto out;
3727
3728	if (conn_param) {
3729		iw_param.ord = conn_param->initiator_depth;
3730		iw_param.ird = conn_param->responder_resources;
3731		iw_param.private_data = conn_param->private_data;
3732		iw_param.private_data_len = conn_param->private_data_len;
3733		iw_param.qpn = id_priv->id.qp ? id_priv->qp_num : conn_param->qp_num;
3734	} else {
3735		memset(&iw_param, 0, sizeof iw_param);
3736		iw_param.qpn = id_priv->qp_num;
3737	}
3738	ret = iw_cm_connect(cm_id, &iw_param);
3739out:
3740	if (ret) {
3741		iw_destroy_cm_id(cm_id);
3742		id_priv->cm_id.iw = NULL;
3743	}
3744	return ret;
3745}
3746
3747int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
3748{
3749	struct rdma_id_private *id_priv;
3750	int ret;
3751
3752	id_priv = container_of(id, struct rdma_id_private, id);
3753	if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT))
3754		return -EINVAL;
3755
3756	if (!id->qp) {
3757		id_priv->qp_num = conn_param->qp_num;
3758		id_priv->srq = conn_param->srq;
3759	}
3760
3761	if (rdma_cap_ib_cm(id->device, id->port_num)) {
3762		if (id->qp_type == IB_QPT_UD)
3763			ret = cma_resolve_ib_udp(id_priv, conn_param);
3764		else
3765			ret = cma_connect_ib(id_priv, conn_param);
3766	} else if (rdma_cap_iw_cm(id->device, id->port_num))
3767		ret = cma_connect_iw(id_priv, conn_param);
3768	else
3769		ret = -ENOSYS;
3770	if (ret)
3771		goto err;
3772
3773	return 0;
3774err:
3775	cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_ROUTE_RESOLVED);
3776	return ret;
3777}
3778EXPORT_SYMBOL(rdma_connect);
3779
3780static int cma_accept_ib(struct rdma_id_private *id_priv,
3781			 struct rdma_conn_param *conn_param)
3782{
3783	struct ib_cm_rep_param rep;
3784	int ret;
3785
3786	ret = cma_modify_qp_rtr(id_priv, conn_param);
3787	if (ret)
3788		goto out;
3789
3790	ret = cma_modify_qp_rts(id_priv, conn_param);
3791	if (ret)
3792		goto out;
3793
3794	memset(&rep, 0, sizeof rep);
3795	rep.qp_num = id_priv->qp_num;
3796	rep.starting_psn = id_priv->seq_num;
3797	rep.private_data = conn_param->private_data;
3798	rep.private_data_len = conn_param->private_data_len;
3799	rep.responder_resources = conn_param->responder_resources;
3800	rep.initiator_depth = conn_param->initiator_depth;
3801	rep.failover_accepted = 0;
3802	rep.flow_control = conn_param->flow_control;
3803	rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count);
3804	rep.srq = id_priv->srq ? 1 : 0;
3805
3806	ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep);
3807out:
3808	return ret;
3809}
3810
3811static int cma_accept_iw(struct rdma_id_private *id_priv,
3812		  struct rdma_conn_param *conn_param)
3813{
3814	struct iw_cm_conn_param iw_param;
3815	int ret;
3816
3817	ret = cma_modify_qp_rtr(id_priv, conn_param);
3818	if (ret)
3819		return ret;
3820
3821	iw_param.ord = conn_param->initiator_depth;
3822	iw_param.ird = conn_param->responder_resources;
3823	iw_param.private_data = conn_param->private_data;
3824	iw_param.private_data_len = conn_param->private_data_len;
3825	if (id_priv->id.qp) {
3826		iw_param.qpn = id_priv->qp_num;
3827	} else
3828		iw_param.qpn = conn_param->qp_num;
3829
3830	return iw_cm_accept(id_priv->cm_id.iw, &iw_param);
3831}
3832
3833static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
3834			     enum ib_cm_sidr_status status, u32 qkey,
3835			     const void *private_data, int private_data_len)
3836{
3837	struct ib_cm_sidr_rep_param rep;
3838	int ret;
3839
3840	memset(&rep, 0, sizeof rep);
3841	rep.status = status;
3842	if (status == IB_SIDR_SUCCESS) {
3843		ret = cma_set_qkey(id_priv, qkey);
3844		if (ret)
3845			return ret;
3846		rep.qp_num = id_priv->qp_num;
3847		rep.qkey = id_priv->qkey;
3848	}
3849	rep.private_data = private_data;
3850	rep.private_data_len = private_data_len;
3851
3852	return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
3853}
3854
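/*
 * rdma_accept - Accept a connection on an id in the RDMA_CM_CONNECT state.
 * Dispatches to a SIDR reply (UD), an IB CM REP or an iWARP accept based on
 * the transport; on failure the QP is moved to the error state and the
 * request is rejected.
 */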
3855int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
3856{
3857	struct rdma_id_private *id_priv;
3858	int ret;
3859
3860	id_priv = container_of(id, struct rdma_id_private, id);
3861
3862	id_priv->owner = task_pid_nr(current);
3863
3864	if (!cma_comp(id_priv, RDMA_CM_CONNECT))
3865		return -EINVAL;
3866
3867	if (!id->qp && conn_param) {
3868		id_priv->qp_num = conn_param->qp_num;
3869		id_priv->srq = conn_param->srq;
3870	}
3871
3872	if (rdma_cap_ib_cm(id->device, id->port_num)) {
3873		if (id->qp_type == IB_QPT_UD) {
3874			if (conn_param)
3875				ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
3876							conn_param->qkey,
3877							conn_param->private_data,
3878							conn_param->private_data_len);
3879			else
3880				ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
3881							0, NULL, 0);
3882		} else {
3883			if (conn_param)
3884				ret = cma_accept_ib(id_priv, conn_param);
3885			else
3886				ret = cma_rep_recv(id_priv);
3887		}
3888	} else if (rdma_cap_iw_cm(id->device, id->port_num))
3889		ret = cma_accept_iw(id_priv, conn_param);
3890	else
3891		ret = -ENOSYS;
3892
3893	if (ret)
3894		goto reject;
3895
3896	return 0;
3897reject:
3898	cma_modify_qp_err(id_priv);
3899	rdma_reject(id, NULL, 0);
3900	return ret;
3901}
3902EXPORT_SYMBOL(rdma_accept);
3903
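/*
 * rdma_notify - Forward a QP event (e.g. IB_EVENT_COMM_EST) to the IB CM so
 * it can update the connection state; a no-op for other node types.
 */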
3904int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
3905{
3906	struct rdma_id_private *id_priv;
3907	int ret;
3908
3909	id_priv = container_of(id, struct rdma_id_private, id);
3910	if (!id_priv->cm_id.ib)
3911		return -EINVAL;
3912
3913	switch (id->device->node_type) {
3914	case RDMA_NODE_IB_CA:
3915		ret = ib_cm_notify(id_priv->cm_id.ib, event);
3916		break;
3917	default:
3918		ret = 0;
3919		break;
3920	}
3921	return ret;
3922}
3923EXPORT_SYMBOL(rdma_notify);
3924
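/*
 * rdma_reject - Reject a connection request with optional private data,
 * using a SIDR reject for UD services, an IB CM REJ otherwise, or an iWARP
 * reject for iWARP devices.
 */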
3925int rdma_reject(struct rdma_cm_id *id, const void *private_data,
3926		u8 private_data_len)
3927{
3928	struct rdma_id_private *id_priv;
3929	int ret;
3930
3931	id_priv = container_of(id, struct rdma_id_private, id);
3932	if (!id_priv->cm_id.ib)
3933		return -EINVAL;
3934
3935	if (rdma_cap_ib_cm(id->device, id->port_num)) {
3936		if (id->qp_type == IB_QPT_UD)
3937			ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, 0,
3938						private_data, private_data_len);
3939		else
3940			ret = ib_send_cm_rej(id_priv->cm_id.ib,
3941					     IB_CM_REJ_CONSUMER_DEFINED, NULL,
3942					     0, private_data, private_data_len);
3943	} else if (rdma_cap_iw_cm(id->device, id->port_num)) {
3944		ret = iw_cm_reject(id_priv->cm_id.iw,
3945				   private_data, private_data_len);
3946	} else
3947		ret = -ENOSYS;
3948
3949	return ret;
3950}
3951EXPORT_SYMBOL(rdma_reject);
3952
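/*
 * rdma_disconnect - Tear down a connection.  For IB the QP is moved to the
 * error state and a DREQ is sent (or a DREP, if a DREQ was already
 * received); for iWARP the iWARP CM disconnect is used.
 */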
3953int rdma_disconnect(struct rdma_cm_id *id)
3954{
3955	struct rdma_id_private *id_priv;
3956	int ret;
3957
3958	id_priv = container_of(id, struct rdma_id_private, id);
3959	if (!id_priv->cm_id.ib)
3960		return -EINVAL;
3961
3962	if (rdma_cap_ib_cm(id->device, id->port_num)) {
3963		ret = cma_modify_qp_err(id_priv);
3964		if (ret)
3965			goto out;
3966		/* Initiate or respond to a disconnect. */
3967		if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0))
3968			ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0);
3969	} else if (rdma_cap_iw_cm(id->device, id->port_num)) {
3970		ret = iw_cm_disconnect(id_priv->cm_id.iw, 0);
3971	} else
3972		ret = -EINVAL;
3973
3974out:
3975	return ret;
3976}
3977EXPORT_SYMBOL(rdma_disconnect);
3978
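/*
 * Multicast join completion handler (shared by SA joins and, via
 * iboe_mcast_work_handler, RoCE joins): on success set the Q_Key, attach
 * the QP to the group and report RDMA_CM_EVENT_MULTICAST_JOIN; otherwise
 * report RDMA_CM_EVENT_MULTICAST_ERROR.
 */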
3979static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
3980{
3981	struct rdma_id_private *id_priv;
3982	struct cma_multicast *mc = multicast->context;
3983	struct rdma_cm_event event;
3984	int ret = 0;
3985
3986	id_priv = mc->id_priv;
3987	mutex_lock(&id_priv->handler_mutex);
3988	if (id_priv->state != RDMA_CM_ADDR_BOUND &&
3989	    id_priv->state != RDMA_CM_ADDR_RESOLVED)
3990		goto out;
3991
3992	if (!status)
3993		status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey));
3994	mutex_lock(&id_priv->qp_mutex);
3995	if (!status && id_priv->id.qp)
3996		status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
3997					 be16_to_cpu(multicast->rec.mlid));
3998	mutex_unlock(&id_priv->qp_mutex);
3999
4000	memset(&event, 0, sizeof event);
4001	event.status = status;
4002	event.param.ud.private_data = mc->context;
4003	if (!status) {
4004		struct rdma_dev_addr *dev_addr =
4005			&id_priv->id.route.addr.dev_addr;
4006		struct net_device *ndev =
4007			dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
4008		enum ib_gid_type gid_type =
4009			id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
4010			rdma_start_port(id_priv->cma_dev->device)];
4011
4012		event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
4013		ret = ib_init_ah_from_mcmember(id_priv->id.device,
4014					       id_priv->id.port_num,
4015					       &multicast->rec,
4016					       ndev, gid_type,
4017					       &event.param.ud.ah_attr);
4018		if (ret)
4019			event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
4020
4021		event.param.ud.qp_num = 0xFFFFFF;
4022		event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
4023		if (ndev)
4024			dev_put(ndev);
4025	} else
4026		event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
4027
4028	ret = id_priv->id.event_handler(&id_priv->id, &event);
4029	if (ret) {
4030		cma_exch(id_priv, RDMA_CM_DESTROYING);
4031		mutex_unlock(&id_priv->handler_mutex);
4032		rdma_destroy_id(&id_priv->id);
4033		return 0;
4034	}
4035
4036out:
4037	mutex_unlock(&id_priv->handler_mutex);
4038	return 0;
4039}
4040
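/*
 * Derive the MGID from the consumer supplied address: wildcard addresses
 * map to the zero MGID, SA-assigned IPv6 MGIDs and AF_IB addresses are
 * copied as-is, and other IPv4/IPv6 addresses use the IP-over-IB multicast
 * mapping (with the RDMA CM signature byte for RDMA_PS_UDP).
 */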
4041static void cma_set_mgid(struct rdma_id_private *id_priv,
4042			 struct sockaddr *addr, union ib_gid *mgid)
4043{
4044	unsigned char mc_map[MAX_ADDR_LEN];
4045	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
4046	struct sockaddr_in *sin = (struct sockaddr_in *) addr;
4047	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr;
4048
4049	if (cma_any_addr(addr)) {
4050		memset(mgid, 0, sizeof *mgid);
4051	} else if ((addr->sa_family == AF_INET6) &&
4052		   ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFFF0FFFF) ==
4053								 0xFF10A01B)) {
4054		/* IPv6 address is an SA assigned MGID. */
4055		memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
4056	} else if (addr->sa_family == AF_IB) {
4057		memcpy(mgid, &((struct sockaddr_ib *) addr)->sib_addr, sizeof *mgid);
4058	} else if (addr->sa_family == AF_INET6) {
4059		ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map);
4060		if (id_priv->id.ps == RDMA_PS_UDP)
4061			mc_map[7] = 0x01;	/* Use RDMA CM signature */
4062		*mgid = *(union ib_gid *) (mc_map + 4);
4063	} else {
4064		ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map);
4065		if (id_priv->id.ps == RDMA_PS_UDP)
4066			mc_map[7] = 0x01;	/* Use RDMA CM signature */
4067		*mgid = *(union ib_gid *) (mc_map + 4);
4068	}
4069}
4070
4071static void cma_query_sa_classport_info_cb(int status,
4072					   struct ib_class_port_info *rec,
4073					   void *context)
4074{
4075	struct class_port_info_context *cb_ctx = context;
4076
4077	WARN_ON(!context);
4078
4079	if (status || !rec) {
4080		pr_debug("RDMA CM: %s port %u failed query ClassPortInfo status: %d\n",
4081			 cb_ctx->device->name, cb_ctx->port_num, status);
4082		goto out;
4083	}
4084
4085	memcpy(cb_ctx->class_port_info, rec, sizeof(struct ib_class_port_info));
4086
4087out:
4088	complete(&cb_ctx->done);
4089}
4090
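/*
 * Synchronously query the SA's ClassPortInfo for the given port; the
 * completion callback copies the result into *class_port_info.
 */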
4091static int cma_query_sa_classport_info(struct ib_device *device, u8 port_num,
4092				       struct ib_class_port_info *class_port_info)
4093{
4094	struct class_port_info_context *cb_ctx;
4095	int ret;
4096
4097	cb_ctx = kmalloc(sizeof(*cb_ctx), GFP_KERNEL);
4098	if (!cb_ctx)
4099		return -ENOMEM;
4100
4101	cb_ctx->device = device;
4102	cb_ctx->class_port_info = class_port_info;
4103	cb_ctx->port_num = port_num;
4104	init_completion(&cb_ctx->done);
4105
4106	ret = ib_sa_classport_info_rec_query(&sa_client, device, port_num,
4107					     CMA_QUERY_CLASSPORT_INFO_TIMEOUT,
4108					     GFP_KERNEL, cma_query_sa_classport_info_cb,
4109					     cb_ctx, &cb_ctx->sa_query);
4110	if (ret < 0) {
4111		pr_err("RDMA CM: %s port %u failed to send ClassPortInfo query, ret: %d\n",
4112		       device->name, port_num, ret);
4113		goto out;
4114	}
4115
4116	wait_for_completion(&cb_ctx->done);
	/*
	 * ib_sa_classport_info_rec_query() returns the positive query ID when
	 * the query is posted successfully; normalize it to zero so callers
	 * checking for a non-zero return do not mistake success for failure.
	 */
	ret = 0;
4117
4118out:
4119	kfree(cb_ctx);
4120	return ret;
4121}
4122
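/*
 * Join an IB multicast group through the SA: build the MCMember record
 * (MGID, P_Key, Q_Key, join state) and post the join with
 * cma_ib_mc_handler() as the completion callback.  Send-only full-member
 * joins first verify that the SM supports them via ClassPortInfo.
 */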
4123static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
4124				 struct cma_multicast *mc)
4125{
4126	struct ib_sa_mcmember_rec rec;
4127	struct ib_class_port_info class_port_info;
4128	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
4129	ib_sa_comp_mask comp_mask;
4130	int ret;
4131
4132	ib_addr_get_mgid(dev_addr, &rec.mgid);
4133	ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num,
4134				     &rec.mgid, &rec);
4135	if (ret)
4136		return ret;
4137
4138	ret = cma_set_qkey(id_priv, 0);
4139	if (ret)
4140		return ret;
4141
4142	cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid);
4143	rec.qkey = cpu_to_be32(id_priv->qkey);
4144	rdma_addr_get_sgid(dev_addr, &rec.port_gid);
4145	rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
4146	rec.join_state = mc->join_state;
4147
4148	if (rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) {
4149		ret = cma_query_sa_classport_info(id_priv->id.device,
4150						  id_priv->id.port_num,
4151						  &class_port_info);
4152
4153		if (ret)
4154			return ret;
4155
4156		if (!(ib_get_cpi_capmask2(&class_port_info) &
4157		      IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT)) {
4158			pr_warn("RDMA CM: %s port %u unable to join multicast group\n"
4159				"RDMA CM: SM does not support the Send Only Full Member option\n",
4160				id_priv->id.device->name, id_priv->id.port_num);
4161			return -EOPNOTSUPP;
4162		}
4163	}
4164
4165	comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
4166		    IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE |
4167		    IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL |
4168		    IB_SA_MCMEMBER_REC_FLOW_LABEL |
4169		    IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;
4170
4171	if (id_priv->id.ps == RDMA_PS_IPOIB)
4172		comp_mask |= IB_SA_MCMEMBER_REC_RATE |
4173			     IB_SA_MCMEMBER_REC_RATE_SELECTOR |
4174			     IB_SA_MCMEMBER_REC_MTU_SELECTOR |
4175			     IB_SA_MCMEMBER_REC_MTU |
4176			     IB_SA_MCMEMBER_REC_HOP_LIMIT;
4177
4178	mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
4179						id_priv->id.port_num, &rec,
4180						comp_mask, GFP_KERNEL,
4181						cma_ib_mc_handler, mc);
4182	return PTR_ERR_OR_ZERO(mc->multicast.ib);
4183}
4184
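/*
 * Deferred completion for RoCE joins: run the common multicast handler from
 * workqueue context and drop the work item's reference on the group.
 */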
4185static void iboe_mcast_work_handler(struct work_struct *work)
4186{
4187	struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work);
4188	struct cma_multicast *mc = mw->mc;
4189	struct ib_sa_multicast *m = mc->multicast.ib;
4190
4191	mc->multicast.ib->context = mc;
4192	cma_ib_mc_handler(0, m);
4193	kref_put(&mc->mcref, release_mc);
4194	kfree(mw);
4195}
4196
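/*
 * Build the MGID for a RoCE join: IPv6 group addresses are used directly,
 * while IPv4 addresses are embedded in a mapped MGID whose leading bytes
 * depend on the GID type (RoCEv1 vs RoCEv2).
 */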
4197static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
4198			      enum ib_gid_type gid_type)
4199{
4200	struct sockaddr_in *sin = (struct sockaddr_in *)addr;
4201	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;
4202
4203	if (cma_any_addr(addr)) {
4204		memset(mgid, 0, sizeof *mgid);
4205	} else if (addr->sa_family == AF_INET6) {
4206		memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
4207	} else {
4208		mgid->raw[0] =
4209			(gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ? 0 : 0xff;
4210		mgid->raw[1] =
4211			(gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ? 0 : 0x0e;
4212		mgid->raw[2] = 0;
4213		mgid->raw[3] = 0;
4214		mgid->raw[4] = 0;
4215		mgid->raw[5] = 0;
4216		mgid->raw[6] = 0;
4217		mgid->raw[7] = 0;
4218		mgid->raw[8] = 0;
4219		mgid->raw[9] = 0;
4220		mgid->raw[10] = 0xff;
4221		mgid->raw[11] = 0xff;
4222		*(__be32 *)(&mgid->raw[12]) = sin->sin_addr.s_addr;
4223	}
4224}
4225
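/*
 * "Join" a RoCE multicast group.  No SA is involved: the group record is
 * synthesized locally, an IGMP join is sent for RoCEv2 (unless this is a
 * send-only join) and completion is reported through a work item that
 * reuses cma_ib_mc_handler().
 */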
4226static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
4227				   struct cma_multicast *mc)
4228{
4229	struct iboe_mcast_work *work;
4230	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
4231	int err = 0;
4232	struct sockaddr *addr = (struct sockaddr *)&mc->addr;
4233	struct net_device *ndev = NULL;
4234	enum ib_gid_type gid_type;
4235	bool send_only;
4236
4237	send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN);
4238
4239	if (cma_zero_addr((struct sockaddr *)&mc->addr))
4240		return -EINVAL;
4241
4242	work = kzalloc(sizeof *work, GFP_KERNEL);
4243	if (!work)
4244		return -ENOMEM;
4245
4246	mc->multicast.ib = kzalloc(sizeof(struct ib_sa_multicast), GFP_KERNEL);
4247	if (!mc->multicast.ib) {
4248		err = -ENOMEM;
4249		goto out1;
4250	}
4251
4252	gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
4253		   rdma_start_port(id_priv->cma_dev->device)];
4254	cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid, gid_type);
4255
4256	mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff);
4257	if (id_priv->id.ps == RDMA_PS_UDP)
4258		mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
4259
4260	if (dev_addr->bound_dev_if)
4261		ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
4262	if (!ndev) {
4263		err = -ENODEV;
4264		goto out2;
4265	}
4266	mc->multicast.ib->rec.rate = iboe_get_rate(ndev);
4267	mc->multicast.ib->rec.hop_limit = 1;
4268	mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->if_mtu);
4269
4270	if (addr->sa_family == AF_INET || addr->sa_family == AF_INET6) {
4271		if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
4272			mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
4273			if (!send_only) {
4274				err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid,
4275						    true);
4276				if (!err)
4277					mc->igmp_joined = true;
4278			}
4279		}
4280	} else {
4281		if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
4282			err = -ENOTSUPP;
4283	}
4284	dev_put(ndev);
4285	if (err || !mc->multicast.ib->rec.mtu) {
4286		if (!err)
4287			err = -EINVAL;
4288		goto out2;
4289	}
4290	rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
4291		    &mc->multicast.ib->rec.port_gid);
4292	work->id = id_priv;
4293	work->mc = mc;
4294	INIT_WORK(&work->work, iboe_mcast_work_handler);
4295	kref_get(&mc->mcref);
4296	queue_work(cma_wq, &work->work);
4297
4298	return 0;
4299
4300out2:
4301	kfree(mc->multicast.ib);
4302out1:
4303	kfree(work);
4304	return err;
4305}
4306
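/*
 * rdma_join_multicast - Join the multicast group associated with the given
 * address.  The id must have a bound device and be in the address-bound or
 * address-resolved state; RoCE joins are handled locally while IB joins go
 * through the SA.
 */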
4307int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
4308			u8 join_state, void *context)
4309{
4310	struct rdma_id_private *id_priv;
4311	struct cma_multicast *mc;
4312	int ret;
4313
4314	if (!id->device)
4315		return -EINVAL;
4316
4317	id_priv = container_of(id, struct rdma_id_private, id);
4318	if (!cma_comp(id_priv, RDMA_CM_ADDR_BOUND) &&
4319	    !cma_comp(id_priv, RDMA_CM_ADDR_RESOLVED))
4320		return -EINVAL;
4321
4322	mc = kmalloc(sizeof *mc, GFP_KERNEL);
4323	if (!mc)
4324		return -ENOMEM;
4325
4326	memcpy(&mc->addr, addr, rdma_addr_size(addr));
4327	mc->context = context;
4328	mc->id_priv = id_priv;
4329	mc->igmp_joined = false;
4330	mc->join_state = join_state;
4331	spin_lock(&id_priv->lock);
4332	list_add(&mc->list, &id_priv->mc_list);
4333	spin_unlock(&id_priv->lock);
4334
4335	if (rdma_protocol_roce(id->device, id->port_num)) {
4336		kref_init(&mc->mcref);
4337		ret = cma_iboe_join_multicast(id_priv, mc);
4338	} else if (rdma_cap_ib_mcast(id->device, id->port_num))
4339		ret = cma_join_ib_multicast(id_priv, mc);
4340	else
4341		ret = -ENOSYS;
4342
4343	if (ret) {
4344		spin_lock_irq(&id_priv->lock);
4345		list_del(&mc->list);
4346		spin_unlock_irq(&id_priv->lock);
4347		kfree(mc);
4348	}
4349	return ret;
4350}
4351EXPORT_SYMBOL(rdma_join_multicast);
4352
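/*
 * rdma_leave_multicast - Leave a previously joined group: detach the QP if
 * one is attached, then free the SA multicast (IB) or send an IGMP leave
 * and drop the group reference (RoCE).
 */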
4353void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
4354{
4355	struct rdma_id_private *id_priv;
4356	struct cma_multicast *mc;
4357
4358	id_priv = container_of(id, struct rdma_id_private, id);
4359	spin_lock_irq(&id_priv->lock);
4360	list_for_each_entry(mc, &id_priv->mc_list, list) {
4361		if (!memcmp(&mc->addr, addr, rdma_addr_size(addr))) {
4362			list_del(&mc->list);
4363			spin_unlock_irq(&id_priv->lock);
4364
4365			if (id->qp)
4366				ib_detach_mcast(id->qp,
4367						&mc->multicast.ib->rec.mgid,
4368						be16_to_cpu(mc->multicast.ib->rec.mlid));
4369
4370			BUG_ON(id_priv->cma_dev->device != id->device);
4371
4372			if (rdma_cap_ib_mcast(id->device, id->port_num)) {
4373				ib_sa_free_multicast(mc->multicast.ib);
4374				kfree(mc);
4375			} else if (rdma_protocol_roce(id->device, id->port_num)) {
4376				if (mc->igmp_joined) {
4377					struct rdma_dev_addr *dev_addr =
4378						&id->route.addr.dev_addr;
4379					struct net_device *ndev = NULL;
4380
4381					if (dev_addr->bound_dev_if)
4382						ndev = dev_get_by_index(dev_addr->net,
4383									dev_addr->bound_dev_if);
4384					if (ndev) {
4385						cma_igmp_send(ndev,
4386							      &mc->multicast.ib->rec.mgid,
4387							      false);
4388						dev_put(ndev);
4389					}
4390					mc->igmp_joined = false;
4391				}
4392				kref_put(&mc->mcref, release_mc);
4393			}
4394			return;
4395		}
4396	}
4397	spin_unlock_irq(&id_priv->lock);
4398}
4399EXPORT_SYMBOL(rdma_leave_multicast);
4400
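/*
 * Handler for the per-port default_roce_mode_port<N> sysctls created in
 * cma_add_one(): reads report the port's default GID type and writes select
 * a new one by name (e.g. "RoCE v2").
 */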
4401static int
4402sysctl_cma_default_roce_mode(SYSCTL_HANDLER_ARGS)
4403{
4404	struct cma_device *cma_dev = arg1;
4405	const int port = arg2;
4406	char buf[64];
4407	int error;
4408
4409	strlcpy(buf, ib_cache_gid_type_str(
4410	    cma_get_default_gid_type(cma_dev, port)), sizeof(buf));
4411
4412	error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
4413	if (error != 0 || req->newptr == NULL)
4414		goto done;
4415
4416	error = ib_cache_gid_parse_type_str(buf);
4417	if (error < 0) {
4418		error = EINVAL;
4419		goto done;
4420	}
4421
4422	cma_set_default_gid_type(cma_dev, port, error);
4423	error = 0;
4424done:
4425	return (error);
4426}
4427
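/*
 * IB client callback invoked when a device registers: allocate per-device
 * state, choose a default GID type for each port (preferring RoCEv2 when
 * supported), attach listeners for wildcard ids and create the per-port
 * default_roce_mode sysctl nodes.
 */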
4428static void cma_add_one(struct ib_device *device)
4429{
4430	struct cma_device *cma_dev;
4431	struct rdma_id_private *id_priv;
4432	unsigned int i;
4433
4434	cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
4435	if (!cma_dev)
4436		return;
4437
4438	sysctl_ctx_init(&cma_dev->sysctl_ctx);
4439
4440	cma_dev->device = device;
4441	cma_dev->default_gid_type = kcalloc(device->phys_port_cnt,
4442					    sizeof(*cma_dev->default_gid_type),
4443					    GFP_KERNEL);
4444	if (!cma_dev->default_gid_type) {
4445		kfree(cma_dev);
4446		return;
4447	}
4448	for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
4449		unsigned long supported_gids;
4450		unsigned int default_gid_type;
4451
4452		supported_gids = roce_gid_type_mask_support(device, i);
4453
4454		if (WARN_ON(!supported_gids)) {
4455			/* set something valid */
4456			default_gid_type = 0;
4457		} else if (test_bit(IB_GID_TYPE_ROCE_UDP_ENCAP, &supported_gids)) {
4458			/* prefer RoCEv2, if supported */
4459			default_gid_type = IB_GID_TYPE_ROCE_UDP_ENCAP;
4460		} else {
4461			default_gid_type = find_first_bit(&supported_gids,
4462			    BITS_PER_LONG);
4463		}
4464		cma_dev->default_gid_type[i - rdma_start_port(device)] =
4465		    default_gid_type;
4466	}
4467
4468	init_completion(&cma_dev->comp);
4469	atomic_set(&cma_dev->refcount, 1);
4470	INIT_LIST_HEAD(&cma_dev->id_list);
4471	ib_set_client_data(device, &cma_client, cma_dev);
4472
4473	mutex_lock(&lock);
4474	list_add_tail(&cma_dev->list, &dev_list);
4475	list_for_each_entry(id_priv, &listen_any_list, list)
4476		cma_listen_on_dev(id_priv, cma_dev);
4477	mutex_unlock(&lock);
4478
4479	for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
4480		char buf[64];
4481
4482		snprintf(buf, sizeof(buf), "default_roce_mode_port%d", i);
4483
4484		(void) SYSCTL_ADD_PROC(&cma_dev->sysctl_ctx,
4485		    SYSCTL_CHILDREN(device->ports_parent->parent->oidp),
4486		    OID_AUTO, buf, CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
4487		    cma_dev, i, &sysctl_cma_default_roce_mode, "A",
4488		    "Default RoCE mode. Valid values: IB/RoCE v1 and RoCE v2");
4489	}
4490}
4491
4492static int cma_remove_id_dev(struct rdma_id_private *id_priv)
4493{
4494	struct rdma_cm_event event;
4495	enum rdma_cm_state state;
4496	int ret = 0;
4497
4498	/* Record that we want to remove the device */
4499	state = cma_exch(id_priv, RDMA_CM_DEVICE_REMOVAL);
4500	if (state == RDMA_CM_DESTROYING)
4501		return 0;
4502
4503	cma_cancel_operation(id_priv, state);
4504	mutex_lock(&id_priv->handler_mutex);
4505
4506	/* Check for destruction from another callback. */
4507	if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL))
4508		goto out;
4509
4510	memset(&event, 0, sizeof event);
4511	event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
4512	ret = id_priv->id.event_handler(&id_priv->id, &event);
4513out:
4514	mutex_unlock(&id_priv->handler_mutex);
4515	return ret;
4516}
4517
4518static void cma_process_remove(struct cma_device *cma_dev)
4519{
4520	struct rdma_id_private *id_priv;
4521	int ret;
4522
4523	mutex_lock(&lock);
4524	while (!list_empty(&cma_dev->id_list)) {
4525		id_priv = list_entry(cma_dev->id_list.next,
4526				     struct rdma_id_private, list);
4527
4528		list_del(&id_priv->listen_list);
4529		list_del_init(&id_priv->list);
4530		atomic_inc(&id_priv->refcount);
4531		mutex_unlock(&lock);
4532
4533		ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv);
4534		cma_deref_id(id_priv);
4535		if (ret)
4536			rdma_destroy_id(&id_priv->id);
4537
4538		mutex_lock(&lock);
4539	}
4540	mutex_unlock(&lock);
4541
4542	cma_deref_dev(cma_dev);
4543	wait_for_completion(&cma_dev->comp);
4544}
4545
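/*
 * IB client callback invoked when a device unregisters: report
 * RDMA_CM_EVENT_DEVICE_REMOVAL to every id bound to the device, wait for
 * all references to drop, then free the sysctl context and device state.
 */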
4546static void cma_remove_one(struct ib_device *device, void *client_data)
4547{
4548	struct cma_device *cma_dev = client_data;
4549
4550	if (!cma_dev)
4551		return;
4552
4553	mutex_lock(&lock);
4554	list_del(&cma_dev->list);
4555	mutex_unlock(&lock);
4556
4557	cma_process_remove(cma_dev);
4558	sysctl_ctx_free(&cma_dev->sysctl_ctx);
4559	kfree(cma_dev->default_gid_type);
4560	kfree(cma_dev);
4561}
4562
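/*
 * Per-VNET initialization: each network stack instance gets its own set of
 * port-space idrs so rdma_cm port allocations are kept per-vnet.
 */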
4563static void cma_init_vnet(void *arg)
4564{
4565	struct cma_pernet *pernet = &VNET(cma_pernet);
4566
4567	idr_init(&pernet->tcp_ps);
4568	idr_init(&pernet->udp_ps);
4569	idr_init(&pernet->ipoib_ps);
4570	idr_init(&pernet->ib_ps);
4571	idr_init(&pernet->sdp_ps);
4572}
4573VNET_SYSINIT(cma_init_vnet, SI_SUB_OFED_MODINIT - 1, SI_ORDER_FIRST, cma_init_vnet, NULL);
4574
4575static void cma_destroy_vnet(void *arg)
4576{
4577	struct cma_pernet *pernet = &VNET(cma_pernet);
4578
4579	idr_destroy(&pernet->tcp_ps);
4580	idr_destroy(&pernet->udp_ps);
4581	idr_destroy(&pernet->ipoib_ps);
4582	idr_destroy(&pernet->ib_ps);
4583	idr_destroy(&pernet->sdp_ps);
4584}
4585VNET_SYSUNINIT(cma_destroy_vnet, SI_SUB_OFED_MODINIT - 1, SI_ORDER_SECOND, cma_destroy_vnet, NULL);
4586
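/*
 * Module load: create the ordered rdma_cm workqueue, register with the SA
 * and address-resolution services and register the cma IB client; failures
 * unwind the registrations.
 */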
4587static int __init cma_init(void)
4588{
4589	int ret;
4590
4591	cma_wq = alloc_ordered_workqueue("rdma_cm", WQ_MEM_RECLAIM);
4592	if (!cma_wq)
4593		return -ENOMEM;
4594
4595	ib_sa_register_client(&sa_client);
4596	rdma_addr_register_client(&addr_client);
4597
4598	ret = ib_register_client(&cma_client);
4599	if (ret)
4600		goto err;
4601
4602	cma_configfs_init();
4603
4604	return 0;
4605
4606err:
4607	rdma_addr_unregister_client(&addr_client);
4608	ib_sa_unregister_client(&sa_client);
4609	destroy_workqueue(cma_wq);
4610	return ret;
4611}
4612
4613static void __exit cma_cleanup(void)
4614{
4615	cma_configfs_exit();
4616	ib_unregister_client(&cma_client);
4617	rdma_addr_unregister_client(&addr_client);
4618	ib_sa_unregister_client(&sa_client);
4619	destroy_workqueue(cma_wq);
4620}
4621
4622module_init(cma_init);
4623module_exit(cma_cleanup);
4624