xref: /illumos-gate/usr/src/uts/common/sys/idm/idm_impl.h (revision a98e9e2e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 /*
25  * Copyright 2014-2015 Nexenta Systems, Inc.  All rights reserved.
26  */
27 
28 #ifndef	_IDM_IMPL_H_
29 #define	_IDM_IMPL_H_
30 
31 #ifdef	__cplusplus
32 extern "C" {
33 #endif
34 
35 #include <sys/avl.h>
36 #include <sys/socket_impl.h>
37 #include <sys/taskq_impl.h>
38 
39 /*
40  * IDM lock order:
41  *
42  * idm_taskid_table_lock, idm_task_t.idt_mutex
43  */
44 
/*
 * CF_* bit flags.  Each value occupies a distinct bit so they can be OR-ed
 * together in a 32-bit word.
 * NOTE(review): presumably kept in idm_conn_t.ic_state_flags -- confirm
 * against the connection state machine code.
 */
#define	CF_LOGIN_READY		0x00000001
#define	CF_INITIAL_LOGIN	0x00000002
#define	CF_ERROR		0x80000000
48 
/*
 * Role of a connection.  Stored in idm_conn_t.ic_conn_type and tested by
 * IDM_CONN_ISINI() / IDM_CONN_ISTGT().  Numbering starts at 1, so a
 * zero-filled connection matches neither role.
 */
typedef enum {
	CONN_TYPE_INI = 1,
	CONN_TYPE_TGT
} idm_conn_type_t;
53 
/*
 * Watchdog interval, in seconds.
 */
#define	IDM_WD_INTERVAL			5

/*
 * Idle time, in seconds, after which the client "keepalive" callback is
 * invoked for a connection.
 */
#define	IDM_TRANSPORT_KEEPALIVE_IDLE_TIMEOUT	20

/*
 * Idle time, in seconds, after which a TRANSPORT_FAIL event is generated
 * for a connection.  This is longer than the keepalive timeout above.
 */
#define	IDM_TRANSPORT_FAIL_IDLE_TIMEOUT	30
70 
71 /*
72  * IDM reference count structure.  Audit code is shamelessly adapted
73  * from CIFS server.
74  */
75 
/*
 * REFCNT_AUDIT_STACK_DEPTH: number of program counters kept per audit record.
 * REFCNT_AUDIT_BUF_MAX_REC: number of records in each audit buffer.
 *
 * REFCNT_AUDIT() wraps the record index by AND-ing it with anb_max_index.
 * NOTE(review): that only works if the record count is a power of two and
 * anb_max_index is set to (REFCNT_AUDIT_BUF_MAX_REC - 1); the initialization
 * is not in this header -- confirm.
 */
#define	REFCNT_AUDIT_STACK_DEPTH	16
#define	REFCNT_AUDIT_BUF_MAX_REC	16
78 
/*
 * One audit sample, filled in by REFCNT_AUDIT(): the reference count at the
 * time of the sample plus the stack captured by getpcstack().
 */
typedef struct {
	uint32_t		anr_refcnt;	/* copy of ir_refcnt */
	int			anr_depth;	/* getpcstack() return value */
	pc_t			anr_stack[REFCNT_AUDIT_STACK_DEPTH];
} refcnt_audit_record_t;
84 
/*
 * Fixed-size buffer of audit records written by REFCNT_AUDIT().  anb_index
 * is the slot the next sample goes into; after each sample it is incremented
 * and AND-ed with anb_max_index.
 */
typedef struct {
	int			anb_index;	/* next record to write */
	int			anb_max_index;	/* wrap mask for anb_index */
	refcnt_audit_record_t	anb_records[REFCNT_AUDIT_BUF_MAX_REC];
} refcnt_audit_buf_t;
90 
/*
 * REFCNT_AUDIT(_rf_) -- record one audit sample for a reference count.
 *
 * _rf_ must be a pointer to an object with ir_refcnt and ir_audit_buf
 * members (see idm_refcnt_t).  The macro selects the record at anb_index,
 * advances anb_index and wraps it by AND-ing with anb_max_index, then stores
 * the current ir_refcnt and the stack returned by getpcstack() in that
 * record.
 *
 * The body is wrapped in do { } while (0) so that "REFCNT_AUDIT(x);" is a
 * single statement and is safe in an unbraced if/else.  Invoke it with a
 * trailing semicolon.
 *
 * _rf_ is evaluated several times, so it must have no side effects.  The
 * macro takes no lock itself.
 */
#define	REFCNT_AUDIT(_rf_) do {				\
	refcnt_audit_record_t	*anr;			\
							\
	anr = (_rf_)->ir_audit_buf.anb_records;		\
	anr += (_rf_)->ir_audit_buf.anb_index;		\
	(_rf_)->ir_audit_buf.anb_index++;		\
	(_rf_)->ir_audit_buf.anb_index &=		\
	    (_rf_)->ir_audit_buf.anb_max_index;		\
	anr->anr_refcnt = (_rf_)->ir_refcnt;		\
	anr->anr_depth = getpcstack(anr->anr_stack,	\
	    REFCNT_AUDIT_STACK_DEPTH);			\
} while (0)
103 
/* Forward declaration; the full definition of idm_refcnt_s follows below. */
struct idm_refcnt_s;

/*
 * Callback type stored in idm_refcnt_t.ir_cb.  It takes a single opaque
 * object pointer.
 * NOTE(review): presumably called with ir_referenced_obj -- confirm in the
 * refcnt implementation.
 */
typedef void (idm_refcnt_cb_t)(void *ref_obj);
107 
/*
 * Wait state recorded in idm_refcnt_t.ir_waiting.
 * NOTE(review): judging by the names, REF_NOWAIT means nobody is waiting
 * for the count to drain, REF_WAIT_SYNC means a thread is blocked waiting,
 * and REF_WAIT_ASYNC means a callback is pending -- confirm against the
 * refcnt implementation.
 */
typedef enum {
	REF_NOWAIT,
	REF_WAIT_SYNC,
	REF_WAIT_ASYNC
} idm_refcnt_wait_t;
113 
/*
 * IDM reference count.  It is embedded in the service, connection and task
 * structures declared in this header (is_refcnt, ic_refcnt, idt_refcnt).
 */
typedef struct idm_refcnt_s {
	int			ir_refcnt;	/* sampled by REFCNT_AUDIT() */
	void			*ir_referenced_obj;
	idm_refcnt_wait_t	ir_waiting;
	kmutex_t		ir_mutex;
	kcondvar_t		ir_cv;
	idm_refcnt_cb_t		*ir_cb;
	refcnt_audit_buf_t	ir_audit_buf;	/* written by REFCNT_AUDIT() */
} idm_refcnt_t;
123 
/*
 * Connection parameters.  These are populated at connection create, or
 * during key-value negotiation at login.  An instance is embedded in
 * idm_conn_t as ic_conn_params.
 *
 * Note that the struct tag is idm_conn_params_s while the typedef is
 * idm_conn_param_t (singular).
 */
typedef struct idm_conn_params_s {
	uint32_t		max_recv_dataseglen;
	uint32_t		max_xmit_dataseglen;
	uint32_t		conn_login_max;
	uint32_t		conn_login_interval;
	boolean_t		nonblock_socket;
} idm_conn_param_t;
135 
/*
 * IDM service object.  Target connections point back at their service
 * through idm_conn_t.ic_svc_binding.
 * NOTE(review): is_list_node presumably links the service onto
 * idm_global_t.idm_tgt_svc_list -- confirm.
 */
typedef struct idm_svc_s {
	list_node_t		is_list_node;
	kmutex_t		is_mutex;
	kcondvar_t		is_cv;
	kmutex_t		is_count_mutex;
	kcondvar_t		is_count_cv;
	idm_refcnt_t		is_refcnt;
	int			is_online;
	/* transport-specific service components */
	void			*is_so_svc;
	void			*is_iser_svc;
	idm_svc_req_t		is_svc_req;
} idm_svc_t;
149 
/*
 * Sizes, excluding the terminating NUL, of the text buffers in idm_conn_t
 * that hold the TSIH (ic_tsih) and ISID (ic_isid).
 *
 * ISCSI_MAX_TSIH_LEN is the length of a TSIH formatted with "0x%04x".
 * ISCSI_MAX_ISID_LEN is twice ISCSI_ISID_LEN; the expansion is parenthesized
 * so that the macro behaves as a single value in any expression.
 */
#define	ISCSI_MAX_TSIH_LEN	6	/* 0x%04x */
#define	ISCSI_MAX_ISID_LEN	(ISCSI_ISID_LEN * 2)
152 
/*
 * IDM connection.  One structure serves both connection roles; ic_conn_type
 * says which (see IDM_CONN_ISINI() / IDM_CONN_ISTGT()).
 */
typedef struct idm_conn_s {
	list_node_t		ic_list_node;
	void			*ic_handle;
	idm_refcnt_t		ic_refcnt;
	idm_svc_t		*ic_svc_binding; /* Target conn. only */
	idm_sockaddr_t		ic_ini_dst_addr;
	struct sockaddr_storage	ic_laddr;	/* conn local address */
	struct sockaddr_storage	ic_raddr;	/* conn remote address */

	/*
	 * The target name, initiator name, initiator session identifier
	 * (ISID) and target session identifying handle (TSIH) are only
	 * used for target connections.  Each buffer has room for a
	 * terminating NUL.
	 */
	char			ic_target_name[ISCSI_MAX_NAME_LEN + 1];
	char			ic_initiator_name[ISCSI_MAX_NAME_LEN + 1];
	char			ic_tsih[ISCSI_MAX_TSIH_LEN + 1];
	char			ic_isid[ISCSI_MAX_ISID_LEN + 1];
	idm_conn_state_t	ic_state;
	idm_conn_state_t	ic_last_state;
	sm_audit_buf_t		ic_state_audit;
	kmutex_t		ic_state_mutex;
	kcondvar_t		ic_state_cv;
	/* NOTE(review): presumably holds the CF_* flags -- confirm */
	uint32_t		ic_state_flags;
	timeout_id_t		ic_state_timeout;
	struct idm_conn_s	*ic_reinstate_conn; /* For conn reinst. */
	struct idm_conn_s	*ic_logout_conn; /* For other conn logout */
	taskq_t			*ic_state_taskq;
	int			ic_pdu_events;
	boolean_t		ic_login_info_valid;
	boolean_t		ic_rdma_extensions;
	uint16_t		ic_login_cid;

	kmutex_t		ic_mutex;
	kcondvar_t		ic_cv;
	idm_status_t		ic_conn_sm_status;

	boolean_t		ic_ffp;
	boolean_t		ic_keepalive;
	uint32_t		ic_internal_cid;

	/* NOTE(review): presumably holds the IDM_CONN_* flags -- confirm */
	uint32_t		ic_conn_flags;
	idm_conn_type_t		ic_conn_type;
	idm_conn_ops_t		ic_conn_ops;
	idm_transport_ops_t	*ic_transport_ops;
	idm_transport_type_t	ic_transport_type;
	int			ic_transport_hdrlen;
	void			*ic_transport_private;
	idm_conn_param_t	ic_conn_params;
	/*
	 * Save client callback to interpose idm callback
	 */
	idm_pdu_cb_t		*ic_client_callback;
	clock_t			ic_timestamp;
} idm_conn_t;
208 
/*
 * IDM_CONN_* bit flags.  IDM_CONN_USE_SCOREBOARD is the flag named in the
 * comments on idm_task_t.idt_tx_bytes / idt_rx_bytes.
 * NOTE(review): presumably stored in idm_conn_t.ic_conn_flags -- confirm.
 */
#define	IDM_CONN_HEADER_DIGEST	0x00000001
#define	IDM_CONN_DATA_DIGEST	0x00000002
#define	IDM_CONN_USE_SCOREBOARD	0x00000004
212 
/*
 * Role predicates.  Each takes a pointer to a connection and evaluates to
 * nonzero when its ic_conn_type equals the corresponding idm_conn_type_t
 * value.
 */
#define	IDM_CONN_ISINI(conn)	(CONN_TYPE_INI == (conn)->ic_conn_type)
#define	IDM_CONN_ISTGT(conn)	(CONN_TYPE_TGT == (conn)->ic_conn_type)
215 
216 /*
217  * An IDM target task can transfer data using multiple buffers. The task
218  * will maintain a list of buffers, and each buffer will contain the relative
219  * offset of the transfer and a pointer to the next buffer in the list.
220  *
221  * Note on client private data:
222  * idt_private is intended to be a pointer to some sort of client-
223  * specific state.
224  *
225  * idt_client_handle is a more generic client-private piece of data that can
226  * be used by the client for the express purpose of task lookup.  The driving
227  * use case for this is for the client to store the initiator task tag for
228  * a given task so that it may be more easily retrieved for task management.
229  *
230  * The key take away here is that clients should never call
231  * idm_task_find_by_handle in the performance path.
232  *
233  * An initiator will require only one buffer per task, the offset will be 0.
234  */
235 
/*
 * IDM task.  The file's lock-order comment places idm_taskid_table_lock
 * before idt_mutex.
 */
typedef struct idm_task_s {
	idm_conn_t		*idt_ic;	/* Associated connection */
	/* connection type is in idt_ic->ic_conn_type */
	kmutex_t		idt_mutex;
	void			*idt_private;	/* Client private data */
	uintptr_t		idt_client_handle;	/* Client private */
	uint32_t		idt_tt;		/* Task tag */
	uint32_t		idt_r2t_ttt;	/* R2T Target Task tag */
	idm_task_state_t	idt_state;
	idm_refcnt_t		idt_refcnt;

	/*
	 * Statistics
	 */
	int			idt_tx_to_ini_start;
	int			idt_tx_to_ini_done;
	int			idt_rx_from_ini_start;
	int			idt_rx_from_ini_done;
	int			idt_tx_bytes;	/* IDM_CONN_USE_SCOREBOARD */
	int			idt_rx_bytes;	/* IDM_CONN_USE_SCOREBOARD */

	uint32_t		idt_exp_datasn;	/* expected datasn */
	uint32_t		idt_exp_rttsn;	/* expected rttsn */
	list_t			idt_inbufv;	/* chunks of IN buffers */
	list_t			idt_outbufv;	/* chunks of OUT buffers */

	/*
	 * Transport header, which describes this task's remote tagged buffer
	 */
	int			idt_transport_hdrlen;
	void			*idt_transport_hdr;
	/* IDM_TASK_PHASECOLLAPSE_* flags */
	uint32_t		idt_flags;	/* phase collapse */
} idm_task_t;
269 
/*
 * Constructor/destructor pair for idm_task_t objects.  The task is passed
 * as an untyped pointer.
 * NOTE(review): the signatures look like kmem cache callbacks for
 * idm_global_t.idm_task_cache -- confirm at the kmem_cache_create() call.
 */
int idm_task_constructor(void *task_void, void *arg, int flags);
void idm_task_destructor(void *task_void, void *arg);
272 
/*
 * IDM_TASKIDS_MAX: NOTE(review): presumably the upper bound on entries in
 * idm_global_t.idm_taskid_table -- confirm.
 * IDM_BUF_MAGIC: value of idm_buf_t.idb_magic; the bytes spell "IBUF" in
 * ASCII.
 */
#define	IDM_TASKIDS_MAX		16384
#define	IDM_BUF_MAGIC		0x49425546	/* "IBUF" */

/* Bit flags for idm_task_t.idt_flags */
#define	IDM_TASK_PHASECOLLAPSE_REQ	0x00000001 /* request phase collapse */
#define	IDM_TASK_PHASECOLLAPSE_SUCCESS	0x00000002 /* phase collapse success */
278 
/*
 * IDM data buffer.  It begins with the same two members as idm_tx_obj_t
 * (magic, then idm_tx_link).
 *
 * Protect with task mutex
 */
typedef struct idm_buf_s {
	uint32_t	idb_magic;	/* "IBUF" */

	/*
	 * Note: idm_tx_link *must* be the second element in this structure
	 * for proper TX PDU ordering.
	 */
	list_node_t	idm_tx_link;	/* link in a list of TX objects */

	list_node_t	idb_buflink;	/* link in a multi-buffer data xfer */
	idm_conn_t	*idb_ic;	/* Associated connection */
	void		*idb_buf;	/* data */
	uint64_t	idb_buflen;	/* length of buffer */
	size_t		idb_bufoffset;	/* offset in a multi-buffer xfer */
	boolean_t	idb_bufalloc;  /* true if alloc'd in idm_buf_alloc */
	/*
	 * DataPDUInOrder=Yes, so to track that the PDUs in a sequence are sent
	 * in continuously increasing address order, check that offsets for a
	 * single buffer xfer are in order.
	 */
	uint32_t	idb_exp_offset;
	size_t		idb_xfer_len;	/* Current requested xfer len */
	void		*idb_buf_private; /* transport-specific buf handle */
	void		*idb_reg_private; /* transport-specific reg handle */
	void		*idb_bufptr; /* transport-specific bcopy pointer */
	boolean_t	idb_bufbcopy;	/* true if bcopy required */

	idm_buf_cb_t	*idb_buf_cb;	/* Data Completion Notify, tgt only */
	void		*idb_cb_arg;	/* Client private data */
	idm_task_t	*idb_task_binding;
	timespec_t	idb_xfer_start;
	timespec_t	idb_xfer_done;
	boolean_t	idb_in_transport;
	boolean_t	idb_tx_thread;		/* Sockets only */
	iscsi_hdr_t	idb_data_hdr_tmpl;	/* Sockets only */
	idm_status_t	idb_status;
} idm_buf_t;
317 
/*
 * Selects how a buffer-pattern check is performed.
 * NOTE(review): the consumer is not in this header; the names suggest a
 * cheap check, a full check, and a check that asserts on failure -- confirm
 * against the bufpat checking code.
 */
typedef enum {
	BP_CHECK_QUICK,
	BP_CHECK_THOROUGH,
	BP_CHECK_ASSERT
} idm_bufpat_check_type_t;
323 
/*
 * BUFPAT_MATCH(bc_bufpat, bc_idb) -- test one buffer-pattern entry.
 *
 * bc_bufpat is a pointer to an idm_bufpat_t; bc_idb is the buffer pointer
 * to compare against.  Evaluates to nonzero when the entry's bufpat_idb
 * equals bc_idb and its bufpat_bufmagic equals IDM_BUF_MAGIC.
 *
 * The expansion uses the bc_bufpat argument itself.  It previously ignored
 * that argument and referenced a caller variable literally named "bufpat",
 * so callers that pass a variable of that name are unaffected.  bc_bufpat
 * is evaluated twice and must have no side effects.
 */
#define	BUFPAT_MATCH(bc_bufpat, bc_idb)				\
	(((bc_bufpat)->bufpat_idb == (bc_idb)) &&		\
	    ((bc_bufpat)->bufpat_bufmagic == IDM_BUF_MAGIC))
327 
/*
 * One buffer-pattern entry, tested by BUFPAT_MATCH().
 * NOTE(review): presumably written repeatedly across a data buffer so stale
 * or unwritten regions can be detected -- confirm in the code that fills it.
 */
typedef struct idm_bufpat_s {
	void		*bufpat_idb;	/* compared with the buffer pointer */
	uint32_t	bufpat_bufmagic; /* compared with IDM_BUF_MAGIC */
	uint32_t	bufpat_offset;
} idm_bufpat_t;
333 
/*
 * PDU_MAX_IOVLEN: number of entries in idm_pdu_t.isp_iov.
 * IDM_PDU_MAGIC: value of idm_pdu_t.isp_magic; the bytes spell "IPDU" in
 * ASCII.
 */
#define	PDU_MAX_IOVLEN	12
#define	IDM_PDU_MAGIC	0x49504455	/* "IPDU" */
336 
/*
 * IDM PDU.  It begins with the same two members as idm_tx_obj_t (magic,
 * then idm_tx_link).
 */
typedef struct idm_pdu_s {
	uint32_t	isp_magic;	/* "IPDU" */

	/*
	 * Internal - Order is vital.  idm_tx_link *must* be the second
	 * element in this structure for proper TX PDU ordering.
	 */
	list_node_t	idm_tx_link;

	list_node_t	isp_client_lnd;

	idm_conn_t	*isp_ic;	/* Must be set */
	iscsi_hdr_t	*isp_hdr;	/* read by IDM_PDU_OPCODE() */
	uint_t		isp_hdrlen;
	uint8_t		*isp_data;
	uint_t		isp_datalen;

	/* Transport header */
	void		*isp_transport_hdr;
	uint32_t	isp_transport_hdrlen;
	void		*isp_transport_private;

	/*
	 * isp_data is used for sending SCSI status, NOP, text, scsi and
	 * non-scsi data. Data is received using isp_iov and isp_iovlen
	 * to support data over multiple buffers.
	 */
	void		*isp_private;
	idm_pdu_cb_t	*isp_callback;
	idm_status_t	isp_status;

	/*
	 * The following elements are only used in
	 * idm_sorecv_scsidata() currently.
	 */
	struct iovec	isp_iov[PDU_MAX_IOVLEN];
	int		isp_iovlen;
	idm_buf_t	*isp_sorx_buf;

	/* Implementation data for idm_pdu_alloc and sorx PDU cache */
	uint32_t	isp_flags;	/* IDM_PDU_* flags */
	uint_t		isp_hdrbuflen;
	uint_t		isp_databuflen;
	hrtime_t	isp_queue_time;

	/* Taskq dispatching state for deferred PDU */
	taskq_ent_t	isp_tqent;
} idm_pdu_t;
385 
/*
 * This "generic" object is used when removing an item from the ic_tx_list
 * in order to determine whether it's an idm_pdu_t or an idm_buf_t.  Both of
 * those structures begin with the same two members, so the magic value
 * (IDM_PDU_MAGIC or IDM_BUF_MAGIC) can be read through this type.
 */

typedef struct {
	uint32_t	idm_tx_obj_magic;
	/*
	 * idm_tx_link *must* be the second element in this structure.
	 */
	list_node_t	idm_tx_link;
} idm_tx_obj_t;
398 
399 
/*
 * Opcode of a PDU: the opcode byte of the header that isp_hdr points at,
 * masked with ISCSI_OPCODE_MASK.  PDU is a pointer to an idm_pdu_t whose
 * isp_hdr is set.
 */
#define	IDM_PDU_OPCODE(PDU)	(ISCSI_OPCODE_MASK & (PDU)->isp_hdr->opcode)
402 
/*
 * IDM_PDU_* bit flags.
 * NOTE(review): presumably stored in idm_pdu_t.isp_flags -- confirm.
 */
#define	IDM_PDU_ALLOC		0x00000001
#define	IDM_PDU_ADDL_HDR	0x00000002
#define	IDM_PDU_ADDL_DATA	0x00000004
#define	IDM_PDU_LOGIN_TX	0x00000008
#define	IDM_PDU_SET_STATSN	0x00000010
#define	IDM_PDU_ADVANCE_STATSN	0x00000020
409 
#define	OSD_EXT_CDB_AHSLEN	(200 - 15)
#define	BIDI_AHS_LENGTH		5
/*
 * Additional Header Segment (AHS)
 * AHS is only valid for SCSI Requests and contains SCSI CDB information
 * which doesn't fit in the standard 16 byte area of the PDU. Commonly
 * this only holds true for OSD device commands.
 *
 * IDM_SORX_CACHE_AHSLEN is the amount of memory which is preallocated in
 * bytes: each of the two AHS lengths above plus 3 bytes, 196 in total.
 * When used in the header the AHS length is stored as the number of 4-byte
 * words; so IDM_SORX_WIRE_AHSLEN is IDM_SORX_CACHE_AHSLEN in words.
 * IDM_SORX_CACHE_HDRLEN is the size of an iscsi_hdr_t plus the preallocated
 * AHS space.
 */
#define	IDM_SORX_CACHE_AHSLEN \
	((OSD_EXT_CDB_AHSLEN + 3) + (BIDI_AHS_LENGTH + 3))
#define	IDM_SORX_WIRE_AHSLEN (IDM_SORX_CACHE_AHSLEN / sizeof (uint32_t))
#define	IDM_SORX_CACHE_HDRLEN	(sizeof (iscsi_hdr_t) + IDM_SORX_CACHE_AHSLEN)
426 
427 /*
428  * ID pool
429  */
430 
/*
 * ID pool constants.
 *
 * IDM_IDPOOL_MAGIC is the value of idm_idpool_t.id_magic; the bytes spell
 * "IDPL" in ASCII.  IDM_IDPOOL_MIN_SIZE is the number of IDs a pool starts
 * with and IDM_IDPOOL_MAX_SIZE is the matching upper bound (65536).  The
 * maximum is parenthesized so it expands as a single value in any
 * expression.
 */
#define	IDM_IDPOOL_MAGIC	0x4944504C	/* IDPL */
#define	IDM_IDPOOL_MIN_SIZE	64	/* Number of IDs to begin with */
#define	IDM_IDPOOL_MAX_SIZE	(64 * 1024)
434 
/*
 * ID pool state, operated on by the idm_idpool_*() functions declared in
 * this header.
 * NOTE(review): the field names suggest a bitmap (id_pool) with a scan
 * cursor (id_idx, id_bit, id_bit_idx) and free-ID accounting -- confirm
 * against the idpool implementation.
 */
typedef struct idm_idpool {
	uint32_t	id_magic;	/* IDM_IDPOOL_MAGIC */
	kmutex_t	id_mutex;
	uint8_t		*id_pool;
	uint32_t	id_size;
	uint8_t		id_bit;
	uint8_t		id_bit_idx;
	uint32_t	id_idx;
	uint32_t	id_idx_msk;
	uint32_t	id_free_counter;
	uint32_t	id_max_free_counter;
} idm_idpool_t;
447 
/*
 * Global IDM state structure.  There is a single instance, "idm", declared
 * below.  The file's lock-order comment places idm_taskid_table_lock before
 * any idm_task_t.idt_mutex.
 */
typedef struct {
	kmutex_t	idm_global_mutex;
	taskq_t		*idm_global_taskq;
	/* Watchdog thread state (see IDM_WD_INTERVAL) */
	kthread_t	*idm_wd_thread;
	kt_did_t	idm_wd_thread_did;
	boolean_t	idm_wd_thread_running;
	kcondvar_t	idm_wd_cv;
	/* Service and connection lists */
	list_t		idm_tgt_svc_list;
	kcondvar_t	idm_tgt_svc_cv;
	list_t		idm_tgt_conn_list;
	int		idm_tgt_conn_count;
	list_t		idm_ini_conn_list;
	/* Buffer and task caches, task ID table */
	kmem_cache_t	*idm_buf_cache;
	kmem_cache_t	*idm_task_cache;
	krwlock_t	idm_taskid_table_lock;
	idm_task_t	**idm_taskid_table;
	uint32_t	idm_taskid_next;
	uint32_t	idm_taskid_max;
	idm_idpool_t	idm_conn_id_pool;
	/* Socket transport caches */
	kmem_cache_t	*idm_sotx_pdu_cache;
	kmem_cache_t	*idm_sorx_pdu_cache;
	kmem_cache_t	*idm_so_128k_buf_cache;
} idm_global_t;

extern idm_global_t	idm; /* Global state */
476 
477 int
478 idm_idpool_create(idm_idpool_t	*pool);
479 
480 void
481 idm_idpool_destroy(idm_idpool_t *pool);
482 
483 int
484 idm_idpool_alloc(idm_idpool_t *pool, uint16_t *id);
485 
486 void
487 idm_idpool_free(idm_idpool_t *pool, uint16_t id);
488 
489 void
490 idm_pdu_rx(idm_conn_t *ic, idm_pdu_t *pdu);
491 
492 void
493 idm_pdu_tx_forward(idm_conn_t *ic, idm_pdu_t *pdu);
494 
495 boolean_t
496 idm_pdu_rx_forward_ffp(idm_conn_t *ic, idm_pdu_t *pdu);
497 
498 void
499 idm_pdu_rx_forward(idm_conn_t *ic, idm_pdu_t *pdu);
500 
501 void
502 idm_pdu_tx_protocol_error(idm_conn_t *ic, idm_pdu_t *pdu);
503 
504 void
505 idm_pdu_rx_protocol_error(idm_conn_t *ic, idm_pdu_t *pdu);
506 
507 void idm_parse_login_rsp(idm_conn_t *ic, idm_pdu_t *logout_req_pdu,
508     boolean_t rx);
509 
510 void idm_parse_logout_req(idm_conn_t *ic, idm_pdu_t *logout_req_pdu,
511     boolean_t rx);
512 
513 void idm_parse_logout_rsp(idm_conn_t *ic, idm_pdu_t *login_rsp_pdu,
514     boolean_t rx);
515 
/*
 * Connection lifecycle.
 *
 * idm_svc_conn_create() takes a service and a transport type and hands the
 * new connection back through *ic_result; the idm_status_t return reports
 * the outcome.  idm_svc_conn_destroy() is its counterpart.
 */
idm_status_t idm_svc_conn_create(idm_svc_t *is, idm_transport_type_t type,
    idm_conn_t **ic_result);

void idm_svc_conn_destroy(idm_conn_t *ic);

/*
 * NOTE(review): presumably the role-specific final step of connection
 * setup (initiator and target respectively) -- confirm.
 */
idm_status_t idm_ini_conn_finish(idm_conn_t *ic);

idm_status_t idm_tgt_conn_finish(idm_conn_t *ic);

/*
 * Role-independent create/destroy.  The caller supplies the connection
 * type, transport type and connection ops.
 * NOTE(review): the failure return value is not visible here -- confirm
 * whether callers must check for NULL.
 */
idm_conn_t *idm_conn_create_common(idm_conn_type_t conn_type,
    idm_transport_type_t tt, idm_conn_ops_t *conn_ops);

void idm_conn_destroy_common(idm_conn_t *ic);

void idm_conn_close(idm_conn_t *ic);
531 
/*
 * Connection ID allocation.  These use 32-bit IDs, while the idm_idpool_*()
 * interface uses 16-bit IDs.
 * NOTE(review): presumably backed by idm_global_t.idm_conn_id_pool and
 * stored in idm_conn_t.ic_internal_cid -- confirm.
 */
uint32_t idm_cid_alloc(void);

void idm_cid_free(uint32_t cid);

/*
 * CRC32C over "length" bytes starting at "address".  The _continued form
 * additionally takes a previously computed CRC.
 * NOTE(review): presumably so a checksum can be accumulated across several
 * buffers -- confirm the required initial crc value.
 */
uint32_t idm_crc32c(void *address, unsigned long length);

uint32_t idm_crc32c_continued(void *address, unsigned long length,
    uint32_t crc);

/*
 * Insert buf into the buffer list lst.
 * NOTE(review): the ordering rule (likely by idb_bufoffset) is not visible
 * in this header -- confirm.
 */
void idm_listbuf_insert(list_t *lst, idm_buf_t *buf);

/*
 * Look up a connection by ISID, TSIH and CID.
 * NOTE(review): the not-found return value and any reference-count effect
 * on the result are not visible here -- confirm.
 */
idm_conn_t *idm_lookup_conn(uint8_t *isid, uint16_t tsih, uint16_t cid);
544 
545 #ifdef	__cplusplus
546 }
547 #endif
548 
549 #endif /* _IDM_IMPL_H_ */
550