1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27/*
28 * etm.c	FMA Event Transport Module implementation, a plugin of FMD
29 *		for sun4v/Ontario
30 *
31 * plugin for sending/receiving FMA events to/from service processor
32 */
33
34/*
35 * --------------------------------- includes --------------------------------
36 */
37
38#include <sys/fm/protocol.h>
39#include <sys/fm/util.h>
40#include <sys/fm/ldom.h>
41#include <sys/strlog.h>
42#include <sys/syslog.h>
43#include <sys/libds.h>
44#include <netinet/in.h>
45#include <fm/fmd_api.h>
46
47#include "etm_xport_api.h"
48#include "etm_etm_proto.h"
49#include "etm_impl.h"
50#include "etm_iosvc.h"
51#include "etm_filter.h"
52#include "etm_ckpt.h"
53
54#include <pthread.h>
55#include <signal.h>
56#include <stropts.h>
57#include <locale.h>
58#include <strings.h>
59#include <stdlib.h>
60#include <unistd.h>
61#include <limits.h>
62#include <values.h>
63#include <alloca.h>
64#include <errno.h>
65#include <dlfcn.h>
66#include <link.h>
67#include <fcntl.h>
68#include <time.h>
69
70/*
71 * ----------------------------- forward decls -------------------------------
72 */
73
74static void
75etm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class);
76
77static int
78etm_send(fmd_hdl_t *hdl, fmd_xprt_t *xp, fmd_event_t *event, nvlist_t *nvl);
79
80static void
81etm_send_to_remote_root(void *arg);
82
83static void
84etm_recv_from_remote_root(void *arg);
85
86static void
87etm_ckpt_remove(fmd_hdl_t *hdl, etm_iosvc_q_ele_t *ele);
88
89/*
90 * ------------------------- data structs for FMD ----------------------------
91 */
92
/*
 * Module operations vector handed to FMD through fmd_info below.
 * Only the fmdo_recv and fmdo_send slots are populated (with etm_recv()
 * and etm_send() respectively); every other slot is left NULL.
 */
static const fmd_hdl_ops_t fmd_ops = {
	etm_recv,	/* fmdo_recv */
	NULL,		/* fmdo_timeout */
	NULL,		/* fmdo_close */
	NULL,		/* fmdo_stats */
	NULL,		/* fmdo_gc */
	etm_send,	/* fmdo_send */
};
101
/*
 * Module property table handed to FMD through fmd_info below.
 * Each entry is a property name, an FMD_TYPE_* tag, and a string that
 * is presumably the property's default value (confirm against the
 * fmd_prop_t definition); the table ends with an all-NULL/0 entry.
 */
static const fmd_prop_t fmd_props[] = {
	{ ETM_PROP_NM_XPORT_ADDRS,		FMD_TYPE_STRING, "" },
	{ ETM_PROP_NM_DEBUG_LVL,		FMD_TYPE_INT32, "0" },
	{ ETM_PROP_NM_DEBUG_MAX_EV_CNT,		FMD_TYPE_INT32, "-1" },
	{ ETM_PROP_NM_CONSOLE,			FMD_TYPE_BOOL, "false" },
	{ ETM_PROP_NM_SYSLOGD,			FMD_TYPE_BOOL, "true" },
	{ ETM_PROP_NM_FACILITY,			FMD_TYPE_STRING, "LOG_DAEMON" },
	{ ETM_PROP_NM_MAX_RESP_Q_LEN,		FMD_TYPE_UINT32, "32" },
	{ ETM_PROP_NM_BAD_ACC_TO_SEC,		FMD_TYPE_UINT32, "1" },
	{ ETM_PROP_NM_FMA_RESP_WAIT_TIME,	FMD_TYPE_INT32, "240" },
	{ NULL, 0, NULL }
};
114
115
116static const fmd_hdl_info_t fmd_info = {
117	"FMA Event Transport Module", "1.2", &fmd_ops, fmd_props
118};
119
120/*
121 * ----------------------- private consts and defns --------------------------
122 */
123
124/* misc buffer for variable sized protocol header fields */
125
126#define	ETM_MISC_BUF_SZ	(4 * 1024)
127
128static uint32_t
129etm_ldom_type = LDOM_TYPE_LEGACY;
130
131/* try limit for IO operations w/ capped exp backoff sleep on retry */
132
133/*
134 * Design_Note:	ETM will potentially retry forever IO operations that the
135 *		transport fails with EAGAIN (aka EWOULDBLOCK) rather than
136 *		giving up after some number of seconds. This avoids
137 *		dropping FMA events while the service processor is down,
138 *		but at the risk of pending fmdo_recv() forever and
139 *		overflowing FMD's event queue for ETM.
140 *		A future TBD enhancement would be to always recv
141 *		and send each ETM msg in a single read/write() to reduce
142 *		the risk of failure between ETM msg hdr and body,
143 *		assuming the MTU_SZ is large enough.
144 */
145
146#define	ETM_TRY_MAX_CNT		(MAXINT - 1)
147#define	ETM_TRY_BACKOFF_RATE	(4)
148#define	ETM_TRY_BACKOFF_CAP	(60)
149
150/* amount to increment protocol transaction id on each new send */
151
152#define	ETM_XID_INC		(2)
153
/*
 * One entry of the responder queue: the connection to reply on, the
 * ETM msg hdr (and its size) the reply is built from, and the response
 * code to send. Entries are chained singly through rqe_nextp.
 */
typedef struct etm_resp_q_ele {

	etm_xport_conn_t	rqe_conn;	/* open connection to send on */
	etm_proto_v1_pp_t	*rqe_hdrp;	/* ptr to ETM msg hdr */
	size_t			rqe_hdr_sz;	/* sizeof ETM msg hdr */
	int32_t			rqe_resp_code;	/* response code to send */

	struct etm_resp_q_ele	*rqe_nextp;	/* PRIVATE - next ele ptr */

} etm_resp_q_ele_t;	/* responder queue element */
164
165/*
166 * ---------------------------- global data ----------------------------------
167 */
168
169static fmd_hdl_t
170*init_hdl = NULL;	/* used in mem allocator and several other places */
171
172static int
173etm_debug_lvl = 0;	/* debug level: 0 is off, 1 is on, 2 is more, etc */
174
175static int
176etm_debug_max_ev_cnt = -1; /* max allowed event count for debugging */
177
178static fmd_xprt_t
179*etm_fmd_xprt = NULL;	/* FMD transport layer handle */
180
181static pthread_t
182etm_svr_tid = NULL;	/* thread id of connection acceptance server */
183
184static pthread_t
185etm_resp_tid = NULL;	/* thread id of msg responder */
186
187static etm_resp_q_ele_t
188*etm_resp_q_head = NULL; /* ptr to cur head of responder queue */
189
190static etm_resp_q_ele_t
191*etm_resp_q_tail = NULL; /* ptr to cur tail of responder queue */
192
193static uint32_t
194etm_resp_q_cur_len = 0;	/* cur length (ele cnt) of responder queue */
195
196static uint32_t
197etm_resp_q_max_len = 0;	/* max length (ele cnt) of responder queue */
198
199static uint32_t
200etm_bad_acc_to_sec = 0;	/* sleep timeout (in sec) after bad conn accept */
201
202static pthread_mutex_t
203etm_resp_q_lock = PTHREAD_MUTEX_INITIALIZER;	/* protects responder queue */
204
205static pthread_cond_t
206etm_resp_q_cv = PTHREAD_COND_INITIALIZER;	/* nudges msg responder */
207
208static volatile int
209etm_is_dying = 0;	/* bool for dying (killing self) */
210
211static uint32_t
212etm_xid_cur = 0;	/* current transaction id for sends */
213
214static uint32_t
215etm_xid_ping = 0;	/* xid of last CONTROL msg sent requesting ping */
216
217static uint32_t
218etm_xid_ver_negot = 0;	/* xid of last CONTROL msg sent requesting ver negot */
219
220static uint32_t
221etm_xid_posted_logged_ev = 0;
222			/* xid of last FMA_EVENT msg/event posted OK to FMD */
223
224static uint32_t
225etm_xid_posted_sa = 0;	/* xid of last ALERT msg/event posted OK to syslog */
226
227static uint8_t
228etm_resp_ver = ETM_PROTO_V1; /* proto ver [negotiated] for msg sends */
229
230static uint32_t
231etm_fma_resp_wait_time = 30;	/*  time (sec) wait for fma event resp */
232
233static pthread_mutex_t
234etm_write_lock = PTHREAD_MUTEX_INITIALIZER;	/* for write operations */
235
236static log_ctl_t syslog_ctl;	/* log(7D) meta-data for each msg */
237static int syslog_facility;	/* log(7D) facility (part of priority) */
238static int syslog_logfd = -1;	/* log(7D) file descriptor */
239static int syslog_msgfd = -1;	/* sysmsg(7D) file descriptor */
240static int syslog_file = 0;	/* log to syslog_logfd */
241static int syslog_cons = 0;	/* log to syslog_msgfd */
242
243static const struct facility {
244	const char *fac_name;
245	int fac_value;
246} syslog_facs[] = {
247	{ "LOG_DAEMON", LOG_DAEMON },
248	{ "LOG_LOCAL0", LOG_LOCAL0 },
249	{ "LOG_LOCAL1", LOG_LOCAL1 },
250	{ "LOG_LOCAL2", LOG_LOCAL2 },
251	{ "LOG_LOCAL3", LOG_LOCAL3 },
252	{ "LOG_LOCAL4", LOG_LOCAL4 },
253	{ "LOG_LOCAL5", LOG_LOCAL5 },
254	{ "LOG_LOCAL6", LOG_LOCAL6 },
255	{ "LOG_LOCAL7", LOG_LOCAL7 },
256	{ NULL, 0 }
257};
258
259static struct stats {
260
261	/* ETM msg counters */
262
263	fmd_stat_t etm_rd_hdr_fmaevent;
264	fmd_stat_t etm_rd_hdr_control;
265	fmd_stat_t etm_rd_hdr_alert;
266	fmd_stat_t etm_rd_hdr_response;
267	fmd_stat_t etm_rd_body_fmaevent;
268	fmd_stat_t etm_rd_body_control;
269	fmd_stat_t etm_rd_body_alert;
270	fmd_stat_t etm_rd_body_response;
271	fmd_stat_t etm_wr_hdr_fmaevent;
272	fmd_stat_t etm_wr_hdr_control;
273	fmd_stat_t etm_wr_hdr_response;
274	fmd_stat_t etm_wr_body_fmaevent;
275	fmd_stat_t etm_wr_body_control;
276	fmd_stat_t etm_wr_body_response;
277
278	fmd_stat_t etm_rd_max_ev_per_msg;
279	fmd_stat_t etm_wr_max_ev_per_msg;
280
281	fmd_stat_t etm_resp_q_cur_len;
282	fmd_stat_t etm_resp_q_max_len;
283
284	/* ETM byte counters */
285
286	fmd_stat_t etm_wr_fmd_bytes;
287	fmd_stat_t etm_rd_fmd_bytes;
288	fmd_stat_t etm_wr_xport_bytes;
289	fmd_stat_t etm_rd_xport_bytes;
290
291	fmd_stat_t etm_magic_drop_bytes;
292
293	/* ETM [dropped] FMA event counters */
294
295	fmd_stat_t etm_rd_fmd_fmaevent;
296	fmd_stat_t etm_wr_fmd_fmaevent;
297
298	fmd_stat_t etm_rd_drop_fmaevent;
299	fmd_stat_t etm_wr_drop_fmaevent;
300
301	fmd_stat_t etm_rd_dup_fmaevent;
302	fmd_stat_t etm_wr_dup_fmaevent;
303
304	fmd_stat_t etm_rd_dup_alert;
305	fmd_stat_t etm_wr_dup_alert;
306
307	fmd_stat_t etm_enq_drop_resp_q;
308	fmd_stat_t etm_deq_drop_resp_q;
309
310	/* ETM protocol failures */
311
312	fmd_stat_t etm_magic_bad;
313	fmd_stat_t etm_ver_bad;
314	fmd_stat_t etm_msgtype_bad;
315	fmd_stat_t etm_subtype_bad;
316	fmd_stat_t etm_xid_bad;
317	fmd_stat_t etm_fmaeventlen_bad;
318	fmd_stat_t etm_respcode_bad;
319	fmd_stat_t etm_timeout_bad;
320	fmd_stat_t etm_evlens_bad;
321
322	/* IO operation failures */
323
324	fmd_stat_t etm_xport_wr_fail;
325	fmd_stat_t etm_xport_rd_fail;
326	fmd_stat_t etm_xport_pk_fail;
327
328	/* IO operation retries */
329
330	fmd_stat_t etm_xport_wr_retry;
331	fmd_stat_t etm_xport_rd_retry;
332	fmd_stat_t etm_xport_pk_retry;
333
334	/* system and library failures */
335
336	fmd_stat_t etm_os_nvlist_pack_fail;
337	fmd_stat_t etm_os_nvlist_unpack_fail;
338	fmd_stat_t etm_os_nvlist_size_fail;
339	fmd_stat_t etm_os_pthread_create_fail;
340
341	/* xport API failures */
342
343	fmd_stat_t etm_xport_get_ev_addrv_fail;
344	fmd_stat_t etm_xport_open_fail;
345	fmd_stat_t etm_xport_close_fail;
346	fmd_stat_t etm_xport_accept_fail;
347	fmd_stat_t etm_xport_open_retry;
348
349	/* FMD entry point bad arguments */
350
351	fmd_stat_t etm_fmd_init_badargs;
352	fmd_stat_t etm_fmd_fini_badargs;
353
354	/* Alert logging errors */
355
356	fmd_stat_t etm_log_err;
357	fmd_stat_t etm_msg_err;
358
359	/* miscellaneous stats */
360
361	fmd_stat_t etm_reset_xport;
362
363} etm_stats = {
364
365	/* ETM msg counters */
366
367	{ "etm_rd_hdr_fmaevent", FMD_TYPE_UINT64,
368		"ETM fmaevent msg headers rcvd from xport" },
369	{ "etm_rd_hdr_control", FMD_TYPE_UINT64,
370		"ETM control msg headers rcvd from xport" },
371	{ "etm_rd_hdr_alert", FMD_TYPE_UINT64,
372		"ETM alert msg headers rcvd from xport" },
373	{ "etm_rd_hdr_response", FMD_TYPE_UINT64,
374		"ETM response msg headers rcvd from xport" },
375	{ "etm_rd_body_fmaevent", FMD_TYPE_UINT64,
376		"ETM fmaevent msg bodies rcvd from xport" },
377	{ "etm_rd_body_control", FMD_TYPE_UINT64,
378		"ETM control msg bodies rcvd from xport" },
379	{ "etm_rd_body_alert", FMD_TYPE_UINT64,
380		"ETM alert msg bodies rcvd from xport" },
381	{ "etm_rd_body_response", FMD_TYPE_UINT64,
382		"ETM response msg bodies rcvd from xport" },
383	{ "etm_wr_hdr_fmaevent", FMD_TYPE_UINT64,
384		"ETM fmaevent msg headers sent to xport" },
385	{ "etm_wr_hdr_control", FMD_TYPE_UINT64,
386		"ETM control msg headers sent to xport" },
387	{ "etm_wr_hdr_response", FMD_TYPE_UINT64,
388		"ETM response msg headers sent to xport" },
389	{ "etm_wr_body_fmaevent", FMD_TYPE_UINT64,
390		"ETM fmaevent msg bodies sent to xport" },
391	{ "etm_wr_body_control", FMD_TYPE_UINT64,
392		"ETM control msg bodies sent to xport" },
393	{ "etm_wr_body_response", FMD_TYPE_UINT64,
394		"ETM response msg bodies sent to xport" },
395
396	{ "etm_rd_max_ev_per_msg", FMD_TYPE_UINT64,
397		"max FMA events per ETM msg from xport" },
398	{ "etm_wr_max_ev_per_msg", FMD_TYPE_UINT64,
399		"max FMA events per ETM msg to xport" },
400
401	{ "etm_resp_q_cur_len", FMD_TYPE_UINT64,
402		"cur enqueued response msgs to xport" },
403	{ "etm_resp_q_max_len", FMD_TYPE_UINT64,
404		"max enqueable response msgs to xport" },
405
406	/* ETM byte counters */
407
408	{ "etm_wr_fmd_bytes", FMD_TYPE_UINT64,
409		"bytes of FMA events sent to FMD" },
410	{ "etm_rd_fmd_bytes", FMD_TYPE_UINT64,
411		"bytes of FMA events rcvd from FMD" },
412	{ "etm_wr_xport_bytes", FMD_TYPE_UINT64,
413		"bytes of FMA events sent to xport" },
414	{ "etm_rd_xport_bytes", FMD_TYPE_UINT64,
415		"bytes of FMA events rcvd from xport" },
416
417	{ "etm_magic_drop_bytes", FMD_TYPE_UINT64,
418		"bytes dropped from xport pre magic num" },
419
420	/* ETM [dropped] FMA event counters */
421
422	{ "etm_rd_fmd_fmaevent", FMD_TYPE_UINT64,
423		"FMA events rcvd from FMD" },
424	{ "etm_wr_fmd_fmaevent", FMD_TYPE_UINT64,
425		"FMA events sent to FMD" },
426
427	{ "etm_rd_drop_fmaevent", FMD_TYPE_UINT64,
428		"dropped FMA events from xport" },
429	{ "etm_wr_drop_fmaevent", FMD_TYPE_UINT64,
430		"dropped FMA events to xport" },
431
432	{ "etm_rd_dup_fmaevent", FMD_TYPE_UINT64,
433	    "duplicate FMA events rcvd from xport" },
434	{ "etm_wr_dup_fmaevent", FMD_TYPE_UINT64,
435	    "duplicate FMA events sent to xport" },
436
437	{ "etm_rd_dup_alert", FMD_TYPE_UINT64,
438	    "duplicate ALERTs rcvd from xport" },
439	{ "etm_wr_dup_alert", FMD_TYPE_UINT64,
440	    "duplicate ALERTs sent to xport" },
441
442	{ "etm_enq_drop_resp_q", FMD_TYPE_UINT64,
443	    "dropped response msgs on enq" },
444	{ "etm_deq_drop_resp_q", FMD_TYPE_UINT64,
445	    "dropped response msgs on deq" },
446
447	/* ETM protocol failures */
448
449	{ "etm_magic_bad", FMD_TYPE_UINT64,
450		"ETM msgs w/ invalid magic num" },
451	{ "etm_ver_bad", FMD_TYPE_UINT64,
452		"ETM msgs w/ invalid protocol version" },
453	{ "etm_msgtype_bad", FMD_TYPE_UINT64,
454		"ETM msgs w/ invalid message type" },
455	{ "etm_subtype_bad", FMD_TYPE_UINT64,
456		"ETM msgs w/ invalid sub type" },
457	{ "etm_xid_bad", FMD_TYPE_UINT64,
458		"ETM msgs w/ unmatched xid" },
459	{ "etm_fmaeventlen_bad", FMD_TYPE_UINT64,
460		"ETM msgs w/ invalid FMA event length" },
461	{ "etm_respcode_bad", FMD_TYPE_UINT64,
462		"ETM msgs w/ invalid response code" },
463	{ "etm_timeout_bad", FMD_TYPE_UINT64,
464		"ETM msgs w/ invalid timeout value" },
465	{ "etm_evlens_bad", FMD_TYPE_UINT64,
466		"ETM msgs w/ too many event lengths" },
467
468	/* IO operation failures */
469
470	{ "etm_xport_wr_fail", FMD_TYPE_UINT64,
471		"xport write failures" },
472	{ "etm_xport_rd_fail", FMD_TYPE_UINT64,
473		"xport read failures" },
474	{ "etm_xport_pk_fail", FMD_TYPE_UINT64,
475		"xport peek failures" },
476
477	/* IO operation retries */
478
479	{ "etm_xport_wr_retry", FMD_TYPE_UINT64,
480		"xport write retries" },
481	{ "etm_xport_rd_retry", FMD_TYPE_UINT64,
482		"xport read retries" },
483	{ "etm_xport_pk_retry", FMD_TYPE_UINT64,
484		"xport peek retries" },
485
486	/* system and library failures */
487
488	{ "etm_os_nvlist_pack_fail", FMD_TYPE_UINT64,
489		"nvlist_pack failures" },
490	{ "etm_os_nvlist_unpack_fail", FMD_TYPE_UINT64,
491		"nvlist_unpack failures" },
492	{ "etm_os_nvlist_size_fail", FMD_TYPE_UINT64,
493		"nvlist_size failures" },
494	{ "etm_os_pthread_create_fail", FMD_TYPE_UINT64,
495		"pthread_create failures" },
496
497	/* transport API failures */
498
499	{ "etm_xport_get_ev_addrv_fail", FMD_TYPE_UINT64,
500		"xport get event addrv API failures" },
501	{ "etm_xport_open_fail", FMD_TYPE_UINT64,
502		"xport open API failures" },
503	{ "etm_xport_close_fail", FMD_TYPE_UINT64,
504		"xport close API failures" },
505	{ "etm_xport_accept_fail", FMD_TYPE_UINT64,
506		"xport accept API failures" },
507	{ "etm_xport_open_retry", FMD_TYPE_UINT64,
508		"xport open API retries" },
509
510	/* FMD entry point bad arguments */
511
512	{ "etm_fmd_init_badargs", FMD_TYPE_UINT64,
513	    "bad arguments from fmd_init entry point" },
514	{ "etm_fmd_fini_badargs", FMD_TYPE_UINT64,
515	    "bad arguments from fmd_fini entry point" },
516
517	/* Alert logging errors */
518
519	{ "etm_log_err", FMD_TYPE_UINT64,
520		"failed to log message to log(7D)" },
521	{ "etm_msg_err", FMD_TYPE_UINT64,
522		"failed to log message to sysmsg(7D)" },
523
524	/* miscellaneous stats */
525
526	{ "etm_reset_xport", FMD_TYPE_UINT64,
527		"xport resets after xport API failure" }
528};
529
530
531/*
532 * -------------------- global data for Root ldom-------------------------
533 */
534
535ldom_hdl_t
536*etm_lhp = NULL;		/* ldom pointer */
537
538static void *etm_dl_hdl = (void *)NULL;
539static const char *etm_dl_path = "libds.so.1";
540static int etm_dl_mode = (RTLD_NOW | RTLD_LOCAL);
541
542static int(*etm_ds_svc_reg)(ds_capability_t *cap, ds_ops_t *ops) =
543	(int (*)(ds_capability_t *cap, ds_ops_t *ops))NULL;
544static int(*etm_ds_clnt_reg)(ds_capability_t *cap, ds_ops_t *ops) =
545	(int (*)(ds_capability_t *cap, ds_ops_t *ops))NULL;
546static int(*etm_ds_send_msg)(ds_hdl_t hdl, void *buf, size_t buflen) =
547	(int (*)(ds_hdl_t hdl, void *buf, size_t buflen))NULL;
548static int(*etm_ds_recv_msg)(ds_hdl_t hdl, void *buf, size_t buflen,
549    size_t *msglen) =
550	(int (*)(ds_hdl_t hdl, void *buf, size_t buflen, size_t *msglen))NULL;
551static int (*etm_ds_fini)(void) = (int (*)(void))NULL;
552
553static pthread_mutex_t
554iosvc_list_lock =  PTHREAD_MUTEX_INITIALIZER;
555
556static pthread_t
557etm_async_e_tid = NULL;	/* thread id of io svc async event handler */
558
/*
 * Template FMA_EVENT msg hdr for the io service queue; the xid field is
 * left 0 here and is filled in when a msg is sent from the queue.
 */
static etm_proto_v1_ev_hdr_t iosvc_hdr = {
	ETM_PROTO_MAGIC_NUM,	/* magic number */
	ETM_PROTO_V1,		/* default to V1, not checked */
	ETM_MSG_TYPE_FMA_EVENT,	/* Root Domain introduces only FMA events */
	0,			/* sub-type */
	0,			/* pad */
	0,			/* add the xid at the Q send time */
	ETM_PROTO_V1_TIMEOUT_NONE,
	0			/* ev_lens, 0-termed, after 1 FMA event */
};
569
570/*
571 * static iosvc_list
572 */
573static etm_iosvc_t iosvc_list[NUM_OF_ROOT_DOMAINS] = {
574	{"", 0}, {"", 0}, {"", 0}, {"", 0}, {"", 0}, {"", 0},
575	{"", 0}, {"", 0}
576};
577
578static etm_iosvc_t io_svc = {
579	"\0",				/* ldom_name */
580	PTHREAD_COND_INITIALIZER,	/* nudges */
581	PTHREAD_MUTEX_INITIALIZER,	/* protects the iosvc msg Q */
582	NULL,				/* iosvc msg Q head */
583	NULL,				/* iosvc msg Q tail */
584	0,				/* msg Q current length */
585	100,				/* msg Q max length */
586	0,				/* current transaction id */
587	0,				/* xid of last event posted to FMD */
588	DS_INVALID_HDL,			/* DS handle */
589	NULL,				/* fmd xprt handle */
590	NULL,				/* tid 4 send to remote RootDomain */
591	NULL,				/* tid 4 recv from remote RootDomain */
592	PTHREAD_COND_INITIALIZER,	/* nudges etm_send_to_remote_root */
593	PTHREAD_MUTEX_INITIALIZER,	/* protects msg_ack_cv */
594	0,				/* send/recv threads are not dying */
595	0,				/* flag for start sending msg Q */
596	0				/* indicate if the ACK has come  */
597};
598etm_iosvc_t *io_svc_p = &io_svc;
599
600
601static uint32_t
602flags;					/* flags for fmd_xprt_open */
603
604static etm_async_event_ele_t
605async_event_q[ASYNC_EVENT_Q_SIZE];	/* holds the async events */
606
607static uint32_t
608etm_async_q_head = 0;		/* ptr to cur head of async event queue */
609
610static uint32_t
611etm_async_q_tail = 0;		/* ptr to cur tail of async event queue */
612
613static uint32_t
614etm_async_q_cur_len = 0;	/* cur length (ele cnt) of async event queue */
615
616static uint32_t
617etm_async_q_max_len = ASYNC_EVENT_Q_SIZE;
618				/* max length (ele cnt) of async event queue */
619
620static pthread_cond_t
621etm_async_event_q_cv = PTHREAD_COND_INITIALIZER;
622				/* nudges  async event handler */
623
624static pthread_mutex_t
625etm_async_event_q_lock = PTHREAD_MUTEX_INITIALIZER;
626				/* protects async event q */
627
628static ds_ver_t
629etm_iosvc_vers[] = { { 1, 0} };
630
631#define	ETM_NVERS	(sizeof (etm_iosvc_vers) / sizeof (ds_ver_t))
632
633static ds_capability_t
634iosvc_caps = {
635	"ETM",				/* svc_id */
636	etm_iosvc_vers,			/* vers */
637	ETM_NVERS			/* number of vers */
638};
639
640static void
641etm_iosvc_reg_handler(ds_hdl_t hdl, ds_cb_arg_t arg, ds_ver_t *ver,
642    ds_domain_hdl_t did);
643
644static void
645etm_iosvc_unreg_handler(ds_hdl_t hdl, ds_cb_arg_t arg);
646
647static ds_ops_t
648iosvc_ops = {
649	etm_iosvc_reg_handler,		/* ds_reg_cb */
650	etm_iosvc_unreg_handler,	/* ds_unreg_cb */
651	NULL,				/* ds_data_cb */
652	NULL				/* cb_arg */
653};
654
655
656/*
657 * -------------------------- support functions ------------------------------
658 */
659
660/*
661 * Design_Note:	Each failure worth reporting to FMD should be done using
662 *		a single call to fmd_hdl_error() as it logs an FMA event
663 *		for each call. Also be aware that all the fmd_hdl_*()
664 *		format strings currently use platform specific *printf()
665 *		routines; so "%p" under Solaris does not prepend "0x" to
666 *		the outputted hex digits, while Linux and VxWorks do.
667 */
668
669
670/*
671 * etm_show_time - display the current time of day (for debugging) using
672 *		the given FMD module handle and annotation string
673 */
674
675static void
676etm_show_time(fmd_hdl_t *hdl, char *note_str)
677{
678	struct timeval		tmv;		/* timeval */
679
680	(void) gettimeofday(&tmv, NULL);
681	fmd_hdl_debug(hdl, "info: %s: cur Unix Epoch time %d.%06d\n",
682	    note_str, tmv.tv_sec, tmv.tv_usec);
683
684} /* etm_show_time() */
685
686/*
687 * etm_hexdump - hexdump the given buffer (for debugging) using
688 *		the given FMD module handle
689 */
690
691static void
692etm_hexdump(fmd_hdl_t *hdl, void *buf, size_t byte_cnt)
693{
694	uint8_t		*bp;		/* byte ptr */
695	int		i, j;		/* index */
696	char		cb[80];		/* char buf */
697	unsigned int	n;		/* a byte of data for sprintf() */
698
699	bp = buf;
700	j = 0;
701
702	/*
703	 * Design_Note:	fmd_hdl_debug() auto adds a newline if missing;
704	 *		hence cb exists to accumulate a longer string.
705	 */
706
707	for (i = 1; i <= byte_cnt; i++) {
708		n = *bp++;
709		(void) sprintf(&cb[j], "%2.2x ", n);
710		j += 3;
711		/* add a newline every 16 bytes or at the buffer's end */
712		if (((i % 16) == 0) || (i >= byte_cnt)) {
713			cb[j-1] = '\0';
714			fmd_hdl_debug(hdl, "%s\n", cb);
715			j = 0;
716		}
717	} /* for each byte in the buffer */
718
719} /* etm_hexdump() */
720
/*
 * etm_sleep - put the calling thread to sleep for the requested number
 *		of whole seconds,
 *		return 0 on success or the negated errno on failure
 *
 * Design_Note:	To avoid interfering with FMD's signal mask (SIGALRM)
 *		do not use [Solaris] sleep(3C) and instead use
 *		pthread_cond_wait() or nanosleep(), both of which
 *		are POSIX spec-ed to leave signal masks alone.
 *		This is needed for Solaris and Linux (domain and SP).
 */

static int
etm_sleep(unsigned sleep_sec)
{
	struct timespec	req;	/* requested interval for nanosleep() */

	req.tv_nsec = 0;
	req.tv_sec = sleep_sec;

	/* on failure nanosleep() has set errno; hand it back negated */
	return ((nanosleep(&req, NULL) < 0) ? (-errno) : 0);

} /* etm_sleep() */
747
748/*
749 * etm_conn_open - open a connection to the given transport address,
750 *		return 0 and the opened connection handle
751 *		or -errno value
752 *
753 * caveats:	the err_substr is used in failure cases for calling
754 *		fmd_hdl_error()
755 */
756
757static int
758etm_conn_open(fmd_hdl_t *hdl, char *err_substr,
759		etm_xport_addr_t addr, etm_xport_conn_t *connp)
760{
761	etm_xport_conn_t	conn;	/* connection to return */
762	int			nev;	/* -errno value */
763
764	if ((conn = etm_xport_open(hdl, addr)) == NULL) {
765		nev = (-errno);
766		fmd_hdl_error(hdl, "error: %s: errno %d\n",
767		    err_substr, errno);
768		etm_stats.etm_xport_open_fail.fmds_value.ui64++;
769		return (nev);
770	} else {
771		*connp = conn;
772		return (0);
773	}
774} /* etm_conn_open() */
775
776/*
777 * etm_conn_close - close the given connection,
778 *		return 0 or -errno value
779 *
780 * caveats:	the err_substr is used in failure cases for calling
781 *		fmd_hdl_error()
782 */
783
784static int
785etm_conn_close(fmd_hdl_t *hdl, char *err_substr, etm_xport_conn_t conn)
786{
787	int	nev;	/* -errno value */
788
789	if (etm_xport_close(hdl, conn) == NULL) {
790		nev = (-errno);
791		fmd_hdl_error(hdl, "warning: %s: errno %d\n",
792		    err_substr, errno);
793		etm_stats.etm_xport_close_fail.fmds_value.ui64++;
794		return (nev);
795	} else {
796		return (0);
797	}
798} /* etm_conn_close() */
799
800/*
801 * etm_io_op - perform an IO operation on the given connection
802 *		with the given buffer,
803 *		accommodating MTU size and retrying op if needed,
804 *		return how many bytes actually done by the op
805 *		or -errno value
806 *
807 * caveats:	the err_substr is used in failure cases for calling
808 *		fmd_hdl_error()
809 */
810
811static ssize_t
812etm_io_op(fmd_hdl_t *hdl, char *err_substr, etm_xport_conn_t conn,
813				void *buf, size_t byte_cnt, int io_op)
814{
815	ssize_t		rv;		/* ret val / byte count */
816	ssize_t		n;		/* gen use */
817	uint8_t		*datap;		/* ptr to data */
818	size_t		mtu_sz;		/* MTU size in bytes */
819	int		(*io_func_ptr)(fmd_hdl_t *, etm_xport_conn_t,
820	    void *, size_t);
821	size_t		io_sz;		/* byte count for io_func_ptr */
822	int		try_cnt;	/* number of tries done */
823	int		sleep_sec;	/* exp backoff sleep period in sec */
824	int		sleep_rv;	/* ret val from sleeping */
825	fmd_stat_t	io_retry_stat;	/* IO retry stat to update */
826	fmd_stat_t	io_fail_stat;	/* IO failure stat to update */
827
828	if ((conn == NULL) || (buf == NULL)) {
829		return (-EINVAL);
830	}
831	switch (io_op) {
832	case ETM_IO_OP_RD:
833		io_func_ptr = etm_xport_read;
834		io_retry_stat = etm_stats.etm_xport_rd_retry;
835		io_fail_stat = etm_stats.etm_xport_rd_fail;
836		break;
837	case ETM_IO_OP_WR:
838		io_func_ptr = etm_xport_write;
839		io_retry_stat = etm_stats.etm_xport_wr_retry;
840		io_fail_stat = etm_stats.etm_xport_wr_fail;
841		break;
842	default:
843		return (-EINVAL);
844	}
845	if (byte_cnt == 0) {
846		return (byte_cnt);	/* nop */
847	}
848
849	/* obtain [current] MTU size */
850
851	if ((n = etm_xport_get_opt(hdl, conn, ETM_XPORT_OPT_MTU_SZ)) < 0) {
852		mtu_sz = ETM_XPORT_MTU_SZ_DEF;
853	} else {
854		mtu_sz = n;
855	}
856
857	/* loop until all IO done, try limit exceeded, or real failure */
858
859	rv = 0;
860	datap = buf;
861	while (rv < byte_cnt) {
862		io_sz = MIN((byte_cnt - rv), mtu_sz);
863		try_cnt = 0;
864		sleep_sec = 0;
865
866		/* when give up, return -errno value even if partly done */
867
868		while ((n = (*io_func_ptr)(hdl, conn, datap, io_sz)) ==
869		    (-EAGAIN)) {
870			try_cnt++;
871			if (try_cnt > ETM_TRY_MAX_CNT) {
872				rv = n;
873				goto func_ret;
874			}
875			if (etm_is_dying) {
876				rv = (-EINTR);
877				goto func_ret;
878			}
879			if ((sleep_rv = etm_sleep(sleep_sec)) < 0) {
880				rv = sleep_rv;
881				goto func_ret;
882			}
883			sleep_sec = ((sleep_sec == 0) ? 1 :
884			    (sleep_sec * ETM_TRY_BACKOFF_RATE));
885			sleep_sec = MIN(sleep_sec, ETM_TRY_BACKOFF_CAP);
886			io_retry_stat.fmds_value.ui64++;
887			if (etm_debug_lvl >= 1) {
888				fmd_hdl_debug(hdl, "info: retrying io op %d "
889				    "due to EAGAIN\n", io_op);
890			}
891		} /* while trying the io operation */
892
893		if (etm_is_dying) {
894			rv = (-EINTR);
895			goto func_ret;
896		}
897		if (n < 0) {
898			rv = n;
899			goto func_ret;
900		}
901		/* avoid spinning CPU when given 0 bytes but no error */
902		if (n == 0) {
903			if ((sleep_rv = etm_sleep(ETM_SLEEP_QUIK)) < 0) {
904				rv = sleep_rv;
905				goto func_ret;
906			}
907		}
908		rv += n;
909		datap += n;
910	} /* while still have more data */
911
912func_ret:
913
914	if (rv < 0) {
915		io_fail_stat.fmds_value.ui64++;
916		fmd_hdl_debug(hdl, "error: %s: errno %d\n",
917		    err_substr, (int)(-rv));
918	}
919	if (etm_debug_lvl >= 3) {
920		fmd_hdl_debug(hdl, "info: io op %d ret %d of %d\n",
921		    io_op, (int)rv, (int)byte_cnt);
922	}
923	return (rv);
924
925} /* etm_io_op() */
926
927/*
928 * etm_magic_read - read the magic number of an ETM message header
929 *		from the given connection into the given buffer,
930 *		return 0 or -errno value
931 *
932 * Design_Note:	This routine is intended to help protect ETM from protocol
933 *		framing errors as might be caused by an SP reset / crash in
934 *		the middle of an ETM message send; the connection will be
935 *		read from for as many bytes as needed until the magic number
936 *		is found using a sliding buffer for comparisons.
937 */
938
939static int
940etm_magic_read(fmd_hdl_t *hdl, etm_xport_conn_t conn, uint32_t *magic_ptr)
941{
942	int		rv;		/* ret val */
943	uint32_t	magic_num;	/* magic number */
944	int		byte_cnt;	/* count of bytes read */
945	uint8_t		buf5[4+1];	/* sliding input buffer */
946	int		i, j;		/* indices into buf5 */
947	ssize_t		n;		/* gen use */
948	uint8_t		drop_buf[1024];	/* dropped bytes buffer */
949
950	rv = 0;		/* assume success */
951	magic_num = 0;
952	byte_cnt = 0;
953	j = 0;
954
955	/* magic number bytes are sent in network (big endian) order */
956
957	while (magic_num != ETM_PROTO_MAGIC_NUM) {
958		if ((n = etm_io_op(hdl, "bad io read on magic",
959		    conn, &buf5[j], 1, ETM_IO_OP_RD)) < 0) {
960			rv = n;
961			goto func_ret;
962		}
963		byte_cnt++;
964		j = MIN((j + 1), sizeof (magic_num));
965		if (byte_cnt < sizeof (magic_num)) {
966			continue;
967		}
968
969		if (byte_cnt > sizeof (magic_num)) {
970			etm_stats.etm_magic_drop_bytes.fmds_value.ui64++;
971			i = MIN(byte_cnt - j - 1, sizeof (drop_buf) - 1);
972			drop_buf[i] = buf5[0];
973			for (i = 0; i < j; i++) {
974				buf5[i] = buf5[i+1];
975			} /* for sliding the buffer contents */
976		}
977		(void) memcpy(&magic_num, &buf5[0], sizeof (magic_num));
978		magic_num = ntohl(magic_num);
979	} /* for reading bytes until find magic number */
980
981func_ret:
982
983	if (byte_cnt != sizeof (magic_num)) {
984		fmd_hdl_debug(hdl, "warning: bad proto frame "
985		    "implies corrupt/lost msg(s)\n");
986	}
987	if ((byte_cnt > sizeof (magic_num)) && (etm_debug_lvl >= 2)) {
988		i = MIN(byte_cnt - sizeof (magic_num), sizeof (drop_buf));
989		fmd_hdl_debug(hdl, "info: magic drop hexdump "
990		    "first %d of %d bytes:\n", i,
991		    byte_cnt - sizeof (magic_num));
992		etm_hexdump(hdl, drop_buf, i);
993	}
994
995	if (rv == 0) {
996		*magic_ptr = magic_num;
997	}
998	return (rv);
999
1000} /* etm_magic_read() */
1001
1002/*
1003 * etm_hdr_read - allocate, read, and validate a [variable sized]
1004 *		ETM message header from the given connection,
1005 *		return the allocated ETM message header
1006 *		(which is guaranteed to be large enough to reuse as a
1007 *		RESPONSE msg hdr) and its size
1008 *		or NULL and set errno on failure
1009 */
1010
1011static void *
1012etm_hdr_read(fmd_hdl_t *hdl, etm_xport_conn_t conn, size_t *szp)
1013{
1014	uint8_t			*hdrp;		/* ptr to header to return */
1015	size_t			hdr_sz;		/* sizeof *hdrp */
1016	etm_proto_v1_pp_t	pp; 		/* protocol preamble */
1017	etm_proto_v1_ev_hdr_t	*ev_hdrp;	/* for FMA_EVENT msg */
1018	etm_proto_v1_ctl_hdr_t	*ctl_hdrp;	/* for CONTROL msg */
1019	etm_proto_v1_resp_hdr_t *resp_hdrp;	/* for RESPONSE msg */
1020	etm_proto_v3_sa_hdr_t	*sa_hdrp;	/* for ALERT msg */
1021	uint32_t		*lenp;		/* ptr to FMA event length */
1022	ssize_t			i, n;		/* gen use */
1023	uint8_t	misc_buf[ETM_MISC_BUF_SZ];	/* for var sized hdrs */
1024	int			dummy_int;	/* dummy var to appease lint */
1025
1026	hdrp = NULL; hdr_sz = 0;
1027
1028	/* read the magic number which starts the protocol preamble */
1029
1030	if ((n = etm_magic_read(hdl, conn, &pp.pp_magic_num)) < 0) {
1031		errno = (-n);
1032		etm_stats.etm_magic_bad.fmds_value.ui64++;
1033		return (NULL);
1034	}
1035
1036	/* read the rest of the protocol preamble all at once */
1037
1038	if ((n = etm_io_op(hdl, "bad io read on preamble",
1039	    conn, &pp.pp_proto_ver, sizeof (pp) - sizeof (pp.pp_magic_num),
1040	    ETM_IO_OP_RD)) < 0) {
1041		errno = (-n);
1042		return (NULL);
1043	}
1044
1045	/*
1046	 * Design_Note:	The magic number was already network decoded; but
1047	 *		some other preamble fields also need to be decoded,
1048	 *		specifically pp_xid and pp_timeout. The rest of the
1049	 *		preamble fields are byte sized and hence need no
1050	 *		decoding.
1051	 */
1052
1053	pp.pp_xid = ntohl(pp.pp_xid);
1054	pp.pp_timeout = ntohl(pp.pp_timeout);
1055
1056	/* sanity check the header as best we can */
1057
1058	if ((pp.pp_proto_ver < ETM_PROTO_V1) ||
1059	    (pp.pp_proto_ver > ETM_PROTO_V3)) {
1060		fmd_hdl_error(hdl, "error: bad proto ver %d\n",
1061		    (int)pp.pp_proto_ver);
1062		errno = EPROTO;
1063		etm_stats.etm_ver_bad.fmds_value.ui64++;
1064		return (NULL);
1065	}
1066
1067	dummy_int = pp.pp_msg_type;
1068	if ((dummy_int <= ETM_MSG_TYPE_TOO_LOW) ||
1069	    (dummy_int >= ETM_MSG_TYPE_TOO_BIG)) {
1070		fmd_hdl_error(hdl, "error: bad msg type %d", dummy_int);
1071		errno = EBADMSG;
1072		etm_stats.etm_msgtype_bad.fmds_value.ui64++;
1073		return (NULL);
1074	}
1075
1076	/* handle [var sized] hdrs for FMA_EVENT, CONTROL, RESPONSE msgs */
1077
1078	if (pp.pp_msg_type == ETM_MSG_TYPE_FMA_EVENT) {
1079
1080		ev_hdrp = (void*)&misc_buf[0];
1081		hdr_sz = sizeof (*ev_hdrp);
1082		(void) memcpy(&ev_hdrp->ev_pp, &pp, sizeof (pp));
1083
1084		/* sanity check the header's timeout */
1085
1086		if ((ev_hdrp->ev_pp.pp_proto_ver == ETM_PROTO_V1) &&
1087		    (ev_hdrp->ev_pp.pp_timeout != ETM_PROTO_V1_TIMEOUT_NONE)) {
1088			errno = ETIME;
1089			etm_stats.etm_timeout_bad.fmds_value.ui64++;
1090			return (NULL);
1091		}
1092
1093		/* get all FMA event lengths from the header */
1094
1095		lenp = (uint32_t *)&ev_hdrp->ev_lens[0]; lenp--;
1096		i = -1;	/* cnt of length entries preceding 0 */
1097		do {
1098			i++; lenp++;
1099			if ((sizeof (*ev_hdrp) + (i * sizeof (*lenp))) >=
1100			    ETM_MISC_BUF_SZ) {
1101				errno = E2BIG;	/* ridiculous size */
1102				etm_stats.etm_evlens_bad.fmds_value.ui64++;
1103				return (NULL);
1104			}
1105			if ((n = etm_io_op(hdl, "bad io read on event len",
1106			    conn, lenp, sizeof (*lenp), ETM_IO_OP_RD)) < 0) {
1107				errno = (-n);
1108				return (NULL);
1109			}
1110			*lenp = ntohl(*lenp);
1111
1112		} while (*lenp != 0);
1113		i += 0; /* first len already counted by sizeof(ev_hdr) */
1114		hdr_sz += (i * sizeof (*lenp));
1115
1116		etm_stats.etm_rd_hdr_fmaevent.fmds_value.ui64++;
1117
1118	} else if (pp.pp_msg_type == ETM_MSG_TYPE_CONTROL) {
1119
1120		ctl_hdrp = (void*)&misc_buf[0];
1121		hdr_sz = sizeof (*ctl_hdrp);
1122		(void) memcpy(&ctl_hdrp->ctl_pp, &pp, sizeof (pp));
1123
1124		/* sanity check the header's sub type (control selector) */
1125
1126		if ((ctl_hdrp->ctl_pp.pp_sub_type <= ETM_CTL_SEL_TOO_LOW) ||
1127		    (ctl_hdrp->ctl_pp.pp_sub_type >= ETM_CTL_SEL_TOO_BIG)) {
1128			fmd_hdl_error(hdl, "error: bad ctl sub type %d\n",
1129			    (int)ctl_hdrp->ctl_pp.pp_sub_type);
1130			errno = EBADMSG;
1131			etm_stats.etm_subtype_bad.fmds_value.ui64++;
1132			return (NULL);
1133		}
1134
1135		/* get the control length */
1136
1137		if ((n = etm_io_op(hdl, "bad io read on ctl len",
1138		    conn, &ctl_hdrp->ctl_len, sizeof (ctl_hdrp->ctl_len),
1139		    ETM_IO_OP_RD)) < 0) {
1140			errno = (-n);
1141			return (NULL);
1142		}
1143
1144		ctl_hdrp->ctl_len = ntohl(ctl_hdrp->ctl_len);
1145
1146		etm_stats.etm_rd_hdr_control.fmds_value.ui64++;
1147
1148	} else if (pp.pp_msg_type == ETM_MSG_TYPE_RESPONSE) {
1149
1150		resp_hdrp = (void*)&misc_buf[0];
1151		hdr_sz = sizeof (*resp_hdrp);
1152		(void) memcpy(&resp_hdrp->resp_pp, &pp, sizeof (pp));
1153
1154		/* sanity check the header's timeout */
1155
1156		if (resp_hdrp->resp_pp.pp_timeout !=
1157		    ETM_PROTO_V1_TIMEOUT_NONE) {
1158			errno = ETIME;
1159			etm_stats.etm_timeout_bad.fmds_value.ui64++;
1160			return (NULL);
1161		}
1162
1163		/* get the response code and length */
1164
1165		if ((n = etm_io_op(hdl, "bad io read on resp code+len",
1166		    conn, &resp_hdrp->resp_code,
1167		    sizeof (resp_hdrp->resp_code)
1168		    + sizeof (resp_hdrp->resp_len),
1169		    ETM_IO_OP_RD)) < 0) {
1170			errno = (-n);
1171			return (NULL);
1172		}
1173
1174		resp_hdrp->resp_code = ntohl(resp_hdrp->resp_code);
1175		resp_hdrp->resp_len = ntohl(resp_hdrp->resp_len);
1176
1177		etm_stats.etm_rd_hdr_response.fmds_value.ui64++;
1178
1179	} else if (pp.pp_msg_type == ETM_MSG_TYPE_ALERT) {
1180
1181		sa_hdrp = (void*)&misc_buf[0];
1182		hdr_sz = sizeof (*sa_hdrp);
1183		(void) memcpy(&sa_hdrp->sa_pp, &pp, sizeof (pp));
1184
1185		/* sanity check the header's protocol version */
1186
1187		if (sa_hdrp->sa_pp.pp_proto_ver != ETM_PROTO_V3) {
1188			errno = EPROTO;
1189			etm_stats.etm_ver_bad.fmds_value.ui64++;
1190			return (NULL);
1191		}
1192
1193		/* get the priority and length */
1194
1195		if ((n = etm_io_op(hdl, "bad io read on sa priority+len",
1196		    conn, &sa_hdrp->sa_priority,
1197		    sizeof (sa_hdrp->sa_priority)
1198		    + sizeof (sa_hdrp->sa_len),
1199		    ETM_IO_OP_RD)) < 0) {
1200			errno = (-n);
1201			return (NULL);
1202		}
1203
1204		sa_hdrp->sa_priority = ntohl(sa_hdrp->sa_priority);
1205		sa_hdrp->sa_len = ntohl(sa_hdrp->sa_len);
1206
1207		etm_stats.etm_rd_hdr_alert.fmds_value.ui64++;
1208
1209	} /* whether we have FMA_EVENT, ALERT, CONTROL, or RESPONSE msg */
1210
1211	/*
1212	 * choose a header size that allows hdr reuse for RESPONSE msgs,
1213	 * allocate and populate the message header, and
1214	 * return alloc size to caller for later free of hdrp
1215	 */
1216
1217	hdr_sz = MAX(hdr_sz, sizeof (*resp_hdrp));
1218	hdrp = fmd_hdl_zalloc(hdl, hdr_sz, FMD_SLEEP);
1219	(void) memcpy(hdrp, misc_buf, hdr_sz);
1220
1221	if (etm_debug_lvl >= 3) {
1222		fmd_hdl_debug(hdl, "info: msg hdr hexdump %d bytes:\n", hdr_sz);
1223		etm_hexdump(hdl, hdrp, hdr_sz);
1224	}
1225	*szp = hdr_sz;
1226	return (hdrp);
1227
1228} /* etm_hdr_read() */
1229
1230/*
1231 * etm_hdr_write - create and write a [variable sized] ETM message header
1232 *		to the given connection appropriate for the given FMA event
1233 *		and type of nvlist encoding,
1234 *		return the allocated ETM message header and its size
1235 *		or NULL and set errno on failure
1236 */
1237
1238static void*
1239etm_hdr_write(fmd_hdl_t *hdl, etm_xport_conn_t conn, nvlist_t *evp,
1240						int encoding, size_t *szp)
1241{
1242	etm_proto_v1_ev_hdr_t	*hdrp;		/* for FMA_EVENT msg */
1243	size_t			hdr_sz;		/* sizeof *hdrp */
1244	uint32_t		*lenp;		/* ptr to FMA event length */
1245	size_t			evsz;		/* packed FMA event size */
1246	ssize_t			n;		/* gen use */
1247
1248	/* allocate and populate the message header for 1 FMA event */
1249
1250	hdr_sz = sizeof (*hdrp) + (1 * sizeof (hdrp->ev_lens[0]));
1251
1252	hdrp = fmd_hdl_zalloc(hdl, hdr_sz, FMD_SLEEP);
1253
1254	/*
1255	 * Design_Note: Although the ETM protocol supports it, we do not (yet)
1256	 *		want responses/ACKs on FMA events that we send. All
1257	 *		such messages are sent with ETM_PROTO_V1_TIMEOUT_NONE.
1258	 */
1259
1260	hdrp->ev_pp.pp_magic_num = ETM_PROTO_MAGIC_NUM;
1261	hdrp->ev_pp.pp_magic_num = htonl(hdrp->ev_pp.pp_magic_num);
1262	hdrp->ev_pp.pp_proto_ver = ETM_PROTO_V1;
1263	hdrp->ev_pp.pp_msg_type = ETM_MSG_TYPE_FMA_EVENT;
1264	hdrp->ev_pp.pp_sub_type = 0;
1265	hdrp->ev_pp.pp_rsvd_pad = 0;
1266	hdrp->ev_pp.pp_xid = etm_xid_cur;
1267	hdrp->ev_pp.pp_xid = htonl(hdrp->ev_pp.pp_xid);
1268	etm_xid_cur += ETM_XID_INC;
1269	hdrp->ev_pp.pp_timeout = ETM_PROTO_V1_TIMEOUT_NONE;
1270	hdrp->ev_pp.pp_timeout = htonl(hdrp->ev_pp.pp_timeout);
1271
1272	lenp = &hdrp->ev_lens[0];
1273
1274	if ((n = nvlist_size(evp, &evsz, encoding)) != 0) {
1275		errno = n;
1276		fmd_hdl_free(hdl, hdrp, hdr_sz);
1277		etm_stats.etm_os_nvlist_size_fail.fmds_value.ui64++;
1278		return (NULL);
1279	}
1280
1281	/* indicate 1 FMA event, network encode its length, and 0-terminate */
1282
1283	etm_stats.etm_wr_max_ev_per_msg.fmds_value.ui64 = 1;
1284
1285	*lenp = evsz; *lenp = htonl(*lenp); lenp++;
1286	*lenp = 0; *lenp = htonl(*lenp); lenp++;
1287
1288	/*
1289	 * write the network encoded header to the transport, and
1290	 * return alloc size to caller for later free
1291	 */
1292
1293	if ((n = etm_io_op(hdl, "bad io write on event hdr",
1294	    conn, hdrp, hdr_sz, ETM_IO_OP_WR)) < 0) {
1295		errno = (-n);
1296		fmd_hdl_free(hdl, hdrp, hdr_sz);
1297		return (NULL);
1298	}
1299
1300	*szp = hdr_sz;
1301	return (hdrp);
1302
1303} /* etm_hdr_write() */
1304
1305/*
1306 * etm_post_to_fmd - post the given FMA event to FMD
1307 *			via a FMD transport API call,
1308 *			return 0 or -errno value
1309 *
1310 * caveats:	the FMA event (evp) is freed by FMD,
1311 *		thus callers of this function should
1312 *		immediately discard any ptr they have to the
1313 *		nvlist without freeing or dereferencing it
1314 */
1315
1316static int
1317etm_post_to_fmd(fmd_hdl_t *hdl, fmd_xprt_t *fmd_xprt, nvlist_t *evp)
1318{
1319	ssize_t			ev_sz;		/* sizeof *evp */
1320
1321	(void) nvlist_size(evp, (size_t *)&ev_sz, NV_ENCODE_XDR);
1322
1323	if (etm_debug_lvl >= 2) {
1324		etm_show_time(hdl, "ante ev post");
1325	}
1326	fmd_xprt_post(hdl, fmd_xprt, evp, 0);
1327	etm_stats.etm_wr_fmd_fmaevent.fmds_value.ui64++;
1328	etm_stats.etm_wr_fmd_bytes.fmds_value.ui64 += ev_sz;
1329	if (etm_debug_lvl >= 1) {
1330		fmd_hdl_debug(hdl, "info: event %p post ok to FMD\n", evp);
1331	}
1332	if (etm_debug_lvl >= 2) {
1333		etm_show_time(hdl, "post ev post");
1334	}
1335	return (0);
1336
1337} /* etm_post_to_fmd() */
1338
1339/*
1340 * Ideally we would just use syslog(3C) for outputting our messages.
1341 * Unfortunately, as this module is running within the FMA daemon context,
1342 * that would create the situation where this module's openlog() would
1343 * have the monopoly on syslog(3C) for the daemon and all its modules.
1344 * To avoid that situation, this module uses the same logic as the
1345 * syslog-msgs FM module to directly call into the log(7D) and sysmsg(7D)
1346 * devices for syslog and console.
1347 */
1348
1349static int
1350etm_post_to_syslog(fmd_hdl_t *hdl, uint32_t priority, uint32_t body_sz,
1351							uint8_t *body_buf)
1352{
1353	char		*sysmessage;	/* Formatted message */
1354	size_t		formatlen;	/* maximum length of sysmessage */
1355	struct strbuf	ctl, dat;	/* structs pushed to the logfd */
1356	uint32_t	msgid;		/* syslog message ID number */
1357
1358	if ((syslog_file == 0) && (syslog_cons == 0)) {
1359		return (0);
1360	}
1361
1362	if (etm_debug_lvl >= 2) {
1363		etm_show_time(hdl, "ante syslog post");
1364	}
1365
1366	formatlen = body_sz + 64; /* +64 for prefix strings added below */
1367	sysmessage = fmd_hdl_zalloc(hdl, formatlen, FMD_SLEEP);
1368
1369	if (syslog_file) {
1370		STRLOG_MAKE_MSGID(body_buf, msgid);
1371		(void) snprintf(sysmessage, formatlen,
1372		    "SC Alert: [ID %u FACILITY_AND_PRIORITY] %s", msgid,
1373		    body_buf);
1374
1375		syslog_ctl.pri = syslog_facility | priority;
1376
1377		ctl.buf = (void *)&syslog_ctl;
1378		ctl.len = sizeof (syslog_ctl);
1379
1380		dat.buf = sysmessage;
1381		dat.len = strlen(sysmessage) + 1;
1382
1383		if (putmsg(syslog_logfd, &ctl, &dat, 0) != 0) {
1384			fmd_hdl_debug(hdl, "putmsg failed: %s\n",
1385			    strerror(errno));
1386			etm_stats.etm_log_err.fmds_value.ui64++;
1387		}
1388	}
1389
1390	if (syslog_cons) {
1391		(void) snprintf(sysmessage, formatlen,
1392		    "SC Alert: %s\r\n", body_buf);
1393
1394		dat.buf = sysmessage;
1395		dat.len = strlen(sysmessage) + 1;
1396
1397		if (write(syslog_msgfd, dat.buf, dat.len) != dat.len) {
1398			fmd_hdl_debug(hdl, "write failed: %s\n",
1399			    strerror(errno));
1400			etm_stats.etm_msg_err.fmds_value.ui64++;
1401		}
1402	}
1403
1404	fmd_hdl_free(hdl, sysmessage, formatlen);
1405
1406	if (etm_debug_lvl >= 2) {
1407		etm_show_time(hdl, "post syslog post");
1408	}
1409
1410	return (0);
1411}
1412
1413
1414/*
1415 * etm_req_ver_negot - send an ETM control message to the other end requesting
1416 *			that the ETM protocol version be negotiated/set
1417 */
1418
1419static void
1420etm_req_ver_negot(fmd_hdl_t *hdl)
1421{
1422	etm_xport_addr_t	*addrv;		/* default dst addr(s) */
1423	etm_xport_conn_t	conn;		/* connection to other end */
1424	etm_proto_v1_ctl_hdr_t	*ctl_hdrp;	/* for CONTROL msg */
1425	size_t			hdr_sz;		/* sizeof header */
1426	uint8_t			*body_buf;	/* msg body buffer */
1427	uint32_t		body_sz;	/* sizeof *body_buf */
1428	ssize_t			i;		/* gen use */
1429
1430	/* populate an ETM control msg to send */
1431
1432	hdr_sz = sizeof (*ctl_hdrp);
1433	body_sz = (3 + 1);		/* version bytes plus null byte */
1434
1435	ctl_hdrp = fmd_hdl_zalloc(hdl, hdr_sz + body_sz, FMD_SLEEP);
1436
1437	ctl_hdrp->ctl_pp.pp_magic_num = htonl(ETM_PROTO_MAGIC_NUM);
1438	ctl_hdrp->ctl_pp.pp_proto_ver = ETM_PROTO_V1;
1439	ctl_hdrp->ctl_pp.pp_msg_type = ETM_MSG_TYPE_CONTROL;
1440	ctl_hdrp->ctl_pp.pp_sub_type = ETM_CTL_SEL_VER_NEGOT_REQ;
1441	ctl_hdrp->ctl_pp.pp_rsvd_pad = 0;
1442	etm_xid_ver_negot = etm_xid_cur;
1443	etm_xid_cur += ETM_XID_INC;
1444	ctl_hdrp->ctl_pp.pp_xid = htonl(etm_xid_ver_negot);
1445	ctl_hdrp->ctl_pp.pp_timeout = htonl(ETM_PROTO_V1_TIMEOUT_FOREVER);
1446	ctl_hdrp->ctl_len = htonl(body_sz);
1447
1448	body_buf = (void*)&ctl_hdrp->ctl_len;
1449	body_buf += sizeof (ctl_hdrp->ctl_len);
1450	*body_buf++ = ETM_PROTO_V3;
1451	*body_buf++ = ETM_PROTO_V2;
1452	*body_buf++ = ETM_PROTO_V1;
1453	*body_buf++ = '\0';
1454
1455	/*
1456	 * open and close a connection to send the ETM control msg
1457	 * to any/all of the default dst addrs
1458	 */
1459
1460	if ((addrv = etm_xport_get_ev_addrv(hdl, NULL)) == NULL) {
1461		fmd_hdl_error(hdl,
1462		    "error: bad ctl dst addrs errno %d\n", errno);
1463		etm_stats.etm_xport_get_ev_addrv_fail.fmds_value.ui64++;
1464		goto func_ret;
1465	}
1466
1467	for (i = 0; addrv[i] != NULL; i++) {
1468
1469		if (etm_conn_open(hdl, "bad conn open during ver negot",
1470		    addrv[i], &conn) < 0) {
1471			continue;
1472		}
1473		if (etm_io_op(hdl, "bad io write on ctl hdr+body",
1474		    conn, ctl_hdrp, hdr_sz + body_sz, ETM_IO_OP_WR) >= 0) {
1475			etm_stats.etm_wr_hdr_control.fmds_value.ui64++;
1476			etm_stats.etm_wr_body_control.fmds_value.ui64++;
1477		}
1478		(void) etm_conn_close(hdl, "bad conn close during ver negot",
1479		    conn);
1480
1481	} /* foreach dst addr */
1482
1483func_ret:
1484
1485	if (addrv != NULL) {
1486		etm_xport_free_addrv(hdl, addrv);
1487	}
1488	fmd_hdl_free(hdl, ctl_hdrp, hdr_sz + body_sz);
1489
1490} /* etm_req_ver_negot() */
1491
1492
1493
1494/*
1495 * etm_iosvc_msg_enq - add element to tail of ETM iosvc msg queue
1496 * etm_iosvc_msg_deq - del element from head of ETM iosvc msg  queue
1497 * need to grab the mutex lock before calling this routine
1498 * return >0 for success, or -errno value
1499 */
1500static int
1501etm_iosvc_msg_enq(fmd_hdl_t *hdl, etm_iosvc_t *iosvc, etm_iosvc_q_ele_t *msgp)
1502{
1503	etm_iosvc_q_ele_t		*newp;	/* ptr to new msg q ele */
1504
1505	if (iosvc->msg_q_cur_len >= iosvc->msg_q_max_len) {
1506		fmd_hdl_debug(hdl, "warning: enq to full msg queue\n");
1507		return (-E2BIG);
1508	}
1509
1510	newp = fmd_hdl_zalloc(hdl, sizeof (*newp), FMD_SLEEP);
1511	(void) memcpy(newp, msgp, sizeof (*newp));
1512	newp->msg_nextp = NULL;
1513
1514	if (iosvc->msg_q_cur_len == 0) {
1515		iosvc->msg_q_head = newp;
1516	} else {
1517		iosvc->msg_q_tail->msg_nextp = newp;
1518	}
1519
1520	iosvc->msg_q_tail = newp;
1521	iosvc->msg_q_cur_len++;
1522	fmd_hdl_debug(hdl, "info: current msg queue length %d\n",
1523	    iosvc->msg_q_cur_len);
1524
1525	return (1);
1526
1527} /* etm_iosvc_msg_enq() */
1528
1529static int
1530etm_iosvc_msg_deq(fmd_hdl_t *hdl, etm_iosvc_t *iosvc, etm_iosvc_q_ele_t *msgp)
1531{
1532	etm_iosvc_q_ele_t	*oldp;	/* ptr to old msg q ele */
1533
1534	if (iosvc->msg_q_cur_len == 0) {
1535		fmd_hdl_debug(hdl, "warning: deq from empty responder queue\n");
1536		return (-ENOENT);
1537	}
1538
1539	(void) memcpy(msgp, iosvc->msg_q_head, sizeof (*msgp));
1540	msgp->msg_nextp = NULL;
1541
1542	oldp = iosvc->msg_q_head;
1543	iosvc->msg_q_head = iosvc->msg_q_head->msg_nextp;
1544
1545	/*
1546	 * free the mem alloc-ed in etm_iosvc_msg_enq()
1547	 */
1548	fmd_hdl_free(hdl, oldp, sizeof (*oldp));
1549
1550	iosvc->msg_q_cur_len--;
1551	if (iosvc->msg_q_cur_len == 0) {
1552		iosvc->msg_q_tail = NULL;
1553	}
1554
1555	return (1);
1556
1557} /* etm_iosvc_msg_deq() */
1558
1559
1560/*
1561 * etm_msg_enq_head():
1562 * enq the msg to the head of the Q.
1563 * If the Q is full, drop the msg at the tail then enq the msg at head.
1564 * need to grab mutex lock iosvc->msg_q_lock before calling this routine.
1565 */
1566static void
1567etm_msg_enq_head(fmd_hdl_t *fmd_hdl, etm_iosvc_t *iosvc,
1568    etm_iosvc_q_ele_t *msg_ele)
1569{
1570
1571	etm_iosvc_q_ele_t	*newp;	/* iosvc msg ele ptr */
1572
1573	if (iosvc->msg_q_cur_len >= iosvc->msg_q_max_len) {
1574		fmd_hdl_debug(fmd_hdl,
1575		    "warning: add to head of a full msg queue."
1576		    " Drop the msg at the tail\n");
1577		/*
1578		 * drop the msg at the tail
1579		 */
1580		newp = iosvc->msg_q_head;
1581		while (newp->msg_nextp != iosvc->msg_q_tail) {
1582			newp = newp->msg_nextp;
1583		}
1584
1585		/*
1586		 * free the msg in iosvc->msg_q_tail->msg
1587		 * free the mem pointed to by iosvc->msg_q_tail
1588		 */
1589		fmd_hdl_free(fmd_hdl, iosvc->msg_q_tail->msg,
1590		    iosvc->msg_q_tail->msg_size);
1591		fmd_hdl_free(fmd_hdl, iosvc->msg_q_tail, sizeof (*newp));
1592		iosvc->msg_q_tail = newp;
1593		iosvc->msg_q_tail->msg_nextp = NULL;
1594		iosvc->msg_q_cur_len--;
1595	}
1596
1597	/*
1598	 * enq the msg to the head
1599	 */
1600	newp = fmd_hdl_zalloc(fmd_hdl, sizeof (*newp), FMD_SLEEP);
1601	(void) memcpy(newp, msg_ele, sizeof (*newp));
1602	if (iosvc->msg_q_cur_len == 0) {
1603		newp->msg_nextp = NULL;
1604		iosvc->msg_q_tail = newp;
1605	} else {
1606		newp->msg_nextp = iosvc->msg_q_head;
1607	}
1608	iosvc->msg_q_head = newp;
1609	iosvc->msg_q_cur_len++;
1610} /* etm_msg_enq_head() */
1611
1612/*
1613 * etm_iosvc_cleanup():
1614 * Clean up an iosvc structure
1615 * 1) close the fmd_xprt if it has not been closed
1616 * 2) Terminate the send/revc threads
1617 * 3) If the clean_msg_q flag is set, free all fma events in the queue. In
1618 *    addition, if the chpt_remove flag is set, delete the checkpoint so that
1619 *    the events are not persisted.
1620 */
1621static void
1622etm_iosvc_cleanup(fmd_hdl_t *fmd_hdl, etm_iosvc_t *iosvc, boolean_t clean_msg_q,
1623    boolean_t ckpt_remove)
1624{
1625
1626	etm_iosvc_q_ele_t	msg_ele;	/* io svc msg Q ele */
1627
1628	iosvc->thr_is_dying = 1;
1629
1630	iosvc->ds_hdl = DS_INVALID_HDL;
1631	if (iosvc->fmd_xprt != NULL) {
1632		fmd_xprt_close(fmd_hdl, iosvc->fmd_xprt);
1633		iosvc->fmd_xprt = NULL;
1634	} /* if fmd-xprt has been opened */
1635
1636	if (iosvc->send_tid != NULL) {
1637		fmd_thr_signal(fmd_hdl, iosvc->send_tid);
1638		fmd_thr_destroy(fmd_hdl, iosvc->send_tid);
1639		iosvc->send_tid = NULL;
1640	} /* if io svc send thread was created ok */
1641
1642	if (iosvc->recv_tid != NULL) {
1643		fmd_thr_signal(fmd_hdl, iosvc->recv_tid);
1644		fmd_thr_destroy(fmd_hdl, iosvc->recv_tid);
1645		iosvc->recv_tid = NULL;
1646	} /* if root domain recv thread was created */
1647
1648
1649	if (clean_msg_q) {
1650		iosvc->ldom_name[0] = '\0';
1651
1652		(void) pthread_mutex_lock(&iosvc->msg_q_lock);
1653		while (iosvc->msg_q_cur_len > 0) {
1654			(void) etm_iosvc_msg_deq(fmd_hdl, iosvc, &msg_ele);
1655			if (ckpt_remove == B_TRUE &&
1656			    msg_ele.ckpt_flag != ETM_CKPT_NOOP) {
1657				etm_ckpt_remove(fmd_hdl, &msg_ele);
1658			}
1659			fmd_hdl_free(fmd_hdl, msg_ele.msg, msg_ele.msg_size);
1660		}
1661		(void) pthread_mutex_unlock(&iosvc->msg_q_lock);
1662	}
1663
1664	return;
1665
1666} /* etm_iosvc_cleanup() */
1667
1668/*
1669 * etm_iosvc_lookup(using ldom_name or ds_hdl when ldom_name is empty)
1670 * not found, create one, add to iosvc_list
1671 */
1672etm_iosvc_t *
1673etm_iosvc_lookup(fmd_hdl_t *fmd_hdl, char *ldom_name, ds_hdl_t ds_hdl,
1674    boolean_t iosvc_create)
1675{
1676	uint32_t		i;			/* for loop var */
1677	int32_t			first_empty_slot = -1;	/* remember that */
1678
1679	for (i = 0; i < NUM_OF_ROOT_DOMAINS; i++) {
1680		if (ldom_name[0] == '\0') {
1681			/*
1682			 * search by hdl passed in
1683			 * the only time this is used is at ds_unreg_cb time.
1684			 * there is no ldom name, only the valid ds_hdl.
1685			 * find an iosvc with the matching ds_hdl.
1686			 * ignore the iosvc_create flag, should never need to
1687			 * create an iosvc for ds_unreg_cb
1688			 */
1689			if (ds_hdl == iosvc_list[i].ds_hdl) {
1690				if (etm_debug_lvl >= 2) {
1691				fmd_hdl_debug(fmd_hdl,
1692			    "info: found an iosvc at slot %d w/ ds_hdl %d \n",
1693				    i, iosvc_list[i].ds_hdl);
1694				}
1695				if (iosvc_list[i].ldom_name[0] != '\0')
1696					if (etm_debug_lvl >= 2) {
1697						fmd_hdl_debug(fmd_hdl,
1698				    "info: found an iosvc w/ ldom_name %s \n",
1699						    iosvc_list[i].ldom_name);
1700				}
1701				return (&iosvc_list[i]);
1702			} else {
1703				continue;
1704			}
1705		} else if (iosvc_list[i].ldom_name[0] != '\0') {
1706			/*
1707			 * this is  an non-empty iosvc structure slot
1708			 */
1709			if (strcmp(ldom_name, iosvc_list[i].ldom_name) == 0) {
1710				/*
1711				 * found an iosvc structure that matches the
1712				 * passed in ldom_name, return the ptr
1713				 */
1714				if (etm_debug_lvl >= 2) {
1715					fmd_hdl_debug(fmd_hdl, "info: found an "
1716					    "iosvc at slot %d w/ ds_hdl %d \n",
1717					    i, iosvc_list[i].ds_hdl);
1718					fmd_hdl_debug(fmd_hdl, "info: found an "
1719					    "iosvc w/ ldom_name %s \n",
1720					    iosvc_list[i].ldom_name);
1721				}
1722				return (&iosvc_list[i]);
1723			} else {
1724				/*
1725				 * non-empty slot with no-matching name,
1726				 * move on to next slot.
1727				 */
1728				continue;
1729			}
1730		} else {
1731			/*
1732			 * found the 1st slot with ldom name being empty
1733			 * remember the slot #, will be used for creating one
1734			 */
1735			if (first_empty_slot == -1) {
1736				first_empty_slot = i;
1737			}
1738		}
1739	}
1740	if (iosvc_create == B_TRUE && first_empty_slot >= 0) {
1741		/*
1742		 * this is the case we need to add an iosvc at first_empty_slot
1743		 * for the ldom_name at iosvc_list[first_empty_slot]
1744		 */
1745		fmd_hdl_debug(fmd_hdl,
1746		    "info: create an iosvc with ldom name %s\n",
1747		    ldom_name);
1748		i = first_empty_slot;
1749		(void) memcpy(&iosvc_list[i], &io_svc, sizeof (etm_iosvc_t));
1750		(void) strcpy(iosvc_list[i].ldom_name, ldom_name);
1751		fmd_hdl_debug(fmd_hdl, "info: iosvc #%d has ldom name %s\n",
1752		    i, iosvc_list[i].ldom_name);
1753		return (&iosvc_list[i]);
1754	} else {
1755		return (NULL);
1756	}
1757
1758} /* etm_iosvc_lookup() */
1759
1760
1761/*
1762 * etm_ckpt_remove:
1763 * remove the ckpt for the iosvc element
1764 */
1765static void
1766etm_ckpt_remove(fmd_hdl_t *hdl, etm_iosvc_q_ele_t *ele) {
1767	int		err;			/* temp error */
1768	nvlist_t	*evp = NULL;		/* event pointer */
1769	etm_proto_v1_ev_hdr_t	*hdrp;		/* hdr for FMA_EVENT */
1770	char		*buf;			/* packed event pointer */
1771
1772	if ((ele->ckpt_flag == ETM_CKPT_NOOP) ||
1773	    (etm_ldom_type != LDOM_TYPE_CONTROL)) {
1774		return;
1775	}
1776
1777	/* the pointer to the packed event in the etm message */
1778	hdrp = (etm_proto_v1_ev_hdr_t *)((ptrdiff_t)ele->msg);
1779	buf = (char *)((ptrdiff_t)hdrp + sizeof (*hdrp)
1780	    + (1 * sizeof (hdrp->ev_lens[0])));
1781
1782	/* unpack it, then uncheckpoited it */
1783	if ((err = nvlist_unpack(buf, hdrp->ev_lens[0], &evp, 0)) != 0) {
1784		fmd_hdl_debug(hdl, "failed to unpack event(rc=%d)\n", err);
1785		return;
1786	}
1787	(void) etm_ckpt_delete(hdl, evp);
1788	nvlist_free(evp);
1789}
1790
1791/*
1792 * etm_send_ds_msg()
1793 * call ds_send_msg() to send the msg passed in.
1794 * timedcond_wait for the ACK to come back.
1795 * if the ACK doesn't come in the specified time, retrun -EAGAIN.
1796 * other wise, return 1.
1797 */
1798int
1799etm_send_ds_msg(fmd_hdl_t *fmd_hdl, boolean_t ckpt_remove, etm_iosvc_t *iosvc,
1800    etm_iosvc_q_ele_t *msg_ele, etm_proto_v1_ev_hdr_t *evhdrp)
1801{
1802	uint32_t		rc;		/* for return code  */
1803
1804	struct timeval		tv;
1805	struct timespec		timeout;
1806
1807
1808	/*
1809	 * call ds_send_msg(). Return (-EAGAIN) if not successful
1810	 */
1811	if ((rc = (*etm_ds_send_msg)(iosvc->ds_hdl, msg_ele->msg,
1812	    msg_ele->msg_size)) != 0) {
1813		fmd_hdl_debug(fmd_hdl, "info: ds_send_msg rc %d xid %d\n",
1814		    rc, evhdrp->ev_pp.pp_xid);
1815			return (-EAGAIN);
1816	}
1817
1818	/*
1819	 * wait on the cv for resp msg for cur_send_xid
1820	 */
1821	(void *) pthread_mutex_lock(&iosvc->msg_ack_lock);
1822
1823	(void) gettimeofday(&tv, 0);
1824	timeout.tv_sec = tv.tv_sec + etm_fma_resp_wait_time;
1825	timeout.tv_nsec = 0;
1826
1827	fmd_hdl_debug(fmd_hdl, "info: waiting on msg_ack_cv for ldom %s\n",
1828	    iosvc->ldom_name);
1829	rc = pthread_cond_timedwait(&iosvc->msg_ack_cv, &iosvc->msg_ack_lock,
1830	    &timeout);
1831	(void *) pthread_mutex_unlock(&iosvc->msg_ack_lock);
1832	fmd_hdl_debug(fmd_hdl,  "info: msg_ack_cv returns with rc %d\n", rc);
1833
1834	/*
1835	 * check to see if ack_ok is non-zero
1836	 * if non-zero, resp msg has been received
1837	 */
1838	if (iosvc->ack_ok != 0) {
1839		/*
1840		 * ACK came ok,  this send is successful,
1841		 * tell the caller ready to send next.
1842		 * free mem alloc-ed in
1843		 * etm_pack_ds_msg
1844		 */
1845		if (ckpt_remove == B_TRUE &&
1846		    etm_ldom_type == LDOM_TYPE_CONTROL) {
1847			etm_ckpt_remove(fmd_hdl, msg_ele);
1848		}
1849		fmd_hdl_free(fmd_hdl, msg_ele->msg, msg_ele->msg_size);
1850		iosvc->cur_send_xid++;
1851		return (1);
1852	} else {
1853		/*
1854		 * the ACK did not come on time
1855		 * tell the caller to resend cur_send_xid
1856		 */
1857		return (-EAGAIN);
1858	} /* iosvc->ack_ok != 0 */
1859} /* etm_send_ds_msg() */
1860
1861/*
1862 * both events from fmdo_send entry point and from SP are using the
1863 * etm_proto_v1_ev_hdr_t as its header and it will be the same header for all
1864 * ds send/recv msgs.
1865 * Idealy, we should use the hdr coming with the SP FMA event. Since fmdo_send
1866 * entry point can be called before FMA events from SP, we can't rely on
1867 * the SP FMA event hdr. Use the static hdr for packing ds msgs for fmdo_send
1868 * events.
1869 * return >0 for success, or -errno value
1870 * Design assumption: there is one FMA event per ds msg
1871 */
1872int
1873etm_pack_ds_msg(fmd_hdl_t *fmd_hdl, etm_iosvc_t *iosvc,
1874	etm_proto_v1_ev_hdr_t *ev_hdrp, size_t hdr_sz, nvlist_t *evp,
1875	etm_pack_msg_type_t msg_type, uint_t ckpt_opt)
1876{
1877	etm_proto_v1_ev_hdr_t	*hdrp;		/* for FMA_EVENT msg */
1878	uint32_t		*lenp;		/* ptr to FMA event length */
1879	size_t			evsz;		/* packed FMA event size */
1880	char 			*buf;
1881	uint32_t		rc;		/* for return code  */
1882	char 			*msg;		/* body of msg to be Qed */
1883
1884	etm_iosvc_q_ele_t	msg_ele;	/* io svc msg Q ele */
1885	etm_proto_v1_ev_hdr_t	*evhdrp;
1886
1887
1888	if (ev_hdrp == NULL) {
1889		hdrp = &iosvc_hdr;
1890	} else {
1891		hdrp = ev_hdrp;
1892	}
1893
1894	/*
1895	 * determine hdr_sz if 0, otherwise use the one passed in hdr_sz
1896	 */
1897
1898	if (hdr_sz == 0) {
1899		hdr_sz = sizeof (*hdrp) + (1 * sizeof (hdrp->ev_lens[0]));
1900	}
1901
1902	/*
1903	 * determine evp size
1904	 */
1905	(void) nvlist_size(evp, &evsz, NV_ENCODE_XDR);
1906
1907	/* indicate 1 FMA event, no network encoding, and 0-terminate */
1908	lenp = &hdrp->ev_lens[0];
1909	*lenp = evsz;
1910
1911	/*
1912	 * now the total of mem needs to be alloc-ed/ds msg size is
1913	 * hdr_sz + evsz
1914	 * msg will be freed in etm_send_to_remote_root() after ds_send_msg()
1915	 */
1916	msg = fmd_hdl_zalloc(fmd_hdl, hdr_sz + evsz, FMD_SLEEP);
1917
1918
1919	/*
1920	 * copy hdr, 0 terminate the length vector,  and then evp
1921	 */
1922	(void) memcpy(msg, hdrp, sizeof (*hdrp));
1923	hdrp = (etm_proto_v1_ev_hdr_t *)((ptrdiff_t)msg);
1924	lenp = &hdrp->ev_lens[0];
1925	lenp++;
1926	*lenp = 0;
1927
1928	buf = fmd_hdl_zalloc(fmd_hdl, evsz, FMD_SLEEP);
1929	(void) nvlist_pack(evp, (char **)&buf, &evsz, NV_ENCODE_XDR, 0);
1930	(void) memcpy(msg + hdr_sz, buf, evsz);
1931	fmd_hdl_free(fmd_hdl, buf, evsz);
1932
1933	fmd_hdl_debug(fmd_hdl, "info: hdr_sz= %d evsz= %d in etm_pack_ds_msg"
1934	    "for ldom %s\n", hdr_sz, evsz, iosvc->ldom_name);
1935	msg_ele.msg = msg;
1936	msg_ele.msg_size = hdr_sz + evsz;
1937	msg_ele.ckpt_flag = ckpt_opt;
1938
1939	/*
1940	 * decide what to do with the msg:
1941	 * if SP ereports (msg_type == SP_MSG), always enq the msg
1942	 * if not SP ereports, ie, fmd xprt control msgs, enq it _only_ after
1943	 * resource.fm.xprt.run has been sent (which sets start_sending_Q to 1)
1944	 */
1945	if ((msg_type == SP_MSG) ||
1946	    (msg_type != SP_MSG) && (iosvc->start_sending_Q == 1)) {
1947		/*
1948		 * this is the case when the msg needs to be enq-ed
1949		 */
1950		(void) pthread_mutex_lock(&iosvc->msg_q_lock);
1951		rc = etm_iosvc_msg_enq(fmd_hdl, iosvc, &msg_ele);
1952		if ((rc > 0) && (ckpt_opt & ETM_CKPT_SAVE) &&
1953		    (etm_ldom_type == LDOM_TYPE_CONTROL)) {
1954			(void) etm_ckpt_add(fmd_hdl, evp);
1955		}
1956		if (iosvc->msg_q_cur_len == 1)
1957			(void) pthread_cond_signal(&iosvc->msg_q_cv);
1958		(void) pthread_mutex_unlock(&iosvc->msg_q_lock);
1959	} else {
1960		/*
1961		 * fmd RDWR xprt procotol startup msgs, send it now!
1962		 */
1963		iosvc->ack_ok = 0;
1964		evhdrp = (etm_proto_v1_ev_hdr_t *)((ptrdiff_t)msg_ele.msg);
1965		evhdrp->ev_pp.pp_xid = iosvc->cur_send_xid + 1;
1966		while (!iosvc->ack_ok && iosvc->ds_hdl != DS_INVALID_HDL &&
1967		    !etm_is_dying) {
1968			if (etm_send_ds_msg(fmd_hdl, B_FALSE, iosvc, &msg_ele,
1969			    evhdrp) < 0) {
1970				continue;
1971			}
1972		}
1973		if (msg_type == FMD_XPRT_RUN_MSG)
1974			iosvc->start_sending_Q = 1;
1975	}
1976
1977	return (rc);
1978
1979} /* etm_pack_ds_msg() */
1980
1981/*
1982 * Design_Note:	For all etm_resp_q_*() functions and etm_resp_q_* globals,
1983 *		the mutex etm_resp_q_lock must be held by the caller.
1984 */
1985
1986/*
1987 * etm_resp_q_enq - add element to tail of ETM responder queue
1988 * etm_resp_q_deq - del element from head of ETM responder queue
1989 *
1990 * return >0 for success, or -errno value
1991 */
1992
1993static int
1994etm_resp_q_enq(fmd_hdl_t *hdl, etm_resp_q_ele_t *rqep)
1995{
1996	etm_resp_q_ele_t	*newp;	/* ptr to new resp q ele */
1997
1998	if (etm_resp_q_cur_len >= etm_resp_q_max_len) {
1999		fmd_hdl_debug(hdl, "warning: enq to full responder queue\n");
2000		etm_stats.etm_enq_drop_resp_q.fmds_value.ui64++;
2001		return (-E2BIG);
2002	}
2003
2004	newp = fmd_hdl_zalloc(hdl, sizeof (*newp), FMD_SLEEP);
2005	(void) memcpy(newp, rqep, sizeof (*newp));
2006	newp->rqe_nextp = NULL;
2007
2008	if (etm_resp_q_cur_len == 0) {
2009		etm_resp_q_head = newp;
2010	} else {
2011		etm_resp_q_tail->rqe_nextp = newp;
2012	}
2013	etm_resp_q_tail = newp;
2014	etm_resp_q_cur_len++;
2015	etm_stats.etm_resp_q_cur_len.fmds_value.ui64 = etm_resp_q_cur_len;
2016
2017	return (1);
2018
2019} /* etm_resp_q_enq() */
2020
2021static int
2022etm_resp_q_deq(fmd_hdl_t *hdl, etm_resp_q_ele_t *rqep)
2023{
2024	etm_resp_q_ele_t	*oldp;	/* ptr to old resp q ele */
2025
2026	if (etm_resp_q_cur_len == 0) {
2027		fmd_hdl_debug(hdl, "warning: deq from empty responder queue\n");
2028		etm_stats.etm_deq_drop_resp_q.fmds_value.ui64++;
2029		return (-ENOENT);
2030	}
2031
2032	(void) memcpy(rqep, etm_resp_q_head, sizeof (*rqep));
2033	rqep->rqe_nextp = NULL;
2034
2035	oldp = etm_resp_q_head;
2036	etm_resp_q_head = etm_resp_q_head->rqe_nextp;
2037	fmd_hdl_free(hdl, oldp, sizeof (*oldp));
2038
2039	etm_resp_q_cur_len--;
2040	etm_stats.etm_resp_q_cur_len.fmds_value.ui64 = etm_resp_q_cur_len;
2041	if (etm_resp_q_cur_len == 0) {
2042		etm_resp_q_tail = NULL;
2043	}
2044
2045	return (1);
2046
2047} /* etm_resp_q_deq() */
2048
2049/*
2050 * etm_maybe_enq_response - check the given message header to see
2051 *				whether a response has been requested,
2052 *				if so then enqueue the given connection
2053 *				and header for later transport by the
2054 *				responder thread as an ETM response msg,
2055 *				return 0 for nop, >0 success, or -errno value
2056 */
2057
2058static ssize_t
2059etm_maybe_enq_response(fmd_hdl_t *hdl, etm_xport_conn_t conn,
2060    void *hdrp, uint32_t hdr_sz, int32_t resp_code)
2061{
2062	ssize_t			rv;		/* ret val */
2063	etm_proto_v1_pp_t	*ppp;		/* protocol preamble ptr */
2064	uint8_t			orig_msg_type;	/* orig hdr's message type */
2065	uint32_t		orig_timeout;	/* orig hdr's timeout */
2066	etm_resp_q_ele_t	rqe;		/* responder queue ele */
2067
2068	ppp = hdrp;
2069	orig_msg_type = ppp->pp_msg_type;
2070	orig_timeout = ppp->pp_timeout;
2071
2072	/* bail out now if no response is to be sent */
2073
2074	if (orig_timeout == ETM_PROTO_V1_TIMEOUT_NONE) {
2075		return (0);
2076	} /* if a nop */
2077
2078	if ((orig_msg_type != ETM_MSG_TYPE_FMA_EVENT) &&
2079	    (orig_msg_type != ETM_MSG_TYPE_ALERT) &&
2080	    (orig_msg_type != ETM_MSG_TYPE_CONTROL)) {
2081		fmd_hdl_debug(hdl, "warning: bad msg type 0x%x\n",
2082		    orig_msg_type);
2083		return (-EINVAL);
2084	} /* if inappropriate hdr for a response msg */
2085
2086	/*
2087	 * enqueue the msg hdr and nudge the responder thread
2088	 * if the responder queue was previously empty
2089	 */
2090
2091	rqe.rqe_conn = conn;
2092	rqe.rqe_hdrp = hdrp;
2093	rqe.rqe_hdr_sz = hdr_sz;
2094	rqe.rqe_resp_code = resp_code;
2095
2096	(void) pthread_mutex_lock(&etm_resp_q_lock);
2097
2098	if (etm_resp_q_cur_len == etm_resp_q_max_len)
2099		(void) pthread_cond_wait(&etm_resp_q_cv, &etm_resp_q_lock);
2100
2101	rv = etm_resp_q_enq(hdl, &rqe);
2102	if (etm_resp_q_cur_len == 1)
2103		(void) pthread_cond_signal(&etm_resp_q_cv);
2104	(void) pthread_mutex_unlock(&etm_resp_q_lock);
2105
2106	return (rv);
2107
2108} /* etm_maybe_enq_response() */
2109
2110/*
2111 * Design_Note:	We rely on the fact that all message types have
2112 *		a common protocol preamble; if this fact should
2113 *		ever change it may break the code below. We also
2114 *		rely on the fact that FMA_EVENT and CONTROL headers
2115 *		returned by etm_hdr_read() will be sized large enough
2116 *		to reuse them as RESPONSE headers if the remote endpt
2117 *		asked for a response via the pp_timeout field.
2118 */
2119
2120/*
2121 * etm_send_response - use the given message header and response code
2122 *			to construct an appropriate response message,
2123 *			and send it back on the given connection,
2124 *			return >0 for success, or -errno value
2125 */
2126
2127static ssize_t
2128etm_send_response(fmd_hdl_t *hdl, etm_xport_conn_t conn,
2129    void *hdrp, int32_t resp_code)
2130{
2131	ssize_t			rv;		/* ret val */
2132	etm_proto_v1_pp_t	*ppp;		/* protocol preamble ptr */
2133	etm_proto_v1_resp_hdr_t *resp_hdrp;	/* for RESPONSE msg */
2134	uint8_t			resp_body[4];	/* response body if needed */
2135	uint8_t			*resp_msg;	/* response hdr+body */
2136	size_t			hdr_sz;		/* sizeof response hdr */
2137	uint8_t			orig_msg_type;	/* orig hdr's message type */
2138
2139	ppp = hdrp;
2140	orig_msg_type = ppp->pp_msg_type;
2141
2142	if (etm_debug_lvl >= 2) {
2143		etm_show_time(hdl, "ante resp send");
2144	}
2145
2146	/* reuse the given header as a response header */
2147
2148	resp_hdrp = hdrp;
2149	resp_hdrp->resp_code = resp_code;
2150	resp_hdrp->resp_len = 0;		/* default is empty body */
2151
2152	if ((orig_msg_type == ETM_MSG_TYPE_CONTROL) &&
2153	    (ppp->pp_sub_type == ETM_CTL_SEL_VER_NEGOT_REQ)) {
2154		resp_body[0] = ETM_PROTO_V2;
2155		resp_body[1] = ETM_PROTO_V3;
2156		resp_body[2] = 0;
2157		resp_hdrp->resp_len = 3;
2158	} /* if should send our/negotiated proto ver in resp body */
2159
2160	/* respond with the proto ver that was negotiated */
2161
2162	resp_hdrp->resp_pp.pp_proto_ver = etm_resp_ver;
2163	resp_hdrp->resp_pp.pp_msg_type = ETM_MSG_TYPE_RESPONSE;
2164	resp_hdrp->resp_pp.pp_timeout = ETM_PROTO_V1_TIMEOUT_NONE;
2165
2166	/*
2167	 * send the whole response msg in one write, header and body;
2168	 * avoid the alloc-and-copy if we can reuse the hdr as the msg,
2169	 * ie, if the body is empty. update the response stats.
2170	 */
2171
2172	hdr_sz = sizeof (etm_proto_v1_resp_hdr_t);
2173
2174	resp_msg = hdrp;
2175	if (resp_hdrp->resp_len > 0) {
2176		resp_msg = fmd_hdl_zalloc(hdl, hdr_sz + resp_hdrp->resp_len,
2177		    FMD_SLEEP);
2178		(void) memcpy(resp_msg, resp_hdrp, hdr_sz);
2179		(void) memcpy(resp_msg + hdr_sz, resp_body,
2180		    resp_hdrp->resp_len);
2181	}
2182
2183	(void) pthread_mutex_lock(&etm_write_lock);
2184	rv = etm_io_op(hdl, "bad io write on resp msg", conn,
2185	    resp_msg, hdr_sz + resp_hdrp->resp_len, ETM_IO_OP_WR);
2186	(void) pthread_mutex_unlock(&etm_write_lock);
2187	if (rv < 0) {
2188		goto func_ret;
2189	}
2190
2191	etm_stats.etm_wr_hdr_response.fmds_value.ui64++;
2192	etm_stats.etm_wr_body_response.fmds_value.ui64++;
2193
2194	fmd_hdl_debug(hdl, "info: sent V%u RESPONSE msg to xport "
2195	    "xid 0x%x code %d len %u\n",
2196	    (unsigned int)resp_hdrp->resp_pp.pp_proto_ver,
2197	    resp_hdrp->resp_pp.pp_xid, resp_hdrp->resp_code,
2198	    resp_hdrp->resp_len);
2199func_ret:
2200
2201	if (resp_hdrp->resp_len > 0) {
2202		fmd_hdl_free(hdl, resp_msg, hdr_sz + resp_hdrp->resp_len);
2203	}
2204	if (etm_debug_lvl >= 2) {
2205		etm_show_time(hdl, "post resp send");
2206	}
2207	return (rv);
2208
2209} /* etm_send_response() */
2210
/*
 * etm_reset_xport - reset the transport layer (via fini;init)
 *			presumably for an error condition we cannot
 *			otherwise recover from (ex: hung LDC channel)
 *
 * caveats - no checking/locking is done to ensure an existing connection
 *		is idle during an xport reset; we don't want to deadlock
 *		and presumably the transport is stuck/unusable anyway
 *
 * The results of both the fini and the init call are discarded, so a
 * failed re-init is not reported to the caller; the reset counter is
 * incremented regardless.
 */

static void
etm_reset_xport(fmd_hdl_t *hdl)
{
	/* tear the transport down first, then bring it back up */
	(void) etm_xport_fini(hdl);
	(void) etm_xport_init(hdl);
	etm_stats.etm_reset_xport.fmds_value.ui64++;

} /* etm_reset_xport() */
2229
2230/*
2231 * etm_handle_new_conn - receive an ETM message sent from the other end via
2232 *			the given open connection, pull out any FMA events
2233 *			and post them to the local FMD (or handle any ETM
2234 *			control or response msg); when done, close the
2235 *			connection
2236 */
2237
2238static void
2239etm_handle_new_conn(fmd_hdl_t *hdl, etm_xport_conn_t conn)
2240{
2241	etm_proto_v1_ev_hdr_t	*ev_hdrp;	/* for FMA_EVENT msg */
2242	etm_proto_v1_ctl_hdr_t	*ctl_hdrp;	/* for CONTROL msg */
2243	etm_proto_v1_resp_hdr_t *resp_hdrp;	/* for RESPONSE msg */
2244	etm_proto_v3_sa_hdr_t	*sa_hdrp;	/* for ALERT msg */
2245	etm_iosvc_t		*iosvc;		/* iosvc data structure */
2246	int32_t			resp_code;	/* response code */
2247	ssize_t			enq_rv;		/* resp_q enqueue status */
2248	size_t			hdr_sz;		/* sizeof header */
2249	size_t			evsz;		/* FMA event size */
2250	uint8_t			*body_buf;	/* msg body buffer */
2251	uint32_t		body_sz;	/* sizeof body_buf */
2252	uint32_t		ev_cnt;		/* count of FMA events */
2253	uint8_t			*bp;		/* byte ptr within body_buf */
2254	nvlist_t		*evp;		/* ptr to unpacked FMA event */
2255	char			*class;		/* FMA event class */
2256	ssize_t			i, n;		/* gen use */
2257	int			should_reset_xport; /* bool to reset xport */
2258	char			ldom_name[MAX_LDOM_NAME]; /* ldom name */
2259	int			rc;		/* return code */
2260	uint64_t		did;		/* domain id */
2261
2262
2263	if (etm_debug_lvl >= 2) {
2264		etm_show_time(hdl, "ante conn handle");
2265	}
2266	fmd_hdl_debug(hdl, "info: handling new conn %p\n", conn);
2267
2268	should_reset_xport = 0;
2269	ev_hdrp = NULL;
2270	ctl_hdrp = NULL;
2271	resp_hdrp = NULL;
2272	sa_hdrp = NULL;
2273	body_buf = NULL;
2274	class = NULL;
2275	evp = NULL;
2276	resp_code = 0;	/* default is success */
2277	enq_rv = 0;	/* default is nop, ie, did not enqueue */
2278
2279	/* read a network decoded message header from the connection */
2280
2281	if ((ev_hdrp = etm_hdr_read(hdl, conn, &hdr_sz)) == NULL) {
2282		/* errno assumed set by above call */
2283		should_reset_xport = (errno == ENOTACTIVE);
2284		fmd_hdl_debug(hdl, "error: FMA event dropped: "
2285		    "bad hdr read errno %d\n", errno);
2286		etm_stats.etm_rd_drop_fmaevent.fmds_value.ui64++;
2287		goto func_ret;
2288	}
2289
2290	/*
2291	 * handle the message based on its preamble pp_msg_type
2292	 * which is known to be valid from etm_hdr_read() checks
2293	 */
2294
2295	if (ev_hdrp->ev_pp.pp_msg_type == ETM_MSG_TYPE_FMA_EVENT) {
2296
2297		fmd_hdl_debug(hdl, "info: rcvd FMA_EVENT msg from xport\n");
2298
2299		/* allocate buf large enough for whole body / all FMA events */
2300
2301		body_sz = 0;
2302		for (i = 0; ev_hdrp->ev_lens[i] != 0; i++) {
2303			body_sz += ev_hdrp->ev_lens[i];
2304		} /* for summing sizes of all FMA events */
2305		if (i > etm_stats.etm_rd_max_ev_per_msg.fmds_value.ui64)
2306			etm_stats.etm_rd_max_ev_per_msg.fmds_value.ui64 = i;
2307		ev_cnt = i;
2308
2309		if (etm_debug_lvl >= 1) {
2310			fmd_hdl_debug(hdl, "info: event lengths %u sum %u\n",
2311			    ev_cnt, body_sz);
2312		}
2313
2314		body_buf = fmd_hdl_zalloc(hdl, body_sz, FMD_SLEEP);
2315
2316		/* read all the FMA events at once */
2317
2318		if ((n = etm_io_op(hdl, "FMA event dropped: "
2319		    "bad io read on event bodies", conn, body_buf, body_sz,
2320		    ETM_IO_OP_RD)) < 0) {
2321			should_reset_xport = (n == -ENOTACTIVE);
2322			etm_stats.etm_rd_drop_fmaevent.fmds_value.ui64++;
2323			goto func_ret;
2324		}
2325
2326		etm_stats.etm_rd_xport_bytes.fmds_value.ui64 += body_sz;
2327		etm_stats.etm_rd_body_fmaevent.fmds_value.ui64 += ev_cnt;
2328
2329		/*
2330		 * now that we've read the entire ETM msg from the conn,
2331		 * which avoids later ETM protocol framing errors if we didn't,
2332		 * check for dup msg/xid against last good FMD posting,
2333		 * if a dup then resend response but skip repost to FMD
2334		 */
2335
2336		if (ev_hdrp->ev_pp.pp_xid == etm_xid_posted_logged_ev) {
2337			enq_rv = etm_maybe_enq_response(hdl, conn,
2338			    ev_hdrp, hdr_sz, 0);
2339			fmd_hdl_debug(hdl, "info: skipping dup FMA event post "
2340			    "xid 0x%x\n", etm_xid_posted_logged_ev);
2341			etm_stats.etm_rd_dup_fmaevent.fmds_value.ui64++;
2342			goto func_ret;
2343		}
2344
2345		/* unpack each FMA event and post it to FMD */
2346
2347		bp = body_buf;
2348		for (i = 0; i < ev_cnt; i++) {
2349			if ((n = nvlist_unpack((char *)bp,
2350			    ev_hdrp->ev_lens[i], &evp, 0)) != 0) {
2351				resp_code = (-n);
2352				enq_rv = etm_maybe_enq_response(hdl, conn,
2353				    ev_hdrp, hdr_sz, resp_code);
2354				fmd_hdl_error(hdl, "error: FMA event dropped: "
2355				    "bad event body unpack errno %d\n", n);
2356				if (etm_debug_lvl >= 2) {
2357					fmd_hdl_debug(hdl, "info: FMA event "
2358					    "hexdump %d bytes:\n",
2359					    ev_hdrp->ev_lens[i]);
2360					etm_hexdump(hdl, bp,
2361					    ev_hdrp->ev_lens[i]);
2362				}
2363				etm_stats.etm_os_nvlist_unpack_fail.fmds_value.
2364				    ui64++;
2365				etm_stats.etm_rd_drop_fmaevent.fmds_value.
2366				    ui64++;
2367				bp += ev_hdrp->ev_lens[i];
2368				continue;
2369			}
2370
2371			if (etm_debug_lvl >= 1) {
2372				(void) nvlist_lookup_string(evp, FM_CLASS,
2373				    &class);
2374				if (class == NULL) {
2375					class = "NULL";
2376				}
2377				fmd_hdl_debug(hdl, "info: FMA event %p "
2378				    "class %s\n", evp, class);
2379			}
2380
2381			rc = nvlist_size(evp, &evsz, NV_ENCODE_XDR);
2382			fmd_hdl_debug(hdl,
2383			    "info: evp size before pack ds msg %d\n", evsz);
2384			ldom_name[0] = '\0';
2385			rc = etm_filter_find_ldom_id(hdl, evp, ldom_name,
2386			    MAX_LDOM_NAME, &did);
2387
2388			/*
2389			 * if rc is zero and the ldom_name is not "primary",
2390			 * the evp belongs to a root domain, put the evp in an
2391			 * outgoing etm queue,
2392			 * in all other cases, whether ldom_name is primary or
2393			 * can't find a ldom name, call etm_post_to_fmd
2394			 */
2395			if ((rc == 0) && strcmp(ldom_name, "primary") &&
2396			    strcmp(ldom_name, "")) {
2397				/*
2398				 * use the ldom_name, guaranteered at this point
2399				 * to be a valid ldom name/non-NULL, to find the
2400				 * iosvc data.
2401				 * add an iosvc struct if can not find one
2402				 */
2403				(void) pthread_mutex_unlock(&iosvc_list_lock);
2404				iosvc = etm_iosvc_lookup(hdl, ldom_name,
2405				    DS_INVALID_HDL, B_TRUE);
2406				(void) pthread_mutex_unlock(&iosvc_list_lock);
2407				if (iosvc == NULL) {
2408					fmd_hdl_debug(hdl,
2409					    "error: can't find iosvc for ldom "
2410					    "name %s\n", ldom_name);
2411				} else {
2412					resp_code = 0;
2413					(void) etm_pack_ds_msg(hdl, iosvc,
2414					    ev_hdrp, hdr_sz, evp,
2415					    SP_MSG, ETM_CKPT_SAVE);
2416					/*
2417					 * call the new fmd_xprt_log()
2418					 */
2419					fmd_xprt_log(hdl, etm_fmd_xprt, evp, 0);
2420					etm_xid_posted_logged_ev =
2421					    ev_hdrp->ev_pp.pp_xid;
2422				}
2423			} else {
2424				/*
2425				 * post the fma event to the control fmd
2426				 */
2427				resp_code = etm_post_to_fmd(hdl, etm_fmd_xprt,
2428				    evp);
2429				if (resp_code >= 0) {
2430					etm_xid_posted_logged_ev =
2431					    ev_hdrp->ev_pp.pp_xid;
2432				}
2433			}
2434
2435			evp = NULL;
2436			enq_rv = etm_maybe_enq_response(hdl, conn,
2437			    ev_hdrp, hdr_sz, resp_code);
2438			bp += ev_hdrp->ev_lens[i];
2439		} /* foreach FMA event in the body buffer */
2440
2441	} else if (ev_hdrp->ev_pp.pp_msg_type == ETM_MSG_TYPE_CONTROL) {
2442
2443		ctl_hdrp = (void*)ev_hdrp;
2444
2445		fmd_hdl_debug(hdl, "info: rcvd CONTROL msg from xport\n");
2446		if (etm_debug_lvl >= 1) {
2447			fmd_hdl_debug(hdl, "info: ctl sel %d xid 0x%x\n",
2448			    (int)ctl_hdrp->ctl_pp.pp_sub_type,
2449			    ctl_hdrp->ctl_pp.pp_xid);
2450		}
2451
2452		/*
2453		 * if we have a VER_NEGOT_REQ read the body and validate
2454		 * the protocol version set contained therein,
2455		 * otherwise we have a PING_REQ (which has no body)
2456		 * and we [also] fall thru to the code which sends a
2457		 * response msg if the pp_timeout field requested one
2458		 */
2459
2460		if (ctl_hdrp->ctl_pp.pp_sub_type == ETM_CTL_SEL_VER_NEGOT_REQ) {
2461
2462			body_sz = ctl_hdrp->ctl_len;
2463			body_buf = fmd_hdl_zalloc(hdl, body_sz, FMD_SLEEP);
2464
2465			if ((n = etm_io_op(hdl, "bad io read on ctl body",
2466			    conn, body_buf, body_sz, ETM_IO_OP_RD)) < 0) {
2467				should_reset_xport = (n == -ENOTACTIVE);
2468				goto func_ret;
2469			}
2470
2471			/* complain if version set completely incompatible */
2472
2473			for (i = 0; i < body_sz; i++) {
2474				if ((body_buf[i] == ETM_PROTO_V1) ||
2475				    (body_buf[i] == ETM_PROTO_V2) ||
2476				    (body_buf[i] == ETM_PROTO_V3)) {
2477					break;
2478				}
2479			}
2480			if (i >= body_sz) {
2481				etm_stats.etm_ver_bad.fmds_value.ui64++;
2482				resp_code = (-EPROTO);
2483			}
2484
2485		} /* if got version set request */
2486
2487		etm_stats.etm_rd_body_control.fmds_value.ui64++;
2488
2489		enq_rv = etm_maybe_enq_response(hdl, conn,
2490		    ctl_hdrp, hdr_sz, resp_code);
2491
2492	} else if (ev_hdrp->ev_pp.pp_msg_type == ETM_MSG_TYPE_RESPONSE) {
2493
2494		resp_hdrp = (void*)ev_hdrp;
2495
2496		fmd_hdl_debug(hdl, "info: rcvd RESPONSE msg from xport\n");
2497		if (etm_debug_lvl >= 1) {
2498			fmd_hdl_debug(hdl, "info: resp xid 0x%x\n",
2499			    (int)resp_hdrp->resp_pp.pp_xid);
2500		}
2501
2502		body_sz = resp_hdrp->resp_len;
2503		body_buf = fmd_hdl_zalloc(hdl, body_sz, FMD_SLEEP);
2504
2505		if ((n = etm_io_op(hdl, "bad io read on resp len",
2506		    conn, body_buf, body_sz, ETM_IO_OP_RD)) < 0) {
2507			should_reset_xport = (n == -ENOTACTIVE);
2508			goto func_ret;
2509		}
2510
2511		etm_stats.etm_rd_body_response.fmds_value.ui64++;
2512
2513		/*
2514		 * look up the xid to interpret the response body
2515		 *
2516		 * ping is a nop; for ver negot confirm that a supported
2517		 * protocol version was negotiated and remember which one
2518		 */
2519
2520		if ((resp_hdrp->resp_pp.pp_xid != etm_xid_ping) &&
2521		    (resp_hdrp->resp_pp.pp_xid != etm_xid_ver_negot)) {
2522			etm_stats.etm_xid_bad.fmds_value.ui64++;
2523			goto func_ret;
2524		}
2525
2526		if (resp_hdrp->resp_pp.pp_xid == etm_xid_ver_negot) {
2527			if ((body_buf[0] < ETM_PROTO_V1) ||
2528			    (body_buf[0] > ETM_PROTO_V3)) {
2529				etm_stats.etm_ver_bad.fmds_value.ui64++;
2530				goto func_ret;
2531			}
2532			etm_resp_ver = body_buf[0];
2533		} /* if have resp to last req to negotiate proto ver */
2534
2535	} else if (ev_hdrp->ev_pp.pp_msg_type == ETM_MSG_TYPE_ALERT) {
2536
2537		sa_hdrp = (void*)ev_hdrp;
2538
2539		fmd_hdl_debug(hdl, "info: rcvd ALERT msg from xport\n");
2540		if (etm_debug_lvl >= 1) {
2541			fmd_hdl_debug(hdl, "info: sa sel %d xid 0x%x\n",
2542			    (int)sa_hdrp->sa_pp.pp_sub_type,
2543			    sa_hdrp->sa_pp.pp_xid);
2544		}
2545
2546		body_sz = sa_hdrp->sa_len;
2547		body_buf = fmd_hdl_zalloc(hdl, body_sz, FMD_SLEEP);
2548
2549		if ((n = etm_io_op(hdl, "bad io read on sa body",
2550		    conn, body_buf, body_sz, ETM_IO_OP_RD)) < 0) {
2551			should_reset_xport = (n == -ENOTACTIVE);
2552			goto func_ret;
2553		}
2554
2555		etm_stats.etm_rd_body_alert.fmds_value.ui64++;
2556
2557		/*
2558		 * now that we've read the entire ETM msg from the conn,
2559		 * which avoids later ETM protocol framing errors if we didn't,
2560		 * check for dup msg/xid against last good syslog posting,
2561		 * if a dup then resend response but skip repost to syslog
2562		 */
2563
2564		if (sa_hdrp->sa_pp.pp_xid == etm_xid_posted_sa) {
2565			enq_rv = etm_maybe_enq_response(hdl, conn,
2566			    sa_hdrp, hdr_sz, 0);
2567			fmd_hdl_debug(hdl, "info: skipping dup ALERT post "
2568			    "xid 0x%x\n", etm_xid_posted_sa);
2569			etm_stats.etm_rd_dup_alert.fmds_value.ui64++;
2570			goto func_ret;
2571		}
2572
2573		resp_code = etm_post_to_syslog(hdl, sa_hdrp->sa_priority,
2574		    body_sz, body_buf);
2575		if (resp_code >= 0) {
2576			etm_xid_posted_sa = sa_hdrp->sa_pp.pp_xid;
2577		}
2578		enq_rv = etm_maybe_enq_response(hdl, conn,
2579		    sa_hdrp, hdr_sz, resp_code);
2580	} /* whether we have a FMA_EVENT, CONTROL, RESPONSE or ALERT msg */
2581
2582func_ret:
2583
2584	if (etm_debug_lvl >= 2) {
2585		etm_show_time(hdl, "post conn handle");
2586	}
2587
2588	/*
2589	 * if no responder ele was enqueued, close the conn now
2590	 * and free the ETM msg hdr; the ETM msg body is not needed
2591	 * by the responder thread and should always be freed here
2592	 */
2593
2594	if (enq_rv <= 0) {
2595		(void) etm_conn_close(hdl, "bad conn close after msg recv",
2596		    conn);
2597		if (ev_hdrp != NULL) {
2598			fmd_hdl_free(hdl, ev_hdrp, hdr_sz);
2599		}
2600	}
2601	if (body_buf != NULL) {
2602		fmd_hdl_free(hdl, body_buf, body_sz);
2603	}
2604	if (should_reset_xport) {
2605		etm_reset_xport(hdl);
2606	}
2607} /* etm_handle_new_conn() */
2608
2609/*
2610 * etm_handle_bad_accept - recover from a failed connection acceptance
2611 */
2612
2613static void
2614etm_handle_bad_accept(fmd_hdl_t *hdl, int nev)
2615{
2616	int	should_reset_xport; /* bool to reset xport */
2617
2618	should_reset_xport = (nev == -ENOTACTIVE);
2619	fmd_hdl_debug(hdl, "error: bad conn accept errno %d\n", (-nev));
2620	etm_stats.etm_xport_accept_fail.fmds_value.ui64++;
2621	(void) etm_sleep(etm_bad_acc_to_sec); /* avoid spinning CPU */
2622	if (should_reset_xport) {
2623		etm_reset_xport(hdl);
2624	}
2625} /* etm_handle_bad_accept() */
2626
2627/*
2628 * etm_server - loop forever accepting new connections
2629 *		using the given FMD handle,
2630 *		handling any ETM msgs sent from the other side
2631 *		via each such connection
2632 */
2633
2634static void
2635etm_server(void *arg)
2636{
2637	etm_xport_conn_t	conn;		/* connection handle */
2638	int			nev;		/* -errno val */
2639	fmd_hdl_t		*hdl;		/* FMD handle */
2640
2641	hdl = arg;
2642
2643	fmd_hdl_debug(hdl, "info: connection server starting\n");
2644
2645	/*
2646	 * Restore the checkpointed events and dispatch them before starting to
2647	 * receive more events from the sp.
2648	 */
2649	etm_ckpt_recover(hdl);
2650
2651	while (!etm_is_dying) {
2652
2653		if ((conn = etm_xport_accept(hdl, NULL)) == NULL) {
2654			/* errno assumed set by above call */
2655			nev = (-errno);
2656			if (etm_is_dying) {
2657				break;
2658			}
2659			etm_handle_bad_accept(hdl, nev);
2660			continue;
2661		}
2662
2663		/* handle the new message/connection, closing it when done */
2664
2665		etm_handle_new_conn(hdl, conn);
2666
2667	} /* while accepting new connections until ETM dies */
2668
2669	/* ETM is dying (probably due to "fmadm unload etm") */
2670
2671	fmd_hdl_debug(hdl, "info: connection server is dying\n");
2672
2673} /* etm_server() */
2674
/*
 * etm_responder - loop forever waiting for new responder queue elements
 *		to be enqueued, for each one constructing and sending
 *		an ETM response msg to the other side, and closing its
 *		associated connection when appropriate
 *
 *	this thread exists to ensure that the etm_server() thread
 *	never pends indefinitely waiting on the xport write lock, and is
 *	hence always available to accept new connections and handle
 *	incoming messages
 *
 *	this design relies on the fact that each connection accepted and
 *	returned by the ETM xport layer is unique, and each can be closed
 *	independently of the others while multiple connections are
 *	outstanding
 *
 *	arg is the FMD handle. every path that reaches func_ret does so
 *	with etm_resp_q_lock released; the lock is re-taken there to
 *	drain whatever is still queued.
 */

static void
etm_responder(void *arg)
{
	ssize_t			n;		/* gen use */
	fmd_hdl_t		*hdl;		/* FMD handle */
	etm_resp_q_ele_t	rqe;		/* responder queue ele */

	hdl = arg;

	fmd_hdl_debug(hdl, "info: responder server starting\n");

	while (!etm_is_dying) {

		(void) pthread_mutex_lock(&etm_resp_q_lock);

		while (etm_resp_q_cur_len == 0) {
			(void) pthread_cond_wait(&etm_resp_q_cv,
			    &etm_resp_q_lock);
			/* drop the lock before leaving; func_ret re-takes it */
			if (etm_is_dying) {
				(void) pthread_mutex_unlock(&etm_resp_q_lock);
				goto func_ret;
			}
		} /* while the responder queue is empty, wait to be nudged */

		/*
		 * for every responder ele that has been enqueued,
		 * dequeue and send it as an ETM response msg,
		 * closing its associated conn and freeing its hdr
		 *
		 * enter the queue draining loop holding the responder
		 * queue lock, but do not hold the lock indefinitely
		 * (the actual send may pend us indefinitely),
		 * so that other threads will never pend for long
		 * trying to enqueue a new element
		 */

		while (etm_resp_q_cur_len > 0) {

			(void) etm_resp_q_deq(hdl, &rqe);

			/*
			 * the queue was full before this dequeue, so nudge
			 * a thread that may be waiting for room to enqueue
			 */
			if ((etm_resp_q_cur_len + 1) == etm_resp_q_max_len)
				(void) pthread_cond_signal(&etm_resp_q_cv);

			(void) pthread_mutex_unlock(&etm_resp_q_lock);

			/* a failed send is logged; cleanup happens regardless */
			if ((n = etm_send_response(hdl, rqe.rqe_conn,
			    rqe.rqe_hdrp, rqe.rqe_resp_code)) < 0) {
				fmd_hdl_error(hdl, "error: bad resp send "
				    "errno %d\n", (-n));
			}

			(void) etm_conn_close(hdl, "bad conn close after resp",
			    rqe.rqe_conn);
			fmd_hdl_free(hdl, rqe.rqe_hdrp, rqe.rqe_hdr_sz);

			/* the lock is not held here, as func_ret expects */
			if (etm_is_dying) {
				goto func_ret;
			}
			(void) pthread_mutex_lock(&etm_resp_q_lock);

		} /* while draining the responder queue */

		(void) pthread_mutex_unlock(&etm_resp_q_lock);

	} /* while awaiting and sending resp msgs until ETM dies */

func_ret:

	/* ETM is dying (probably due to "fmadm unload etm") */

	fmd_hdl_debug(hdl, "info: responder server is dying\n");

	/*
	 * responses still queued are not sent; each one is dequeued so
	 * that its connection is closed and its header freed
	 */
	(void) pthread_mutex_lock(&etm_resp_q_lock);
	if (etm_resp_q_cur_len > 0) {
		fmd_hdl_error(hdl, "warning: %d response msgs dropped\n",
		    (int)etm_resp_q_cur_len);
		while (etm_resp_q_cur_len > 0) {
			(void) etm_resp_q_deq(hdl, &rqe);
			(void) etm_conn_close(hdl, "bad conn close after deq",
			    rqe.rqe_conn);
			fmd_hdl_free(hdl, rqe.rqe_hdrp, rqe.rqe_hdr_sz);
		}
	}
	(void) pthread_mutex_unlock(&etm_resp_q_lock);

} /* etm_responder() */
2778
2779static void *
2780etm_init_alloc(size_t size)
2781{
2782	return (fmd_hdl_alloc(init_hdl, size, FMD_SLEEP));
2783}
2784
/*
 * etm_init_free - release size bytes at addr against the init_hdl FMD
 *		handle; counterpart of etm_init_alloc()
 */
static void
etm_init_free(void *addr, size_t size)
{
	fmd_hdl_free(init_hdl, addr, size);
}
2790
2791/*
2792 * ---------------------root ldom support functions -----------------------
2793 */
2794
/*
 * etm_async_q_enq and etm_async_q_deq use the static array async_event_q
 * instead of a dynamically allocated queue.
 * They are not running in an fmd aux thread and so can't use the
 * fmd_hdl_* funcs.
 * The caller needs to grab the mutex lock before calling these funcs.
 * return >0 for success, or -errno value
 */
2802static int
2803etm_async_q_enq(etm_async_event_ele_t *async_e)
2804{
2805
2806	if (etm_async_q_cur_len >= etm_async_q_max_len) {
2807		/* etm_stats.etm_enq_drop_async_q.fmds_value.ui64++; */
2808		return (-E2BIG);
2809	}
2810
2811	(void) memcpy(&async_event_q[etm_async_q_tail], async_e,
2812	    sizeof (*async_e));
2813
2814	etm_async_q_tail++;
2815	if (etm_async_q_tail == etm_async_q_max_len) {
2816		etm_async_q_tail = 0;
2817	}
2818	etm_async_q_cur_len++;
2819
2820/* etm_stats.etm_async_q_cur_len.fmds_value.ui64 = etm_async_q_cur_len; */
2821
2822	return (1);
2823
2824} /* etm_async_q_enq() */
2825
2826
2827static int
2828etm_async_q_deq(etm_async_event_ele_t *async_e)
2829{
2830
2831	if (etm_async_q_cur_len == 0) {
2832		/* etm_stats.etm_deq_drop_async_q.fmds_value.ui64++; */
2833		return (-ENOENT);
2834	}
2835
2836	(void) memcpy(async_e, &async_event_q[etm_async_q_head],
2837	    sizeof (*async_e));
2838
2839	etm_async_q_head++;
2840	if (etm_async_q_head == etm_async_q_max_len) {
2841		etm_async_q_head = 0;
2842	}
2843	etm_async_q_cur_len--;
2844
2845	return (1);
2846} /* etm_async_q_deq */
2847
2848
/*
 * setting up the fields in iosvc at DS_REG_CB time
 *
 * fmd_hdl	FMD handle used for logging, opening the fmd transport
 *		and creating threads
 * iosvc	iosvc structure to (re)initialise
 * async_e	async event supplying the ds handle and, for the debug
 *		message, the ldom name
 *
 * The ds handle is taken from the event and the xid and send-queue state
 * are reset on every call. The fmd transport and the receive and send
 * threads are only created when the iosvc does not already have them.
 */
void
etm_iosvc_setup(fmd_hdl_t *fmd_hdl, etm_iosvc_t *iosvc,
	etm_async_event_ele_t *async_e)
{
	iosvc->ds_hdl = async_e->ds_hdl;
	iosvc->cur_send_xid = 0;
	iosvc->xid_posted_ev = 0;
	iosvc->start_sending_Q = 0;

	/*
	 * open the fmd xprt if it
	 * hasn't been previously opened
	 */
	fmd_hdl_debug(fmd_hdl,  "info: before fmd_xprt_open ldom_name is %s\n",
	    async_e->ldom_name);

	if (iosvc->fmd_xprt == NULL) {
		iosvc->fmd_xprt = fmd_xprt_open(fmd_hdl, flags, NULL, iosvc);
	}

	/* clear the dying flag before any thread for this iosvc is created */
	iosvc->thr_is_dying = 0;
	if (iosvc->recv_tid == NULL) {
		iosvc->recv_tid = fmd_thr_create(fmd_hdl,
		    etm_recv_from_remote_root, iosvc);
	}
	if (iosvc->send_tid == NULL) {
		iosvc->send_tid = fmd_thr_create(fmd_hdl,
		    etm_send_to_remote_root, iosvc);
	}
} /* etm_iosvc_setup() */
2882
2883
2884/*
2885 * ds userland interface ds_reg_cb  callback func
2886 */
2887
2888/* ARGSUSED */
2889static void
2890etm_iosvc_reg_handler(ds_hdl_t ds_hdl, ds_cb_arg_t arg, ds_ver_t *ver,
2891	ds_domain_hdl_t dhdl)
2892{
2893	etm_async_event_ele_t	async_ele;
2894
2895
2896	/*
2897	 * do version check here.
2898	 * checked the ver received here against etm_iosvc_vers here
2899	 */
2900	if (etm_iosvc_vers[0].major != ver->major ||
2901	    etm_iosvc_vers[0].minor != ver->minor) {
2902		/*
2903		 * can't log an fmd debug msg,
2904		 * not running in an fmd aux thread
2905		 */
2906		return;
2907	}
2908
2909	/*
2910	 * the callback should have a valid ldom_name
2911	 * can't log fmd debugging msg here since this is not in an fmd aux
2912	 * thread. log fmd debug msg in etm_async_event_handle()
2913	 */
2914	async_ele.ds_hdl = ds_hdl;
2915	async_ele.dhdl = dhdl;
2916	async_ele.ldom_name[0] = '\0';
2917	async_ele.event_type = ETM_ASYNC_EVENT_DS_REG_CB;
2918	(void) pthread_mutex_lock(&etm_async_event_q_lock);
2919	(void) etm_async_q_enq(&async_ele);
2920	if (etm_async_q_cur_len == 1)
2921		(void) pthread_cond_signal(&etm_async_event_q_cv);
2922	(void) pthread_mutex_unlock(&etm_async_event_q_lock);
2923
2924} /* etm_iosvc_reg_handler */
2925
2926
2927/*
2928 * ds userland interface ds_unreg_cb  callback func
2929 */
2930
2931/*ARGSUSED*/
2932static void
2933etm_iosvc_unreg_handler(ds_hdl_t hdl, ds_cb_arg_t arg)
2934{
2935	etm_async_event_ele_t	async_ele;
2936
2937	/*
2938	 * fill in async_ele and enqueue async_ele
2939	 */
2940	async_ele.ldom_name[0] = '\0';
2941	async_ele.ds_hdl = hdl;
2942	async_ele.event_type = ETM_ASYNC_EVENT_DS_UNREG_CB;
2943	(void) pthread_mutex_lock(&etm_async_event_q_lock);
2944	(void) etm_async_q_enq(&async_ele);
2945	if (etm_async_q_cur_len == 1)
2946		(void) pthread_cond_signal(&etm_async_event_q_cv);
2947	(void) pthread_mutex_unlock(&etm_async_event_q_lock);
2948} /* etm_iosvc_unreg_handler */
2949
2950/*
2951 * ldom event registration callback func
2952 */
2953
2954/* ARGSUSED */
2955static void
2956ldom_event_handler(char *ldom_name, ldom_event_t event, ldom_cb_arg_t data)
2957{
2958	etm_async_event_ele_t	async_ele;
2959
2960	/*
2961	 * the callback will have a valid ldom_name
2962	 */
2963	async_ele.ldom_name[0] = '\0';
2964	if (ldom_name)
2965		(void) strcpy(async_ele.ldom_name, ldom_name);
2966	async_ele.ds_hdl = DS_INVALID_HDL;
2967
2968	/*
2969	 * fill in async_ele and enq async_ele
2970	 */
2971	switch (event) {
2972	case LDOM_EVENT_BIND:
2973		async_ele.event_type = ETM_ASYNC_EVENT_LDOM_BIND;
2974		break;
2975	case LDOM_EVENT_UNBIND:
2976		async_ele.event_type = ETM_ASYNC_EVENT_LDOM_UNBIND;
2977		break;
2978	case LDOM_EVENT_ADD:
2979		async_ele.event_type = ETM_ASYNC_EVENT_LDOM_ADD;
2980		break;
2981	case LDOM_EVENT_REMOVE:
2982		async_ele.event_type = ETM_ASYNC_EVENT_LDOM_REMOVE;
2983		break;
2984	default:
2985		/*
2986		 * for all other ldom events, do nothing
2987		 */
2988		return;
2989	} /* switch (event) */
2990
2991	(void) pthread_mutex_lock(&etm_async_event_q_lock);
2992	(void) etm_async_q_enq(&async_ele);
2993	if (etm_async_q_cur_len == 1)
2994		(void) pthread_cond_signal(&etm_async_event_q_cv);
2995	(void) pthread_mutex_unlock(&etm_async_event_q_lock);
2996
2997} /* ldom_event_handler */
2998
2999
/*
 * This is running as an fmd aux thread.
 * This is the func that actually handles the events, which include:
 * 1. ldom events. ldom events are on the Control Domain only
 * 2. any DS userland callback funcs
 * These events have already been queued in async_event_q;
 * dequeue them and process them accordingly.
 */
3008static void
3009etm_async_event_handler(void *arg)
3010{
3011
3012	fmd_hdl_t		*fmd_hdl = (fmd_hdl_t *)arg;
3013	etm_iosvc_t		*iosvc;		/* ptr 2 iosvc struct */
3014	etm_async_event_ele_t	async_e;
3015
3016	fmd_hdl_debug(fmd_hdl, "info: etm_async_event_handler starting\n");
3017	/*
3018	 *  handle etm is not dying and Q len > 0
3019	 */
3020	while (!etm_is_dying) {
3021		/*
3022		 * grab the lock to check the Q len
3023		 */
3024		(void) pthread_mutex_lock(&etm_async_event_q_lock);
3025		fmd_hdl_debug(fmd_hdl, "info: etm_async_q_cur_len %d\n",
3026		    etm_async_q_cur_len);
3027
3028		while (etm_async_q_cur_len > 0) {
3029			(void) etm_async_q_deq(&async_e);
3030			(void) pthread_mutex_unlock(&etm_async_event_q_lock);
3031			fmd_hdl_debug(fmd_hdl,
3032			    "info: processing an async event type %d ds_hdl"
3033			    " %d\n", async_e.event_type, async_e.ds_hdl);
3034			if (async_e.ldom_name[0] != '\0') {
3035				fmd_hdl_debug(fmd_hdl,
3036				    "info: procssing async evt ldom_name %s\n",
3037				    async_e.ldom_name);
3038			}
3039
3040			/*
3041			 * at this point, if async_e.ldom_name is not NULL,
3042			 * we have a valid iosvc strcut ptr.
3043			 * the only time async_e.ldom_name is NULL is  at
3044			 * ds_unreg_cb()
3045			 */
3046			switch (async_e.event_type)  {
3047			case ETM_ASYNC_EVENT_LDOM_UNBIND:
3048			case ETM_ASYNC_EVENT_LDOM_REMOVE:
3049				/*
3050				 * we have a valid ldom_name,
3051				 * etm_lookup_struct(ldom_name)
3052				 * do nothing if can't find an iosvc
3053				 * no iosvc clean up to do
3054				 */
3055				(void) pthread_mutex_lock(
3056				    &iosvc_list_lock);
3057				iosvc = etm_iosvc_lookup(fmd_hdl,
3058				    async_e.ldom_name,
3059				    async_e.ds_hdl, B_FALSE);
3060				if (iosvc == NULL) {
3061					fmd_hdl_debug(fmd_hdl,
3062					    "error: can't find iosvc for ldom "
3063					    "name %s\n",
3064					    async_e.ldom_name);
3065					(void) pthread_mutex_unlock(
3066					    &iosvc_list_lock);
3067					break;
3068				}
3069				/*
3070				 * Clean up the queue, delete all messages and
3071				 * do not persist checkpointed fma events.
3072				 */
3073				etm_iosvc_cleanup(fmd_hdl, iosvc, B_TRUE,
3074				    B_TRUE);
3075				(void) pthread_mutex_unlock(
3076				    &iosvc_list_lock);
3077				break;
3078
3079			case ETM_ASYNC_EVENT_LDOM_BIND:
3080
3081				/*
3082				 * create iosvc if it has not been
3083				 * created
3084				 * async_e.ds_hdl is invalid
3085				 * async_e.ldom_name is valid ldom_name
3086				 */
3087				(void) pthread_mutex_lock(
3088				    &iosvc_list_lock);
3089				iosvc = etm_iosvc_lookup(fmd_hdl,
3090				    async_e.ldom_name,
3091				    async_e.ds_hdl, B_TRUE);
3092				if (iosvc == NULL) {
3093					fmd_hdl_debug(fmd_hdl,
3094					    "error: can't create iosvc for "
3095					    "async evnt %d\n",
3096					    async_e.event_type);
3097					(void) pthread_mutex_unlock(
3098					    &iosvc_list_lock);
3099					break;
3100				}
3101				(void) strcpy(iosvc->ldom_name,
3102				    async_e.ldom_name);
3103				iosvc->ds_hdl = async_e.ds_hdl;
3104				(void) pthread_mutex_unlock(
3105				    &iosvc_list_lock);
3106				break;
3107
3108			case ETM_ASYNC_EVENT_DS_REG_CB:
3109				if (etm_ldom_type == LDOM_TYPE_CONTROL) {
3110					/*
3111					 * find the root ldom name from
3112					 * ldom domain hdl/id
3113					 */
3114					if (etm_filter_find_ldom_name(
3115					    fmd_hdl, async_e.dhdl,
3116					    async_e.ldom_name,
3117					    MAX_LDOM_NAME) != 0) {
3118						fmd_hdl_debug(fmd_hdl,
3119						    "error: can't find root "
3120						    "domain name from did %d\n",
3121						    async_e.dhdl);
3122						break;
3123					} else {
3124						fmd_hdl_debug(fmd_hdl,
3125						    "info: etm_filter_find_"
3126						    "ldom_name returned %s\n",
3127						    async_e.ldom_name);
3128					}
3129					/*
3130					 * now we should have a valid
3131					 * root domain name.
3132					 * lookup the iosvc struct
3133					 * associated with the ldom_name
3134					 * and init the iosvc struct
3135					 */
3136					(void) pthread_mutex_lock(
3137					    &iosvc_list_lock);
3138					iosvc = etm_iosvc_lookup(
3139					    fmd_hdl, async_e.ldom_name,
3140					    async_e.ds_hdl, B_TRUE);
3141					if (iosvc == NULL) {
3142						fmd_hdl_debug(fmd_hdl,
3143						    "error: can't create iosvc "
3144						    "for async evnt %d\n",
3145						    async_e.event_type);
3146						(void) pthread_mutex_unlock(
3147						    &iosvc_list_lock);
3148						break;
3149					}
3150
3151					etm_iosvc_setup(fmd_hdl, iosvc,
3152					    &async_e);
3153					(void) pthread_mutex_unlock(
3154					    &iosvc_list_lock);
3155				} else {
3156					iosvc = &io_svc;
3157					(void) strcpy(iosvc->ldom_name,
3158					    async_e.ldom_name);
3159
3160					etm_iosvc_setup(fmd_hdl, iosvc,
3161					    &async_e);
3162				}
3163				break;
3164
3165			case ETM_ASYNC_EVENT_DS_UNREG_CB:
3166				/*
3167				 * decide which iosvc struct to perform
3168				 * this UNREG callback on.
3169				 */
3170				if (etm_ldom_type == LDOM_TYPE_CONTROL) {
3171					(void) pthread_mutex_lock(
3172					    &iosvc_list_lock);
3173					/*
3174					 * lookup the iosvc struct w/
3175					 * ds_hdl
3176					 */
3177					iosvc = etm_iosvc_lookup(
3178					    fmd_hdl, async_e.ldom_name,
3179					    async_e.ds_hdl, B_FALSE);
3180					if (iosvc == NULL) {
3181						fmd_hdl_debug(fmd_hdl,
3182						    "error: can't find iosvc "
3183						    "for async evnt %d\n",
3184						    async_e.event_type);
3185					(void) pthread_mutex_unlock(
3186					    &iosvc_list_lock);
3187						break;
3188					}
3189
3190					/*
3191					 * ds_hdl and fmd_xprt_open
3192					 * go hand to hand together
3193					 * after unreg_cb,
3194					 * ds_hdl is INVALID and
3195					 * fmd_xprt is closed.
3196					 * the ldom name and the msg Q
3197					 * remains in iosvc_list
3198					 */
3199					if (iosvc->ldom_name != '\0')
3200						fmd_hdl_debug(fmd_hdl,
3201						    "info: iosvc  w/ ldom_name "
3202						    "%s \n", iosvc->ldom_name);
3203
3204					/*
3205					 * destroy send/recv threads and
3206					 * other clean up on Control side.
3207					 */
3208					etm_iosvc_cleanup(fmd_hdl, iosvc,
3209					    B_FALSE, B_FALSE);
3210					(void) pthread_mutex_unlock(
3211					    &iosvc_list_lock);
3212				} else {
3213					iosvc = &io_svc;
3214					/*
3215					 * destroy send/recv threads and
3216					 * then clean up on Root side.
3217					 */
3218					etm_iosvc_cleanup(fmd_hdl, iosvc,
3219					    B_FALSE, B_FALSE);
3220				}
3221				break;
3222
3223			default:
3224				/*
3225				 * for all other events, etm doesn't care.
3226				 * already logged an fmd info msg w/
3227				 * the event type. Do nothing here.
3228				 */
3229				break;
3230			} /* switch (async_e.event_type) */
3231
3232			if (etm_ldom_type == LDOM_TYPE_CONTROL) {
3233				etm_filter_handle_ldom_event(fmd_hdl,
3234				    async_e.event_type, async_e.ldom_name);
3235			}
3236
3237			/*
3238			 * grab the lock to check the q length again
3239			 */
3240			(void) pthread_mutex_lock(&etm_async_event_q_lock);
3241
3242			if (etm_is_dying) {
3243				break;
3244			}
3245		}	/* etm_async_q_cur_len */
3246
3247		/*
3248		 * we have the mutex lock at this point, whether
3249		 * . etm_is_dying  and/or
3250		 * . q_len == 0
3251		 */
3252		if (!etm_is_dying && etm_async_q_cur_len == 0) {
3253			fmd_hdl_debug(fmd_hdl,
3254			    "info: cond wait on async_event_q_cv\n");
3255			(void) pthread_cond_wait(&etm_async_event_q_cv,
3256			    &etm_async_event_q_lock);
3257			fmd_hdl_debug(fmd_hdl,
3258			    "info: cond wait on async_event_q_cv rtns\n");
3259		}
3260		(void) pthread_mutex_unlock(&etm_async_event_q_lock);
3261	} /* etm_is_dying */
3262
3263	fmd_hdl_debug(fmd_hdl,
3264	    "info: etm async event handler thread exiting\n");
3265
3266} /* etm_async_event_handler */
3267
3268/*
3269 * deQ what's in iosvc msg Q
3270 * send iosvc_msgp to the remote io svc ldom by calling ds_send_msg()
3271 * the iosvc_msgp already has the packed msg, which is hdr + 1 fma event
3272 */
3273static void
3274etm_send_to_remote_root(void *arg)
3275{
3276
3277	etm_iosvc_t		*iosvc = (etm_iosvc_t *)arg;	/* iosvc ptr */
3278	etm_iosvc_q_ele_t	msg_ele;	/* iosvc msg ele */
3279	etm_proto_v1_ev_hdr_t	*ev_hdrp;	/* hdr for FMA_EVENT */
3280	fmd_hdl_t		*fmd_hdl = init_hdl;	/* fmd handle */
3281
3282
3283	fmd_hdl_debug(fmd_hdl,
3284	    "info: send to remote iosvc starting w/ ldom_name %s\n",
3285	    iosvc->ldom_name);
3286
3287	/*
3288	 *  loop forever until etm_is_dying or thr_is_dying
3289	 */
3290	while (!etm_is_dying && !iosvc->thr_is_dying) {
3291		if (iosvc->ds_hdl != DS_INVALID_HDL &&
3292		    iosvc->start_sending_Q > 0) {
3293			(void) pthread_mutex_lock(&iosvc->msg_q_lock);
3294			while (iosvc->msg_q_cur_len > 0 &&
3295			    iosvc->ds_hdl != DS_INVALID_HDL)  {
3296				(void) etm_iosvc_msg_deq(fmd_hdl, iosvc,
3297				    &msg_ele);
3298				if (etm_debug_lvl >= 3) {
3299					fmd_hdl_debug(fmd_hdl, "info: valid "
3300					    "ds_hdl before ds_send_msg \n");
3301				}
3302				(void) pthread_mutex_unlock(&iosvc->msg_q_lock);
3303
3304				iosvc->ack_ok = 0;
3305				ev_hdrp = (etm_proto_v1_ev_hdr_t *)
3306				    ((ptrdiff_t)msg_ele.msg);
3307				ev_hdrp->ev_pp.pp_xid = iosvc->cur_send_xid + 1;
3308				while (!iosvc->ack_ok &&
3309				    iosvc->ds_hdl != DS_INVALID_HDL &&
3310				    !etm_is_dying) {
3311					/*
3312					 * call ds_send_msg() to send the msg,
3313					 * wait for the recv end to send the
3314					 * resp msg back.
3315					 * If resp msg is recv-ed, ack_ok
3316					 * will be set to 1.
3317					 * otherwise, retry.
3318					 */
3319					if (etm_send_ds_msg(fmd_hdl, B_TRUE,
3320					    iosvc, &msg_ele, ev_hdrp) < 0) {
3321						continue;
3322					}
3323
3324					if (etm_is_dying || iosvc->thr_is_dying)
3325						break;
3326				}
3327
3328				/*
3329				 * if out of the while loop but !ack_ok, ie,
3330				 * ds_hdl becomes invalid at some point
3331				 * while waiting the resp msg, we need to put
3332				 * the msg back to the head of the Q.
3333				 */
3334				if (!iosvc->ack_ok) {
3335					(void) pthread_mutex_lock(
3336					    &iosvc->msg_q_lock);
3337					/*
3338					 * put the msg back to the head of Q.
3339					 * If the Q is full at this point,
3340					 * drop the msg at the tail, enq this
3341					 * msg to the head.
3342					 */
3343					etm_msg_enq_head(fmd_hdl, iosvc,
3344					    &msg_ele);
3345					(void) pthread_mutex_unlock(
3346					    &iosvc->msg_q_lock);
3347				}
3348
3349				/*
3350				 *
3351				 * grab the lock to check the Q len again
3352				 */
3353				(void) pthread_mutex_lock(&iosvc->msg_q_lock);
3354				if (etm_is_dying || iosvc->thr_is_dying) {
3355					break;
3356				}
3357			} /* while dequeing iosvc msgs to send */
3358
3359			/*
3360			 * we have the mutex lock for msg_q_lock at this point
3361			 * we are here because
3362			 * 1) q_len == 0: then wait on the cv for Q to be filled
3363			 * 2) etm_is_dying
3364			 */
3365			if (!etm_is_dying && !iosvc->thr_is_dying &&
3366			    iosvc->msg_q_cur_len == 0) {
3367				fmd_hdl_debug(fmd_hdl,
3368				    "info: waiting on msg_q_cv\n");
3369				(void) pthread_cond_wait(&iosvc->msg_q_cv,
3370				    &iosvc->msg_q_lock);
3371			}
3372			(void) pthread_mutex_unlock(&iosvc->msg_q_lock);
3373			if (etm_is_dying || iosvc->thr_is_dying)  {
3374				break;
3375			}
3376		} else {
3377			(void) etm_sleep(1);
3378		} /* wait for the start_sendingQ > 0 */
3379	} /* etm_is_dying or thr_is_dying */
3380	fmd_hdl_debug(fmd_hdl, "info; etm send thread exiting \n");
3381} /* etm_send_to_remote_root */
3382
3383
3384/*
3385 * receive etm msgs from the remote root ldom by calling ds_recv_msg()
3386 * if FMA events/ereports, call fmd_xprt_post() to post to fmd
3387 * send ACK back by calling ds_send_msg()
3388 */
3389static void
3390etm_recv_from_remote_root(void *arg)
3391{
3392	etm_iosvc_t		*iosvc = (etm_iosvc_t *)arg;	/* iosvc ptr */
3393	etm_proto_v1_pp_t	*pp;		/* protocol preamble */
3394	etm_proto_v1_ev_hdr_t	*ev_hdrp;	/* for FMA_EVENT msg */
3395	etm_proto_v1_resp_hdr_t	*resp_hdrp;	/* for RESPONSE msg */
3396	int32_t			resp_code = 0;	/* default is success */
3397	int32_t			rc;		/* return value */
3398	size_t			maxlen = MAXLEN;
3399						/* max msg len */
3400	char 			msgbuf[MAXLEN];	/* recv msg buf */
3401	size_t			msg_size;	/* recv msg size */
3402	size_t			hdr_sz;		/* sizeof *hdrp */
3403	size_t			evsz;		/* sizeof *evp */
3404	size_t			fma_event_size;	/* sizeof FMA event  */
3405	nvlist_t 		*evp;		/* ptr to the nvlist */
3406	char			*buf;		/* ptr to the nvlist */
3407	static uint32_t		mem_alloc = 0;	/* indicate if alloc mem */
3408	char 			*msg;		/* ptr to alloc mem */
3409	fmd_hdl_t		*fmd_hdl = init_hdl;
3410
3411
3412
3413	fmd_hdl_debug(fmd_hdl,
3414	    "info: recv from remote iosvc starting with ldom name %s \n",
3415	    iosvc->ldom_name);
3416
3417	/*
3418	 * loop forever until etm_is_dying or the thread is dying
3419	 */
3420
3421	msg = msgbuf;
3422	while (!etm_is_dying && !iosvc->thr_is_dying) {
3423		if (iosvc->ds_hdl == DS_INVALID_HDL) {
3424			fmd_hdl_debug(fmd_hdl,
3425			    "info: ds_hdl is invalid in recv thr\n");
3426			(void) etm_sleep(1);
3427			continue;
3428		}
3429
3430		/*
3431		 * for now, there are FMA_EVENT and ACK msg type.
3432		 * use FMA_EVENT buf as the maxlen, hdr+1 fma event.
3433		 * FMA_EVENT is big enough to hold an ACK msg.
3434		 * the actual msg size received is in msg_size.
3435		 */
3436		rc = (*etm_ds_recv_msg)(iosvc->ds_hdl, msg, maxlen, &msg_size);
3437		if (rc == EFBIG) {
3438			fmd_hdl_debug(fmd_hdl,
3439			    "info: ds_recv_msg needs mem the size of %d\n",
3440			    msg_size);
3441			msg = fmd_hdl_zalloc(fmd_hdl, msg_size, FMD_SLEEP);
3442			mem_alloc = 1;
3443		} else if (rc == 0) {
3444			fmd_hdl_debug(fmd_hdl,
3445			    "info: ds_recv_msg received a msg ok\n");
3446			/*
3447			 * check the magic # in  msg.hdr
3448			 */
3449			pp = (etm_proto_v1_pp_t *)((ptrdiff_t)msg);
3450			if (pp->pp_magic_num != ETM_PROTO_MAGIC_NUM) {
3451				fmd_hdl_debug(fmd_hdl,
3452				    "info: bad ds recv on magic\n");
3453				continue;
3454			}
3455
3456			/*
3457			 * check the msg type against msg_size to be sure
3458			 * that received msg is not a truncated msg
3459			 */
3460			if (pp->pp_msg_type == ETM_MSG_TYPE_FMA_EVENT) {
3461
3462				ev_hdrp = (etm_proto_v1_ev_hdr_t *)
3463				    ((ptrdiff_t)msg);
3464				fmd_hdl_debug(fmd_hdl, "info: ds received "
3465				    "FMA EVENT xid=%d msg_size=%d\n",
3466				    ev_hdrp->ev_pp.pp_xid, msg_size);
3467				hdr_sz = sizeof (*ev_hdrp) +
3468				    1*(sizeof (ev_hdrp->ev_lens[0]));
3469				fma_event_size = hdr_sz + ev_hdrp->ev_lens[0];
3470				if (fma_event_size != msg_size) {
3471					fmd_hdl_debug(fmd_hdl, "info: wrong "
3472					    "ev msg size received\n");
3473					continue;
3474					/*
3475					 * Simply  do nothing. The send side
3476					 * will timedcond_wait waiting on the
3477					 * resp msg will timeout and
3478					 * re-send the same msg.
3479					 */
3480				}
3481				if (etm_debug_lvl >= 3) {
3482					fmd_hdl_debug(fmd_hdl,  "info: recv msg"
3483					    " size %d hdrsz %d evp size %d\n",
3484					    msg_size, hdr_sz,
3485					    ev_hdrp->ev_lens[0]);
3486				}
3487
3488				if (ev_hdrp->ev_pp.pp_xid !=
3489				    iosvc->xid_posted_ev) {
3490					/*
3491					 * different from last xid posted to
3492					 * fmd, post to fmd now.
3493					 */
3494					buf = msg + hdr_sz;
3495					rc = nvlist_unpack(buf,
3496					    ev_hdrp->ev_lens[0], &evp, 0);
3497					rc = nvlist_size(evp, &evsz,
3498					    NV_ENCODE_XDR);
3499					fmd_hdl_debug(fmd_hdl,
3500					    "info: evp size %d before fmd"
3501					    "post\n", evsz);
3502
3503					if ((rc = etm_post_to_fmd(fmd_hdl,
3504					    iosvc->fmd_xprt, evp)) >= 0) {
3505						fmd_hdl_debug(fmd_hdl,
3506						    "info: xid posted to fmd %d"
3507						    "\n",
3508						    ev_hdrp->ev_pp.pp_xid);
3509						iosvc->xid_posted_ev =
3510						    ev_hdrp->ev_pp.pp_xid;
3511					}
3512				}
3513
3514				/*
3515				 * ready to  send the RESPONSE msg back
3516				 * reuse the msg buffer as the response buffer
3517				 */
3518				resp_hdrp = (etm_proto_v1_resp_hdr_t *)
3519				    ((ptrdiff_t)msg);
3520				resp_hdrp->resp_pp.pp_msg_type =
3521				    ETM_MSG_TYPE_RESPONSE;
3522
3523				resp_hdrp->resp_code = resp_code;
3524				resp_hdrp->resp_len = sizeof (*resp_hdrp);
3525
3526				/*
3527				 * send the whole response msg in one send
3528				 */
3529				if ((*etm_ds_send_msg)(iosvc->ds_hdl, msg,
3530				    sizeof (*resp_hdrp)) != 0) {
3531					fmd_hdl_debug(fmd_hdl,
3532					    "info: send response msg failed\n");
3533				} else {
3534					fmd_hdl_debug(fmd_hdl,
3535					    "info: ds send resp msg ok"
3536					    "size %d\n", sizeof (*resp_hdrp));
3537				}
3538			} else if (pp->pp_msg_type == ETM_MSG_TYPE_RESPONSE) {
3539				fmd_hdl_debug(fmd_hdl,
3540				    "info: ds received respond msg xid=%d"
3541				    "msg_size=%d for ldom %s\n", pp->pp_xid,
3542				    msg_size, iosvc->ldom_name);
3543				if (sizeof (*resp_hdrp) != msg_size) {
3544					fmd_hdl_debug(fmd_hdl,
3545					    "info: wrong resp msg size"
3546					    "received\n");
3547					fmd_hdl_debug(fmd_hdl,
3548					    "info: resp msg size %d recv resp"
3549					    "msg size %d\n",
3550					    sizeof (*resp_hdrp), msg_size);
3551					continue;
3552				}
3553				/*
3554				 * is the pp.pp_xid == iosvc->cur_send_xid+1,
3555				 * if so, nudge the send routine to send next
3556				 */
3557				if (pp->pp_xid != iosvc->cur_send_xid+1) {
3558					fmd_hdl_debug(fmd_hdl,
3559					    "info: ds received resp msg xid=%d "
3560					    "doesn't match cur_send_id=%d\n",
3561					    pp->pp_xid, iosvc->cur_send_xid+1);
3562					continue;
3563				}
3564				(void) pthread_mutex_lock(&iosvc->msg_ack_lock);
3565				iosvc->ack_ok = 1;
3566				(void) pthread_cond_signal(&iosvc->msg_ack_cv);
3567				(void) pthread_mutex_unlock(
3568				    &iosvc->msg_ack_lock);
3569				fmd_hdl_debug(fmd_hdl,
3570				    "info: signaling msg_ack_cv\n");
3571			} else {
3572				/*
3573				 * place holder for future msg types
3574				 */
3575				fmd_hdl_debug(fmd_hdl,
3576				    "info: ds received unrecognized msg\n");
3577			}
3578			if (mem_alloc) {
3579				fmd_hdl_free(fmd_hdl, msg, msg_size);
3580				mem_alloc = 0;
3581				msg = msgbuf;
3582			}
3583		} else {
3584			if (etm_debug_lvl >= 3) {
3585				fmd_hdl_debug(fmd_hdl,
3586				    "info: ds_recv_msg() failed\n");
3587			}
3588		} /* ds_recv_msg() returns */
3589	} /* etm_is_dying */
3590
3591	/*
3592	 * need to free the mem allocated in msg upon exiting the thread
3593	 */
3594	if (mem_alloc) {
3595		fmd_hdl_free(fmd_hdl, msg, msg_size);
3596		mem_alloc = 0;
3597		msg = msgbuf;
3598	}
3599	fmd_hdl_debug(fmd_hdl, "info; etm recv thread exiting \n");
3600} /* etm_recv_from_remote_root */
3601
3602
3603
3604/*
3605 * etm_ds_init
3606 *		initialize DS services function pointers by calling
3607 *		dlopen() followed by  dlsym() for each ds func.
3608 *		if any dlopen() or dlsym() call fails, return -ENOENT
3609 *		return >0 for successs, -ENOENT for failure
3610 */
3611static int
3612etm_ds_init(fmd_hdl_t *hdl)
3613{
3614	int rc = 0;
3615
3616	if ((etm_dl_hdl = dlopen(etm_dl_path, etm_dl_mode)) == NULL) {
3617		fmd_hdl_debug(hdl, "error: failed to dlopen %s\n", etm_dl_path);
3618		return (-ENOENT);
3619	}
3620
3621	etm_ds_svc_reg = (int (*)(ds_capability_t *cap, ds_ops_t *ops))
3622	    dlsym(etm_dl_hdl, "ds_svc_reg");
3623	if (etm_ds_svc_reg == NULL) {
3624		fmd_hdl_debug(hdl,
3625		    "error: failed to dlsym ds_svc_reg() w/ error %s\n",
3626		    dlerror());
3627		rc = -ENOENT;
3628	}
3629
3630
3631	etm_ds_clnt_reg = (int (*)(ds_capability_t *cap, ds_ops_t *ops))
3632	    dlsym(etm_dl_hdl, "ds_clnt_reg");
3633	if (etm_ds_clnt_reg == NULL) {
3634		fmd_hdl_debug(hdl,
3635		    "error: dlsym(ds_clnt_reg) failed w/ errno %d\n", errno);
3636		rc = -ENOENT;
3637	}
3638
3639	etm_ds_send_msg = (int (*)(ds_hdl_t hdl, void *buf, size_t buflen))
3640	    dlsym(etm_dl_hdl, "ds_send_msg");
3641	if (etm_ds_send_msg == NULL) {
3642		fmd_hdl_debug(hdl, "error: dlsym(ds_send_msg) failed\n");
3643		rc = -ENOENT;
3644	}
3645
3646	etm_ds_recv_msg = (int (*)(ds_hdl_t hdl, void *buf, size_t buflen,
3647	    size_t *msglen))dlsym(etm_dl_hdl, "ds_recv_msg");
3648	if (etm_ds_recv_msg == NULL) {
3649		fmd_hdl_debug(hdl, "error: dlsym(ds_recv_msg) failed\n");
3650		rc = -ENOENT;
3651	}
3652
3653	etm_ds_fini = (int (*)(void))dlsym(etm_dl_hdl, "ds_fini");
3654	if (etm_ds_fini == NULL) {
3655		fmd_hdl_debug(hdl, "error: dlsym(ds_fini) failed\n");
3656		rc = -ENOENT;
3657	}
3658
3659	if (rc == -ENOENT) {
3660		(void) dlclose(etm_dl_hdl);
3661	}
3662	return (rc);
3663
3664} /* etm_ds_init() */
3665
3666
3667/*
3668 * -------------------------- FMD entry points -------------------------------
3669 */
3670
3671/*
3672 * _fmd_init - initialize the transport for use by ETM and start the
3673 *		server daemon to accept new connections to us
3674 *
3675 *		FMD will read our *.conf and subscribe us to FMA events
3676 */
3677
3678void
3679_fmd_init(fmd_hdl_t *hdl)
3680{
3681	struct timeval		tmv;		/* timeval */
3682	ssize_t			n;		/* gen use */
3683	const struct facility	*fp;		/* syslog facility matching */
3684	char			*facname;	/* syslog facility property */
3685	uint32_t		type_mask;	/* type of the local host */
3686	int			rc;		/* funcs return code */
3687
3688
3689	if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0) {
3690		return; /* invalid data in configuration file */
3691	}
3692
3693	fmd_hdl_debug(hdl, "info: module initializing\n");
3694
3695	init_hdl = hdl;
3696	etm_lhp = ldom_init(etm_init_alloc, etm_init_free);
3697
3698	/*
3699	 * decide the ldom type, do initialization accordingly
3700	 */
3701	if ((rc = ldom_get_type(etm_lhp, &type_mask)) != 0) {
3702		fmd_hdl_debug(hdl, "error: can't decide ldom type\n");
3703		fmd_hdl_debug(hdl, "info: module unregistering\n");
3704		ldom_fini(etm_lhp);
3705		fmd_hdl_unregister(hdl);
3706		return;
3707	}
3708
3709	if ((type_mask & LDOM_TYPE_LEGACY) || (type_mask & LDOM_TYPE_CONTROL)) {
3710		if (type_mask & LDOM_TYPE_LEGACY) {
3711			/*
3712			 * running on a legacy sun4v domain,
3713			 * act as the the old sun4v
3714			 */
3715			etm_ldom_type = LDOM_TYPE_LEGACY;
3716			fmd_hdl_debug(hdl, "info: running as the old sun4v\n");
3717			ldom_fini(etm_lhp);
3718		} else if (type_mask & LDOM_TYPE_CONTROL) {
3719			etm_ldom_type = LDOM_TYPE_CONTROL;
3720			fmd_hdl_debug(hdl, "info: running as control domain\n");
3721
3722			/*
3723			 * looking for libds.so.1.
3724			 * If not found, don't do DS registration. As a result,
3725			 * there will be no DS callbacks or other DS services.
3726			 */
3727			if (etm_ds_init(hdl) >= 0) {
3728				etm_filter_init(hdl);
3729				etm_ckpt_init(hdl);
3730
3731				flags = FMD_XPRT_RDWR | FMD_XPRT_ACCEPT;
3732
3733				/*
3734				 * ds client registration
3735				 */
3736				if ((rc = (*etm_ds_clnt_reg)(&iosvc_caps,
3737				    &iosvc_ops))) {
3738					fmd_hdl_debug(hdl,
3739					"error: ds_clnt_reg(): errno %d\n", rc);
3740				}
3741			} else {
3742				fmd_hdl_debug(hdl, "error: dlopen() libds "
3743				    "failed, continue without the DS services");
3744			}
3745
3746			/*
3747			 * register for ldom status events
3748			 */
3749			if ((rc = ldom_register_event(etm_lhp,
3750			    ldom_event_handler, hdl))) {
3751				fmd_hdl_debug(hdl,
3752				    "error: ldom_register_event():"
3753				    " errno %d\n", rc);
3754			}
3755
3756			/*
3757			 * create the thread for handling both the ldom status
3758			 * change and service events
3759			 */
3760			etm_async_e_tid = fmd_thr_create(hdl,
3761			    etm_async_event_handler, hdl);
3762		}
3763
3764		/* setup statistics and properties from FMD */
3765
3766		(void) fmd_stat_create(hdl, FMD_STAT_NOALLOC,
3767		    sizeof (etm_stats) / sizeof (fmd_stat_t),
3768		    (fmd_stat_t *)&etm_stats);
3769
3770		etm_fma_resp_wait_time = fmd_prop_get_int32(hdl,
3771		    ETM_PROP_NM_FMA_RESP_WAIT_TIME);
3772		etm_debug_lvl = fmd_prop_get_int32(hdl, ETM_PROP_NM_DEBUG_LVL);
3773		etm_debug_max_ev_cnt = fmd_prop_get_int32(hdl,
3774		    ETM_PROP_NM_DEBUG_MAX_EV_CNT);
3775		fmd_hdl_debug(hdl, "info: etm_debug_lvl %d "
3776		    "etm_debug_max_ev_cnt %d\n", etm_debug_lvl,
3777		    etm_debug_max_ev_cnt);
3778
3779		etm_resp_q_max_len = fmd_prop_get_int32(hdl,
3780		    ETM_PROP_NM_MAX_RESP_Q_LEN);
3781		etm_stats.etm_resp_q_max_len.fmds_value.ui64 =
3782		    etm_resp_q_max_len;
3783		etm_bad_acc_to_sec = fmd_prop_get_int32(hdl,
3784		    ETM_PROP_NM_BAD_ACC_TO_SEC);
3785
3786		/*
3787		 * obtain an FMD transport handle so we can post
3788		 * FMA events later
3789		 */
3790
3791		etm_fmd_xprt = fmd_xprt_open(hdl, FMD_XPRT_RDONLY, NULL, NULL);
3792
3793		/*
3794		 * encourage protocol transaction id to be unique per module
3795		 * load
3796		 */
3797
3798		(void) gettimeofday(&tmv, NULL);
3799		etm_xid_cur = (uint32_t)((tmv.tv_sec << 10) |
3800		    ((unsigned long)tmv.tv_usec >> 10));
3801
3802		/* init the ETM transport */
3803
3804		if ((n = etm_xport_init(hdl)) != 0) {
3805			fmd_hdl_error(hdl, "error: bad xport init errno %d\n",
3806			    (-n));
3807			fmd_hdl_unregister(hdl);
3808			return;
3809		}
3810
3811		/*
3812		 * Cache any properties we use every time we receive an alert.
3813		 */
3814		syslog_file = fmd_prop_get_int32(hdl, ETM_PROP_NM_SYSLOGD);
3815		syslog_cons = fmd_prop_get_int32(hdl, ETM_PROP_NM_CONSOLE);
3816
3817		if (syslog_file && (syslog_logfd = open("/dev/conslog",
3818		    O_WRONLY | O_NOCTTY)) == -1) {
3819			fmd_hdl_error(hdl,
3820			    "error: failed to open /dev/conslog");
3821			syslog_file = 0;
3822		}
3823
3824		if (syslog_cons && (syslog_msgfd = open("/dev/sysmsg",
3825		    O_WRONLY | O_NOCTTY)) == -1) {
3826			fmd_hdl_error(hdl, "error: failed to open /dev/sysmsg");
3827			syslog_cons = 0;
3828		}
3829
3830		if (syslog_file) {
3831			/*
3832			 * Look up the value of the "facility" property and
3833			 * use it to determine * what syslog LOG_* facility
3834			 * value we use to fill in our log_ctl_t.
3835			 */
3836			facname = fmd_prop_get_string(hdl,
3837			    ETM_PROP_NM_FACILITY);
3838
3839			for (fp = syslog_facs; fp->fac_name != NULL; fp++) {
3840				if (strcmp(fp->fac_name, facname) == 0)
3841					break;
3842			}
3843
3844			if (fp->fac_name == NULL) {
3845				fmd_hdl_error(hdl, "error: invalid 'facility'"
3846				    " setting: %s\n", facname);
3847				syslog_file = 0;
3848			} else {
3849				syslog_facility = fp->fac_value;
3850				syslog_ctl.flags = SL_CONSOLE | SL_LOGONLY;
3851			}
3852
3853			fmd_prop_free_string(hdl, facname);
3854		}
3855
3856		/*
3857		 * start the message responder and the connection acceptance
3858		 * server; request protocol version be negotiated after waiting
3859		 * a second for the receiver to be ready to start handshaking
3860		 */
3861
3862		etm_resp_tid = fmd_thr_create(hdl, etm_responder, hdl);
3863		etm_svr_tid = fmd_thr_create(hdl, etm_server, hdl);
3864
3865		(void) etm_sleep(ETM_SLEEP_QUIK);
3866		etm_req_ver_negot(hdl);
3867
3868	} else if (type_mask & LDOM_TYPE_ROOT) {
3869		etm_ldom_type = LDOM_TYPE_ROOT;
3870		fmd_hdl_debug(hdl, "info: running as root domain\n");
3871
3872		/*
3873		 * looking for libds.so.1.
3874		 * If not found, don't do DS registration. As a result,
3875		 * there will be no DS callbacks or other DS services.
3876		 */
3877		if (etm_ds_init(hdl) < 0) {
3878			fmd_hdl_debug(hdl,
3879			    "error: dlopen() libds failed, "
3880			    "module unregistering\n");
3881			ldom_fini(etm_lhp);
3882			fmd_hdl_unregister(hdl);
3883			return;
3884		}
3885
3886		/*
3887		 * DS service registration
3888		 */
3889		if ((rc = (*etm_ds_svc_reg)(&iosvc_caps, &iosvc_ops))) {
3890			fmd_hdl_debug(hdl, "error: ds_svc_reg(): errno %d\n",
3891			    rc);
3892		}
3893
3894		/*
3895		 * this thread is created for ds_reg_cb/ds_unreg_cb
3896		 */
3897		etm_async_e_tid = fmd_thr_create(hdl,
3898		    etm_async_event_handler, hdl);
3899
3900		flags = FMD_XPRT_RDWR;
3901	} else if ((type_mask & LDOM_TYPE_IO) || (type_mask == 0)) {
3902		/*
3903		 * Do not load this module if it is
3904		 * . runing on a non-root ldom
3905		 * . the domain owns no io devices
3906		 */
3907		fmd_hdl_debug(hdl,
3908		    "info: non-root ldom, module unregistering\n");
3909		ldom_fini(etm_lhp);
3910		fmd_hdl_unregister(hdl);
3911		return;
3912	} else {
3913		/*
3914		 * place holder, all other cases. unload etm for now
3915		 */
3916		fmd_hdl_debug(hdl,
3917		    "info: other ldom type, module unregistering\n");
3918		ldom_fini(etm_lhp);
3919		fmd_hdl_unregister(hdl);
3920		return;
3921	}
3922
3923	fmd_hdl_debug(hdl, "info: module initialized ok\n");
3924
3925} /* _fmd_init() */
3926
3927/*
3928 * etm_recv - receive an FMA event from FMD and transport it
3929 *		to the remote endpoint
3930 */
3931
3932/*ARGSUSED*/
3933void
3934etm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *evp, const char *class)
3935{
3936	etm_xport_addr_t	*addrv;	/* vector of transport addresses */
3937	etm_xport_conn_t	conn;	/* connection handle */
3938	etm_proto_v1_ev_hdr_t	*hdrp;	/* for FMA_EVENT msg */
3939	ssize_t			i, n;	/* gen use */
3940	size_t			sz;	/* header size */
3941	size_t			buflen;	/* size of packed FMA event */
3942	uint8_t			*buf;	/* tmp buffer for packed FMA event */
3943
3944	/*
3945	 * if this is running on a Root Domain, ignore the events,
3946	 * return right away
3947	 */
3948	if (etm_ldom_type == LDOM_TYPE_ROOT)
3949		return;
3950
3951	buflen = 0;
3952	if ((n = nvlist_size(evp, &buflen, NV_ENCODE_XDR)) != 0) {
3953		fmd_hdl_error(hdl, "error: FMA event dropped: "
3954		    "event size errno %d class %s\n", n, class);
3955		etm_stats.etm_os_nvlist_size_fail.fmds_value.ui64++;
3956		etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
3957		return;
3958	}
3959
3960	fmd_hdl_debug(hdl, "info: rcvd event %p from FMD\n", evp);
3961	fmd_hdl_debug(hdl, "info: cnt %llu class %s\n",
3962	    etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64, class);
3963
3964	etm_stats.etm_rd_fmd_bytes.fmds_value.ui64 += buflen;
3965	etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64++;
3966
3967	/*
3968	 * if the debug limit has been set, avoid excessive traffic,
3969	 * for example, an infinite cycle using loopback nodes
3970	 */
3971
3972	if ((etm_debug_max_ev_cnt >= 0) &&
3973	    (etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64 >
3974	    etm_debug_max_ev_cnt)) {
3975		fmd_hdl_debug(hdl, "warning: FMA event dropped: "
3976		    "event %p cnt %llu > debug max %d\n", evp,
3977		    etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64,
3978		    etm_debug_max_ev_cnt);
3979		etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
3980		return;
3981	}
3982
3983	/* allocate a buffer for the FMA event and nvlist pack it */
3984
3985	buf = fmd_hdl_zalloc(hdl, buflen, FMD_SLEEP);
3986
3987	/*
3988	 * increment the ttl value if the event is from remote (a root domain)
3989	 * uncomment this when enabling fault forwarding from Root domains
3990	 * to Control domain.
3991	 *
3992	 * uint8_t			ttl;
3993	 * if (fmd_event_local(hdl, evp) != FMD_EVF_LOCAL) {
3994	 *	if (nvlist_lookup_uint8(evp, FMD_EVN_TTL, &ttl) == 0) {
3995	 *		(void) nvlist_remove(evp, FMD_EVN_TTL, DATA_TYPE_UINT8);
3996	 *		(void) nvlist_add_uint8(evp, FMD_EVN_TTL, ttl + 1);
3997	 *	}
3998	 * }
3999	 */
4000
4001	if ((n = nvlist_pack(evp, (char **)&buf, &buflen,
4002	    NV_ENCODE_XDR, 0)) != 0) {
4003		fmd_hdl_error(hdl, "error: FMA event dropped: "
4004		    "event pack errno %d class %s\n", n, class);
4005		etm_stats.etm_os_nvlist_pack_fail.fmds_value.ui64++;
4006		etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
4007		fmd_hdl_free(hdl, buf, buflen);
4008		return;
4009	}
4010
4011	/* get vector of dst addrs and send the FMA event to each one */
4012
4013	if ((addrv = etm_xport_get_ev_addrv(hdl, evp)) == NULL) {
4014		fmd_hdl_error(hdl, "error: FMA event dropped: "
4015		    "bad event dst addrs errno %d\n", errno);
4016		etm_stats.etm_xport_get_ev_addrv_fail.fmds_value.ui64++;
4017		etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
4018		fmd_hdl_free(hdl, buf, buflen);
4019		return;
4020	}
4021
4022	for (i = 0; addrv[i] != NULL; i++) {
4023
4024		/* open a new connection to this dst addr */
4025
4026		if ((n = etm_conn_open(hdl, "FMA event dropped: "
4027		    "bad conn open on new ev", addrv[i], &conn)) < 0) {
4028			etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
4029			continue;
4030		}
4031
4032		(void) pthread_mutex_lock(&etm_write_lock);
4033
4034		/* write the ETM message header */
4035
4036		if ((hdrp = etm_hdr_write(hdl, conn, evp, NV_ENCODE_XDR,
4037		    &sz)) == NULL) {
4038			(void) pthread_mutex_unlock(&etm_write_lock);
4039			fmd_hdl_error(hdl, "error: FMA event dropped: "
4040			    "bad hdr write errno %d\n", errno);
4041			(void) etm_conn_close(hdl,
4042			    "bad conn close per bad hdr wr", conn);
4043			etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
4044			continue;
4045		}
4046
4047		fmd_hdl_free(hdl, hdrp, sz);	/* header not needed */
4048		etm_stats.etm_wr_hdr_fmaevent.fmds_value.ui64++;
4049		fmd_hdl_debug(hdl, "info: hdr xport write ok for event %p\n",
4050		    evp);
4051
4052		/* write the ETM message body, ie, the packed nvlist */
4053
4054		if ((n = etm_io_op(hdl, "FMA event dropped: "
4055		    "bad io write on event", conn,
4056		    buf, buflen, ETM_IO_OP_WR)) < 0) {
4057			(void) pthread_mutex_unlock(&etm_write_lock);
4058			(void) etm_conn_close(hdl,
4059			    "bad conn close per bad body wr", conn);
4060			etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
4061			continue;
4062		}
4063
4064		(void) pthread_mutex_unlock(&etm_write_lock);
4065
4066		etm_stats.etm_wr_body_fmaevent.fmds_value.ui64++;
4067		etm_stats.etm_wr_xport_bytes.fmds_value.ui64 += buflen;
4068		fmd_hdl_debug(hdl, "info: body xport write ok for event %p\n",
4069		    evp);
4070
4071		/* close the connection */
4072
4073		(void) etm_conn_close(hdl, "bad conn close after event send",
4074		    conn);
4075	} /* foreach dst addr in the vector */
4076
4077	etm_xport_free_addrv(hdl, addrv);
4078	fmd_hdl_free(hdl, buf, buflen);
4079
4080} /* etm_recv() */
4081
4082
4083/*
4084 * etm_send -	receive an FMA event from FMD and enQ it in the iosvc.Q.
4085 *		etm_send_to_remote_root() deQ and xprt the FMA events to a
4086 *		remote root domain
4087 *		return FMD_SEND_SUCCESS for success,
4088 *		       FMD_SEND_FAILED for error
4089 */
4090
4091/*ARGSUSED*/
4092int
4093etm_send(fmd_hdl_t *fmd_hdl, fmd_xprt_t *xp, fmd_event_t *ep, nvlist_t *nvl)
4094{
4095	uint32_t	pack_it;	/* whether to pack/enq the event */
4096	etm_pack_msg_type_t	msg_type;
4097					/* tell etm_pack_ds_msg() what to do */
4098	etm_iosvc_t	*iosvc;		/* ptr to cur iosvc struct */
4099	char 		*class;		/* nvlist class name */
4100
4101	pack_it = 1;
4102	msg_type = FMD_XPRT_OTHER_MSG;
4103
4104	(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
4105	if (class == NULL) {
4106		pack_it = 0;
4107	} else  {
4108		if (etm_debug_lvl >= 1) {
4109			fmd_hdl_debug(fmd_hdl,
4110			    "info: evp class= %s in etm_send\n", class);
4111		}
4112
4113		if (etm_ldom_type ==  LDOM_TYPE_CONTROL) {
4114			iosvc =
4115			    (etm_iosvc_t *)fmd_xprt_getspecific(fmd_hdl, xp);
4116
4117			/*
4118			 * check the flag FORWARDING_FAULTS_TO_CONTROL to
4119			 * decide if or not to drop fault subscription
4120			 * control msgs
4121			 */
4122			if (strcmp(class, "resource.fm.xprt.subscribe") == 0) {
4123				pack_it = 0;
4124				/*
4125				 * if (FORWARDING_FAULTS_TO_CONTROL == 1) {
4126				 * (void) nvlist_lookup_string(nvl,
4127				 *    FM_RSRC_XPRT_SUBCLASS, &subclass);
4128				 * if (strcmp(subclass, "list.suspect")
4129				 *    == 0) {
4130				 *	pack_it = 1;
4131				 *	msg_action = FMD_XPRT_OTHER_MSG;
4132				 * }
4133				 * if (strcmp(subclass, "list.repaired")
4134				 *    == 0) {
4135				 *	pack_it = 1;
4136				 *	msg_action = FMD_XPRT_OTHER_MSG;
4137				 * }
4138				 * }
4139				 */
4140			}
4141			if (strcmp(class, "resource.fm.xprt.run") == 0) {
4142				pack_it = 1;
4143				msg_type = FMD_XPRT_RUN_MSG;
4144			}
4145		} else { /* has to be the root domain ldom */
4146			iosvc = &io_svc;
4147			/*
4148			 * drop all ereport and fault subscriptions
4149			 * are we dropping too much here, more than just ereport
4150			 * and fault subscriptions? need to check
4151			 */
4152			if (strcmp(class, "resource.fm.xprt.subscribe") == 0)
4153				pack_it = 0;
4154			if (strcmp(class, "resource.fm.xprt.run") == 0) {
4155				pack_it = 1;
4156				msg_type = FMD_XPRT_RUN_MSG;
4157			}
4158		}
4159	}
4160
4161	if (pack_it)  {
4162		if (etm_debug_lvl >= 1) {
4163			fmd_hdl_debug(fmd_hdl,
4164			    "info: ldom name returned from xprt get specific="
4165			    "%s xprt=%lld\n", iosvc->ldom_name, xp);
4166		}
4167		/*
4168		 * pack the etm msg for the DS library and  enq in io_svc->Q
4169		 * when the hdrp is NULL, the packing func will use the static
4170		 * iosvc_hdr
4171		 */
4172		(void) etm_pack_ds_msg(fmd_hdl, iosvc, NULL, 0, nvl, msg_type,
4173		    ETM_CKPT_NOOP);
4174	}
4175
4176	return (FMD_SEND_SUCCESS);
4177
4178} /* etm_send() */
4179
4180
4181
4182/*
4183 * _fmd_fini - stop the server daemon and teardown the transport
4184 */
4185
4186void
4187_fmd_fini(fmd_hdl_t *hdl)
4188{
4189	ssize_t			n;		/* gen use */
4190	etm_iosvc_t		*iosvc;		/* ptr to insvc struct */
4191	etm_iosvc_q_ele_t	msg_ele;	/* iosvc msg ele */
4192	uint32_t		i;		/* for loop var */
4193
4194	fmd_hdl_debug(hdl, "info: module finalizing\n");
4195
4196	/* kill the connection server and responder ; wait for them to die */
4197
4198	etm_is_dying = 1;
4199
4200	if (etm_svr_tid != NULL) {
4201		fmd_thr_signal(hdl, etm_svr_tid);
4202		fmd_thr_destroy(hdl, etm_svr_tid);
4203		etm_svr_tid = NULL;
4204	} /* if server thread was successfully created */
4205
4206	if (etm_resp_tid != NULL) {
4207		fmd_thr_signal(hdl, etm_resp_tid);
4208		fmd_thr_destroy(hdl, etm_resp_tid);
4209		etm_resp_tid = NULL;
4210	} /* if responder thread was successfully created */
4211
4212	if (etm_async_e_tid != NULL) {
4213		fmd_thr_signal(hdl, etm_async_e_tid);
4214		fmd_thr_destroy(hdl, etm_async_e_tid);
4215		etm_async_e_tid = NULL;
4216	} /* if async event handler thread was successfully created */
4217
4218
4219	if ((etm_ldom_type == LDOM_TYPE_LEGACY) ||
4220	    (etm_ldom_type == LDOM_TYPE_CONTROL)) {
4221
4222		/* teardown the transport and cleanup syslogging */
4223		if ((n = etm_xport_fini(hdl)) != 0) {
4224			fmd_hdl_error(hdl, "warning: xport fini errno %d\n",
4225			    (-n));
4226		}
4227		if (etm_fmd_xprt != NULL) {
4228			fmd_xprt_close(hdl, etm_fmd_xprt);
4229		}
4230
4231		if (syslog_logfd != -1) {
4232			(void) close(syslog_logfd);
4233		}
4234		if (syslog_msgfd != -1) {
4235			(void) close(syslog_msgfd);
4236		}
4237	}
4238
4239	if (etm_ldom_type == LDOM_TYPE_CONTROL)  {
4240		if (ldom_unregister_event(etm_lhp))
4241			fmd_hdl_debug(hdl, "ldom_unregister_event() failed\n");
4242
4243		/*
4244		 * On control domain side, there may be multiple iosvc struct
4245		 * in use, one for each bound/active domain. Each struct
4246		 * manages a queue of fma events destined to the root domain.
4247		 * Need to go thru every iosvc struct to clean up its resources.
4248		 */
4249		for (i = 0; i < NUM_OF_ROOT_DOMAINS; i++) {
4250			if (iosvc_list[i].ldom_name[0] != '\0') {
4251				/*
4252				 * found an iosvc struct for a root domain
4253				 */
4254				iosvc = &iosvc_list[i];
4255				(void) pthread_mutex_lock(&iosvc_list_lock);
4256				etm_iosvc_cleanup(hdl, iosvc, B_TRUE, B_FALSE);
4257				(void) pthread_mutex_unlock(&iosvc_list_lock);
4258
4259			} else {
4260				/*
4261				 * reach the end of existing iosvc structures
4262				 */
4263				continue;
4264			}
4265		} /* for i<NUM_OF_ROOT_DOMAINS */
4266		etm_ckpt_fini(hdl);
4267		etm_filter_fini(hdl);
4268
4269		ldom_fini(etm_lhp);
4270
4271	} else if (etm_ldom_type == LDOM_TYPE_ROOT) {
4272		/*
4273		 * On root domain side, there is only one iosvc struct in use.
4274		 */
4275		iosvc = &io_svc;
4276		if (iosvc->send_tid != NULL) {
4277			fmd_thr_signal(hdl, iosvc->send_tid);
4278			fmd_thr_destroy(hdl, iosvc->send_tid);
4279			iosvc->send_tid = NULL;
4280		} /* if io svc send thread was successfully created */
4281
4282		if (iosvc->recv_tid != NULL) {
4283			fmd_thr_signal(hdl, iosvc->recv_tid);
4284			fmd_thr_destroy(hdl, iosvc->recv_tid);
4285			iosvc->recv_tid = NULL;
4286		} /* if io svc receive thread was successfully created */
4287
4288		(void) pthread_mutex_lock(&iosvc->msg_q_lock);
4289		while (iosvc->msg_q_cur_len > 0) {
4290			(void) etm_iosvc_msg_deq(hdl, iosvc, &msg_ele);
4291			fmd_hdl_free(hdl, msg_ele.msg, msg_ele.msg_size);
4292		}
4293		(void) pthread_mutex_unlock(&iosvc->msg_q_lock);
4294
4295		if (iosvc->fmd_xprt != NULL)
4296			fmd_xprt_close(hdl, iosvc->fmd_xprt);
4297		ldom_fini(etm_lhp);
4298	}
4299	if (etm_ds_fini) {
4300		(*etm_ds_fini)();
4301		(void) dlclose(etm_dl_hdl);
4302	}
4303
4304	fmd_hdl_debug(hdl, "info: module finalized ok\n");
4305
4306} /* _fmd_fini() */
4307