1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  * Copyright 2012 Joyent, Inc.  All rights reserved.
26  */
27 
28 /*
29  * /dev/ipmi IPMI monitor
30  *
31  * The purpose of this module is to monitor the connection between the system
32  * and the service processor attached via /dev/ipmi0.  The module assumes the SP
33  * supports the Sun OEM uptime IPMI command.  If the BMC connection does not
34  * exist, or the uptime function is not implemented, then the module unloads
35  * without doing anything.
36  *
37  * When the module is first loaded, or a reset is detected, the module will
38  * generate the ESC_PLATFORM_SP_RESET sysevent as a system-wide notification to
39  * indicate that this event has occurred.
40  *
41  * Note that this event generation is not guaranteed to have a one-to-one
42  * correspondence with an SP reset.  There is no persistence, so if fmd is
43  * restarted we will generate this event again.  Thus the event only indicates
44  * the possibility that the SP has been reset.  This could be enhanced using fmd
45  * checkpoints to have some persistent state to avoid this scenario.  However,
46  * it currently serves the useful dual purpose of notifying consumers of system
47  * startup as well as SP reset through a single channel.
48  */
49 
50 #include <errno.h>
51 #include <libipmi.h>
52 #include <libsysevent.h>
53 #include <string.h>
54 #include <fm/fmd_api.h>
55 #include <sys/sysevent/eventdefs.h>
56 
57 typedef struct sp_monitor {
58 	ipmi_handle_t	*sm_hdl;
59 	uint32_t	sm_seconds;
60 	uint32_t	sm_generation;
61 	hrtime_t	sm_interval;
62 } sp_monitor_t;
63 
64 static void
sp_post_sysevent(fmd_hdl_t * hdl)65 sp_post_sysevent(fmd_hdl_t *hdl)
66 {
67 	sp_monitor_t *smp = fmd_hdl_getspecific(hdl);
68 	sysevent_id_t eid;
69 
70 	fmd_hdl_debug(hdl, "SP reset detected, posting sysevent");
71 
72 	if (sysevent_post_event(EC_PLATFORM, ESC_PLATFORM_SP_RESET,
73 	    SUNW_VENDOR, "fmd", NULL, &eid) != 0) {
74 		fmd_hdl_debug(hdl, "failed to send sysevent: %s",
75 		    strerror(errno));
76 		/*
77 		 * We reset the seconds and generation so that the next time
78 		 * through we will try to post the sysevent again.
79 		 */
80 		smp->sm_seconds = -1U;
81 		smp->sm_generation = -1U;
82 	}
83 }
84 
85 /*ARGSUSED*/
86 static void
sp_timeout(fmd_hdl_t * hdl,id_t id,void * data)87 sp_timeout(fmd_hdl_t *hdl, id_t id, void *data)
88 {
89 	sp_monitor_t *smp = fmd_hdl_getspecific(hdl);
90 	uint32_t seconds, generation;
91 
92 	if (ipmi_sunoem_uptime(smp->sm_hdl, &seconds, &generation) != 0) {
93 		/*
94 		 * Ignore uptime failures.  We will generate the appropriate
95 		 * event when it comes back online.
96 		 */
97 		fmd_hdl_debug(hdl, "failed to get uptime: %s",
98 		    ipmi_errmsg(smp->sm_hdl));
99 	} else {
100 		/*
101 		 * We want to catch cases where the generation number is
102 		 * explicitly reset, or when the SP configuration is reset after
103 		 * a reboot (and the generation number is 0).  We also post a
104 		 * sysevent when the module initially loads, since we can't be
105 		 * sure if we missed a SP reset or not.
106 		 */
107 		if (seconds < smp->sm_seconds ||
108 		    generation != smp->sm_generation ||
109 		    smp->sm_seconds == 0)
110 			sp_post_sysevent(hdl);
111 
112 		smp->sm_seconds = seconds;
113 		smp->sm_generation = generation;
114 	}
115 
116 	(void) fmd_timer_install(hdl, NULL, NULL, smp->sm_interval);
117 }
118 
119 static const fmd_hdl_ops_t fmd_ops = {
120 	NULL,		/* fmdo_recv */
121 	sp_timeout,	/* fmdo_timeout */
122 	NULL,		/* fmdo_close */
123 	NULL,		/* fmdo_stats */
124 	NULL,		/* fmdo_gc */
125 };
126 
127 static const fmd_prop_t fmd_props[] = {
128 	{ "interval", FMD_TYPE_TIME, "60sec" },
129 	{ NULL, 0, NULL }
130 };
131 
132 static const fmd_hdl_info_t fmd_info = {
133 	"Service Processor Monitor", "1.0", &fmd_ops, fmd_props
134 };
135 
136 void
_fmd_init(fmd_hdl_t * hdl)137 _fmd_init(fmd_hdl_t *hdl)
138 {
139 	sp_monitor_t *smp;
140 	int error;
141 	char *msg;
142 
143 	if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0)
144 		return;
145 
146 	smp = fmd_hdl_zalloc(hdl, sizeof (sp_monitor_t), FMD_SLEEP);
147 	fmd_hdl_setspecific(hdl, smp);
148 
149 	if ((smp->sm_hdl = ipmi_open(&error, &msg, IPMI_TRANSPORT_BMC, NULL))
150 	    == NULL) {
151 		/*
152 		 * If /dev/ipmi0 doesn't exist on the system, then unload the
153 		 * module without doing anything.
154 		 */
155 		if (error != EIPMI_BMC_OPEN_FAILED)
156 			fmd_hdl_abort(hdl, "failed to initialize IPMI "
157 			    "connection: %s\n", msg);
158 		fmd_hdl_debug(hdl, "failed to load: no IPMI connection "
159 		    "present");
160 		fmd_hdl_free(hdl, smp, sizeof (sp_monitor_t));
161 		fmd_hdl_unregister(hdl);
162 		return;
163 	}
164 
165 	/*
166 	 * Attempt an initial uptime() call.  If the IPMI command is
167 	 * unrecognized, then this is an unsupported platform and the module
168 	 * should be unloaded.  Any other error is treated is transient failure.
169 	 */
170 	if ((error = ipmi_sunoem_uptime(smp->sm_hdl, &smp->sm_seconds,
171 	    &smp->sm_generation)) != 0 &&
172 	    ipmi_errno(smp->sm_hdl) == EIPMI_INVALID_COMMAND) {
173 		fmd_hdl_debug(hdl, "failed to load: uptime command "
174 		    "not supported");
175 		ipmi_close(smp->sm_hdl);
176 		fmd_hdl_free(hdl, smp, sizeof (sp_monitor_t));
177 		fmd_hdl_unregister(hdl);
178 		return;
179 	}
180 
181 	smp->sm_interval = fmd_prop_get_int64(hdl, "interval");
182 
183 	if (error == 0)
184 		fmd_hdl_debug(hdl, "successfully loaded, uptime = %u seconds "
185 		    "(generation %u)", smp->sm_seconds, smp->sm_generation);
186 	else
187 		fmd_hdl_debug(hdl, "successfully loaded, but uptime call "
188 		    "failed: %s", ipmi_errmsg(smp->sm_hdl));
189 
190 	/*
191 	 * Setup the recurring timer.
192 	 */
193 	(void) fmd_timer_install(hdl, NULL, NULL, 0);
194 }
195 
196 void
_fmd_fini(fmd_hdl_t * hdl)197 _fmd_fini(fmd_hdl_t *hdl)
198 {
199 	sp_monitor_t *smp = fmd_hdl_getspecific(hdl);
200 
201 	if (smp) {
202 		ipmi_close(smp->sm_hdl);
203 		fmd_hdl_free(hdl, smp, sizeof (sp_monitor_t));
204 	}
205 }
206