md_ddf.c revision 08b90a5b47616790b430cf96b814ac353d03ab53
1/*-
2 * Copyright (c) 2012 Alexander Motin <mav@FreeBSD.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD$");
29
30#include <sys/param.h>
31#include <sys/bio.h>
32#include <sys/endian.h>
33#include <sys/kernel.h>
34#include <sys/kobj.h>
35#include <sys/limits.h>
36#include <sys/lock.h>
37#include <sys/malloc.h>
38#include <sys/mutex.h>
39#include <sys/systm.h>
40#include <sys/time.h>
41#include <sys/clock.h>
42#include <geom/geom.h>
43#include "geom/raid/g_raid.h"
44#include "geom/raid/md_ddf.h"
45#include "g_raid_md_if.h"
46
47static MALLOC_DEFINE(M_MD_DDF, "md_ddf_data", "GEOM_RAID DDF metadata");
48
49#define	DDF_MAX_DISKS_HARD	128
50
51#define	DDF_MAX_DISKS	16
52#define	DDF_MAX_VDISKS	7
53#define	DDF_MAX_PARTITIONS	1
54
55#define DECADE (3600*24*(365*10+2))	/* 10 years in seconds. */
56
57struct ddf_meta {
58	u_int	sectorsize;
59	u_int	bigendian;
60	struct ddf_header *hdr;
61	struct ddf_cd_record *cdr;
62	struct ddf_pd_record *pdr;
63	struct ddf_vd_record *vdr;
64	void *cr;
65	struct ddf_pdd_record *pdd;
66	struct ddf_bbm_log *bbm;
67};
68
69struct ddf_vol_meta {
70	u_int	sectorsize;
71	u_int	bigendian;
72	struct ddf_header *hdr;
73	struct ddf_cd_record *cdr;
74	struct ddf_vd_entry *vde;
75	struct ddf_vdc_record *vdc;
76	struct ddf_vdc_record *bvdc[DDF_MAX_DISKS_HARD];
77};
78
79struct g_raid_md_ddf_perdisk {
80	struct ddf_meta	 pd_meta;
81};
82
83struct g_raid_md_ddf_pervolume {
84	struct ddf_vol_meta		 pv_meta;
85	int				 pv_started;
86	struct callout			 pv_start_co;	/* STARTING state timer. */
87};
88
89struct g_raid_md_ddf_object {
90	struct g_raid_md_object	 mdio_base;
91	struct ddf_meta		 mdio_meta;
92	int			 mdio_starting;
93	struct callout		 mdio_start_co;	/* STARTING state timer. */
94	int			 mdio_started;
95	struct root_hold_token	*mdio_rootmount; /* Root mount delay token. */
96};
97
98static g_raid_md_create_t g_raid_md_create_ddf;
99static g_raid_md_taste_t g_raid_md_taste_ddf;
100static g_raid_md_event_t g_raid_md_event_ddf;
101static g_raid_md_volume_event_t g_raid_md_volume_event_ddf;
102static g_raid_md_ctl_t g_raid_md_ctl_ddf;
103static g_raid_md_write_t g_raid_md_write_ddf;
104static g_raid_md_fail_disk_t g_raid_md_fail_disk_ddf;
105static g_raid_md_free_disk_t g_raid_md_free_disk_ddf;
106static g_raid_md_free_volume_t g_raid_md_free_volume_ddf;
107static g_raid_md_free_t g_raid_md_free_ddf;
108
109static kobj_method_t g_raid_md_ddf_methods[] = {
110	KOBJMETHOD(g_raid_md_create,	g_raid_md_create_ddf),
111	KOBJMETHOD(g_raid_md_taste,	g_raid_md_taste_ddf),
112	KOBJMETHOD(g_raid_md_event,	g_raid_md_event_ddf),
113	KOBJMETHOD(g_raid_md_volume_event,	g_raid_md_volume_event_ddf),
114	KOBJMETHOD(g_raid_md_ctl,	g_raid_md_ctl_ddf),
115	KOBJMETHOD(g_raid_md_write,	g_raid_md_write_ddf),
116	KOBJMETHOD(g_raid_md_fail_disk,	g_raid_md_fail_disk_ddf),
117	KOBJMETHOD(g_raid_md_free_disk,	g_raid_md_free_disk_ddf),
118	KOBJMETHOD(g_raid_md_free_volume,	g_raid_md_free_volume_ddf),
119	KOBJMETHOD(g_raid_md_free,	g_raid_md_free_ddf),
120	{ 0, 0 }
121};
122
123static struct g_raid_md_class g_raid_md_ddf_class = {
124	"DDF",
125	g_raid_md_ddf_methods,
126	sizeof(struct g_raid_md_ddf_object),
127	.mdc_priority = 100
128};
129
/*
 * Metadata field accessors.  Multi-byte fields are decoded and encoded in
 * the byte order selected at run time by (m)->bigendian.
 *
 *   xxxP(m, f, ...)	f is a pointer to the field.
 *   xxxD(m, f, ...)	f is the field lvalue itself.
 *   xxx(m, f, ...)	f is a member path relative to m, i.e. (m)->f.
 */

/* Readers taking a pointer. */
#define GET8P(m, f)	(*(f))
#define GET16P(m, f)	((m)->bigendian ? be16dec(f) : le16dec(f))
#define GET32P(m, f)	((m)->bigendian ? be32dec(f) : le32dec(f))
#define GET64P(m, f)	((m)->bigendian ? be64dec(f) : le64dec(f))

/* Readers taking the field lvalue. */
#define GET8D(m, f)	(f)
#define GET16D(m, f)	GET16P((m), &f)
#define GET32D(m, f)	GET32P((m), &f)
#define GET64D(m, f)	GET64P((m), &f)

/* Readers taking a member path of m. */
#define GET8(m, f)	((m)->f)
#define GET16(m, f)	GET16P((m), &(m)->f)
#define GET32(m, f)	GET32P((m), &(m)->f)
#define GET64(m, f)	GET64P((m), &(m)->f)

/* Writers taking a pointer. */
#define SET8P(m, f, v)							\
	(*(f) = (v))
#define SET16P(m, f, v)							\
	do {								\
		if ((m)->bigendian)					\
			be16enc((f), (v));				\
		else							\
			le16enc((f), (v));				\
	} while (0)
#define SET32P(m, f, v)							\
	do {								\
		if ((m)->bigendian)					\
			be32enc((f), (v));				\
		else							\
			le32enc((f), (v));				\
	} while (0)
#define SET64P(m, f, v)							\
	do {								\
		if ((m)->bigendian)					\
			be64enc((f), (v));				\
		else							\
			le64enc((f), (v));				\
	} while (0)

/* Writers taking the field lvalue. */
#define SET8D(m, f, v)	SET8P((m), &(f), (v))
#define SET16D(m, f, v)	SET16P((m), &(f), (v))
#define SET32D(m, f, v)	SET32P((m), &(f), (v))
#define SET64D(m, f, v)	SET64P((m), &(f), (v))

/* Writers taking a member path of m. */
#define SET8(m, f, v)	SET8P((m), &((m)->f), (v))
#define SET16(m, f, v)	SET16P((m), &((m)->f), (v))
#define SET32(m, f, v)	SET32P((m), &((m)->f), (v))
#define SET64(m, f, v)	SET64P((m), &((m)->f), (v))

/* Number of configuration record slots in the configuration section. */
#define GETCRNUM(m)	(GET32((m), hdr->cr_length) /			\
	GET16((m), hdr->Configuration_Record_Length))

/* Address of the n-th configuration record slot inside (m)->cr. */
#define GETVDCPTR(m, n)	((struct ddf_vdc_record *)((uint8_t *)(m)->cr +	\
	(n) * GET16((m), hdr->Configuration_Record_Length) *		\
	(m)->sectorsize))
181
/*
 * Return 1 when the first `size' bytes of `buf' are all 0xff, 0 otherwise.
 * A size of zero or less yields 1.
 */
static int
isff(uint8_t *buf, int size)
{
	uint8_t *p;
	int left;

	p = buf;
	for (left = size; left > 0; left--, p++) {
		if (*p != 0xff)
			return (0);
	}
	return (1);
}
192
/*
 * Print a 24-byte GUID.  When every byte is either NUL or within
 * [' ', 127] the GUID is printed as a quoted string, otherwise as
 * 48 hexadecimal digits.
 */
static void
print_guid(uint8_t *buf)
{
	int i, textual;

	textual = 1;
	for (i = 0; textual && i < 24; i++) {
		if (buf[i] == 0)
			continue;
		if (buf[i] < ' ' || buf[i] > 127)
			textual = 0;
	}
	if (!textual) {
		for (i = 0; i < 24; i++)
			printf("%02x", buf[i]);
		return;
	}
	printf("'%.24s'", buf);
}
212
213static void
214g_raid_md_ddf_print(struct ddf_meta *meta)
215{
216	struct ddf_vdc_record *vdc;
217	struct ddf_vuc_record *vuc;
218	struct ddf_sa_record *sa;
219	uint64_t *val2;
220	uint32_t val;
221	int i, j, k, num, num2;
222
223	if (g_raid_debug < 1)
224		return;
225
226	printf("********* DDF Metadata *********\n");
227	printf("**** Header ****\n");
228	printf("DDF_Header_GUID      ");
229	print_guid(meta->hdr->DDF_Header_GUID);
230	printf("\n");
231	printf("DDF_rev              %8.8s\n", (char *)&meta->hdr->DDF_rev[0]);
232	printf("Sequence_Number      0x%08x\n", GET32(meta, hdr->Sequence_Number));
233	printf("TimeStamp            0x%08x\n", GET32(meta, hdr->TimeStamp));
234	printf("Open_Flag            0x%02x\n", GET16(meta, hdr->Open_Flag));
235	printf("Foreign_Flag         0x%02x\n", GET16(meta, hdr->Foreign_Flag));
236	printf("Diskgrouping         0x%02x\n", GET16(meta, hdr->Diskgrouping));
237	printf("Primary_Header_LBA   %ju\n", GET64(meta, hdr->Primary_Header_LBA));
238	printf("Secondary_Header_LBA %ju\n", GET64(meta, hdr->Secondary_Header_LBA));
239	printf("WorkSpace_Length     %u\n", GET32(meta, hdr->WorkSpace_Length));
240	printf("WorkSpace_LBA        %ju\n", GET64(meta, hdr->WorkSpace_LBA));
241	printf("Max_PD_Entries       %u\n", GET16(meta, hdr->Max_PD_Entries));
242	printf("Max_VD_Entries       %u\n", GET16(meta, hdr->Max_VD_Entries));
243	printf("Max_Partitions       %u\n", GET16(meta, hdr->Max_Partitions));
244	printf("Configuration_Record_Length %u\n", GET16(meta, hdr->Configuration_Record_Length));
245	printf("Max_Primary_Element_Entries %u\n", GET16(meta, hdr->Max_Primary_Element_Entries));
246	printf("Controller Data      %u:%u\n", GET32(meta, hdr->cd_section), GET32(meta, hdr->cd_length));
247	printf("Physical Disk        %u:%u\n", GET32(meta, hdr->pdr_section), GET32(meta, hdr->pdr_length));
248	printf("Virtual Disk         %u:%u\n", GET32(meta, hdr->vdr_section), GET32(meta, hdr->vdr_length));
249	printf("Configuration Recs   %u:%u\n", GET32(meta, hdr->cr_section), GET32(meta, hdr->cr_length));
250	printf("Physical Disk Recs   %u:%u\n", GET32(meta, hdr->pdd_section), GET32(meta, hdr->pdd_length));
251	printf("BBM Log              %u:%u\n", GET32(meta, hdr->bbmlog_section), GET32(meta, hdr->bbmlog_length));
252	printf("Diagnostic Space     %u:%u\n", GET32(meta, hdr->Diagnostic_Space), GET32(meta, hdr->Diagnostic_Space_Length));
253	printf("Vendor_Specific_Logs %u:%u\n", GET32(meta, hdr->Vendor_Specific_Logs), GET32(meta, hdr->Vendor_Specific_Logs_Length));
254	printf("**** Controler Data ****\n");
255	printf("Controller_GUID      ");
256	print_guid(meta->cdr->Controller_GUID);
257	printf("\n");
258	printf("Controller_Type      0x%04x%04x 0x%04x%04x\n",
259	    GET16(meta, cdr->Controller_Type.Vendor_ID),
260	    GET16(meta, cdr->Controller_Type.Device_ID),
261	    GET16(meta, cdr->Controller_Type.SubVendor_ID),
262	    GET16(meta, cdr->Controller_Type.SubDevice_ID));
263	printf("Product_ID           '%.16s'\n", (char *)&meta->cdr->Product_ID[0]);
264	printf("**** Physical Disk Records ****\n");
265	printf("Populated_PDEs       %u\n", GET16(meta, pdr->Populated_PDEs));
266	printf("Max_PDE_Supported    %u\n", GET16(meta, pdr->Max_PDE_Supported));
267	for (j = 0; j < GET16(meta, pdr->Populated_PDEs); j++) {
268		if (isff(meta->pdr->entry[j].PD_GUID, 24))
269			continue;
270		if (GET32(meta, pdr->entry[j].PD_Reference) == 0xffffffff)
271			continue;
272		printf("PD_GUID              ");
273		print_guid(meta->pdr->entry[j].PD_GUID);
274		printf("\n");
275		printf("PD_Reference         0x%08x\n",
276		    GET32(meta, pdr->entry[j].PD_Reference));
277		printf("PD_Type              0x%04x\n",
278		    GET16(meta, pdr->entry[j].PD_Type));
279		printf("PD_State             0x%04x\n",
280		    GET16(meta, pdr->entry[j].PD_State));
281		printf("Configured_Size      %ju\n",
282		    GET64(meta, pdr->entry[j].Configured_Size));
283		printf("Block_Size           %u\n",
284		    GET16(meta, pdr->entry[j].Block_Size));
285	}
286	printf("**** Virtual Disk Records ****\n");
287	printf("Populated_VDEs       %u\n", GET16(meta, vdr->Populated_VDEs));
288	printf("Max_VDE_Supported    %u\n", GET16(meta, vdr->Max_VDE_Supported));
289	for (j = 0; j < GET16(meta, vdr->Populated_VDEs); j++) {
290		if (isff(meta->vdr->entry[j].VD_GUID, 24))
291			continue;
292		printf("VD_GUID              ");
293		print_guid(meta->vdr->entry[j].VD_GUID);
294		printf("\n");
295		printf("VD_Number            0x%04x\n",
296		    GET16(meta, vdr->entry[j].VD_Number));
297		printf("VD_Type              0x%02x\n",
298		    GET8(meta, vdr->entry[j].VD_Type));
299		printf("VD_State             0x%02x\n",
300		    GET8(meta, vdr->entry[j].VD_State));
301		printf("Init_State           0x%02x\n",
302		    GET8(meta, vdr->entry[j].Init_State));
303		printf("Drive_Failures_Remaining %u\n",
304		    GET8(meta, vdr->entry[j].Drive_Failures_Remaining));
305		printf("VD_Name              '%.16s'\n",
306		    (char *)&meta->vdr->entry[j].VD_Name);
307	}
308	printf("**** Configuration Records ****\n");
309	num = GETCRNUM(meta);
310	for (j = 0; j < num; j++) {
311		vdc = GETVDCPTR(meta, j);
312		val = GET32D(meta, vdc->Signature);
313		switch (val) {
314		case DDF_VDCR_SIGNATURE:
315			printf("** Virtual Disk Configuration **\n");
316			printf("VD_GUID              ");
317			print_guid(vdc->VD_GUID);
318			printf("\n");
319			printf("Timestamp            0x%08x\n",
320			    GET32D(meta, vdc->Timestamp));
321			printf("Sequence_Number      0x%08x\n",
322			    GET32D(meta, vdc->Sequence_Number));
323			printf("Primary_Element_Count %u\n",
324			    GET16D(meta, vdc->Primary_Element_Count));
325			printf("Stripe_Size          %u\n",
326			    GET8D(meta, vdc->Stripe_Size));
327			printf("Primary_RAID_Level   0x%02x\n",
328			    GET8D(meta, vdc->Primary_RAID_Level));
329			printf("RLQ                  0x%02x\n",
330			    GET8D(meta, vdc->RLQ));
331			printf("Secondary_Element_Count %u\n",
332			    GET8D(meta, vdc->Secondary_Element_Count));
333			printf("Secondary_Element_Seq %u\n",
334			    GET8D(meta, vdc->Secondary_Element_Seq));
335			printf("Secondary_RAID_Level 0x%02x\n",
336			    GET8D(meta, vdc->Secondary_RAID_Level));
337			printf("Block_Count          %ju\n",
338			    GET64D(meta, vdc->Block_Count));
339			printf("VD_Size              %ju\n",
340			    GET64D(meta, vdc->VD_Size));
341			printf("Block_Size           %u\n",
342			    GET16D(meta, vdc->Block_Size));
343			printf("Rotate_Parity_count  %u\n",
344			    GET8D(meta, vdc->Rotate_Parity_count));
345			printf("Associated_Spare_Disks");
346			for (i = 0; i < 8; i++) {
347				if (GET32D(meta, vdc->Associated_Spares[i]) != 0xffffffff)
348					printf(" 0x%08x", GET32D(meta, vdc->Associated_Spares[i]));
349			}
350			printf("\n");
351			printf("Cache_Flags          %016jx\n",
352			    GET64D(meta, vdc->Cache_Flags));
353			printf("BG_Rate              %u\n",
354			    GET8D(meta, vdc->BG_Rate));
355			printf("MDF_Parity_Disks     %u\n",
356			    GET8D(meta, vdc->MDF_Parity_Disks));
357			printf("MDF_Parity_Generator_Polynomial 0x%04x\n",
358			    GET16D(meta, vdc->MDF_Parity_Generator_Polynomial));
359			printf("MDF_Constant_Generation_Method 0x%02x\n",
360			    GET8D(meta, vdc->MDF_Constant_Generation_Method));
361			printf("Physical_Disks      ");
362			num2 = GET16D(meta, vdc->Primary_Element_Count);
363			val2 = (uint64_t *)&(vdc->Physical_Disk_Sequence[GET16(meta, hdr->Max_Primary_Element_Entries)]);
364			for (i = 0; i < num2; i++)
365				printf(" 0x%08x @ %ju",
366				    GET32D(meta, vdc->Physical_Disk_Sequence[i]),
367				    GET64P(meta, val2 + i));
368			printf("\n");
369			break;
370		case DDF_VUCR_SIGNATURE:
371			printf("** Vendor Unique Configuration **\n");
372			vuc = (struct ddf_vuc_record *)vdc;
373			printf("VD_GUID              ");
374			print_guid(vuc->VD_GUID);
375			printf("\n");
376			break;
377		case DDF_SA_SIGNATURE:
378			printf("** Spare Assignment Configuration **\n");
379			sa = (struct ddf_sa_record *)vdc;
380			printf("Timestamp            0x%08x\n",
381			    GET32D(meta, sa->Timestamp));
382			printf("Spare_Type           0x%02x\n",
383			    GET8D(meta, sa->Spare_Type));
384			printf("Populated_SAEs       %u\n",
385			    GET16D(meta, sa->Populated_SAEs));
386			printf("MAX_SAE_Supported    %u\n",
387			    GET16D(meta, sa->MAX_SAE_Supported));
388			for (i = 0; i < GET16D(meta, sa->Populated_SAEs); i++) {
389				if (isff(sa->entry[i].VD_GUID, 24))
390					continue;
391				printf("VD_GUID             ");
392				for (k = 0; k < 24; k++)
393					printf("%02x", sa->entry[i].VD_GUID[k]);
394				printf("\n");
395				printf("Secondary_Element   %u\n",
396				    GET16D(meta, sa->entry[i].Secondary_Element));
397			}
398			break;
399		case 0xFFFFFFFF:
400			break;
401		default:
402			printf("Unknown configuration signature %08x\n", val);
403			break;
404		}
405	}
406	printf("**** Physical Disk Data ****\n");
407	printf("PD_GUID              ");
408	print_guid(meta->pdd->PD_GUID);
409	printf("\n");
410	printf("PD_Reference         0x%08x\n",
411	    GET32(meta, pdd->PD_Reference));
412	printf("Forced_Ref_Flag      0x%02x\n",
413	    GET8(meta, pdd->Forced_Ref_Flag));
414	printf("Forced_PD_GUID_Flag  0x%02x\n",
415	    GET8(meta, pdd->Forced_PD_GUID_Flag));
416}
417
418static int
419ddf_meta_find_pd(struct ddf_meta *meta, uint8_t *GUID, uint32_t PD_Reference)
420{
421	int i;
422
423	for (i = 0; i < GET16(meta, pdr->Populated_PDEs); i++) {
424		if (GUID != NULL) {
425			if (memcmp(meta->pdr->entry[i].PD_GUID, GUID, 24) == 0)
426				return (i);
427		} else if (PD_Reference != 0xffffffff) {
428			if (GET32(meta, pdr->entry[i].PD_Reference) == PD_Reference)
429				return (i);
430		} else
431			if (isff(meta->pdr->entry[i].PD_GUID, 24))
432				return (i);
433	}
434	if (GUID == NULL && PD_Reference == 0xffffffff) {
435		if (i >= GET16(meta, pdr->Max_PDE_Supported))
436			return (-1);
437		SET16(meta, pdr->Populated_PDEs, i + 1);
438		return (i);
439	}
440	return (-1);
441}
442
443static int
444ddf_meta_find_vd(struct ddf_meta *meta, uint8_t *GUID)
445{
446	int i;
447
448	for (i = 0; i < GET16(meta, vdr->Populated_VDEs); i++) {
449		if (GUID != NULL) {
450			if (memcmp(meta->vdr->entry[i].VD_GUID, GUID, 24) == 0)
451				return (i);
452		} else
453			if (isff(meta->vdr->entry[i].VD_GUID, 24))
454				return (i);
455	}
456	if (GUID == NULL) {
457		if (i >= GET16(meta, vdr->Max_VDE_Supported))
458			return (-1);
459		SET16(meta, vdr->Populated_VDEs, i + 1);
460		return (i);
461	}
462	return (-1);
463}
464
465static struct ddf_vdc_record *
466ddf_meta_find_vdc(struct ddf_meta *meta, uint8_t *GUID)
467{
468	struct ddf_vdc_record *vdc;
469	int i, num;
470
471	num = GETCRNUM(meta);
472	for (i = 0; i < num; i++) {
473		vdc = GETVDCPTR(meta, i);
474		if (GUID != NULL) {
475			if (GET32D(meta, vdc->Signature) == DDF_VDCR_SIGNATURE &&
476			    memcmp(vdc->VD_GUID, GUID, 24) == 0)
477				return (vdc);
478		} else
479			if (GET32D(meta, vdc->Signature) == 0xffffffff)
480				return (vdc);
481	}
482	return (NULL);
483}
484
485static int
486ddf_meta_count_vdc(struct ddf_meta *meta, uint8_t *GUID)
487{
488	struct ddf_vdc_record *vdc;
489	int i, num, cnt;
490
491	cnt = 0;
492	num = GETCRNUM(meta);
493	for (i = 0; i < num; i++) {
494		vdc = GETVDCPTR(meta, i);
495		if (GET32D(meta, vdc->Signature) != DDF_VDCR_SIGNATURE)
496			continue;
497		if (GUID == NULL || memcmp(vdc->VD_GUID, GUID, 24) == 0)
498			cnt++;
499	}
500	return (cnt);
501}
502
503static int
504ddf_meta_find_disk(struct ddf_vol_meta *vmeta, uint32_t PD_Reference,
505    int *bvdp, int *posp)
506{
507	int i, bvd, pos;
508
509	i = 0;
510	for (bvd = 0; bvd < GET16(vmeta, vdc->Secondary_Element_Count); bvd++) {
511		if (vmeta->bvdc[bvd] == NULL) {
512			i += GET16(vmeta, vdc->Primary_Element_Count); // XXX
513			continue;
514		}
515		for (pos = 0; pos < GET16(vmeta, bvdc[bvd]->Primary_Element_Count);
516		    pos++, i++) {
517			if (GET32(vmeta, bvdc[bvd]->Physical_Disk_Sequence[pos]) ==
518			    PD_Reference) {
519				if (bvdp != NULL)
520					*bvdp = bvd;
521				if (posp != NULL)
522					*posp = pos;
523				return (i);
524			}
525		}
526	}
527	return (-1);
528}
529
530static void
531ddf_meta_create(struct g_raid_disk *disk, struct ddf_meta *sample)
532{
533	struct timespec ts;
534	struct clocktime ct;
535	struct g_raid_md_ddf_perdisk *pd;
536	struct ddf_meta *meta;
537	struct ddf_pd_entry *pde;
538	off_t anchorlba;
539	u_int ss, pos, size;
540	int len, error;
541	char serial_buffer[24];
542
543	if (sample->hdr == NULL)
544		sample = NULL;
545
546	pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data;
547	meta = &pd->pd_meta;
548	ss = disk->d_consumer->provider->sectorsize;
549	anchorlba = disk->d_consumer->provider->mediasize / ss - 1;
550
551	meta->sectorsize = ss;
552	meta->bigendian = sample ? sample->bigendian : 0;
553	getnanotime(&ts);
554	clock_ts_to_ct(&ts, &ct);
555
556	/* Header */
557	meta->hdr = malloc(ss, M_MD_DDF, M_WAITOK);
558	memset(meta->hdr, 0xff, ss);
559	if (sample) {
560		memcpy(meta->hdr, sample->hdr, sizeof(struct ddf_header));
561		if (ss != sample->sectorsize) {
562			SET32(meta, hdr->WorkSpace_Length,
563			    (GET32(sample, hdr->WorkSpace_Length) *
564			    sample->sectorsize + ss - 1) / ss);
565			SET16(meta, hdr->Configuration_Record_Length,
566			    (GET16(sample, hdr->Configuration_Record_Length) *
567			    sample->sectorsize + ss - 1) / ss);
568			SET32(meta, hdr->cd_length,
569			    (GET32(sample, hdr->cd_length) *
570			    sample->sectorsize + ss - 1) / ss);
571			SET32(meta, hdr->pdr_length,
572			    (GET32(sample, hdr->pdr_length) *
573			    sample->sectorsize + ss - 1) / ss);
574			SET32(meta, hdr->vdr_length,
575			    (GET32(sample, hdr->vdr_length) *
576			    sample->sectorsize + ss - 1) / ss);
577			SET32(meta, hdr->cr_length,
578			    (GET32(sample, hdr->cr_length) *
579			    sample->sectorsize + ss - 1) / ss);
580			SET32(meta, hdr->pdd_length,
581			    (GET32(sample, hdr->pdd_length) *
582			    sample->sectorsize + ss - 1) / ss);
583			SET32(meta, hdr->bbmlog_length,
584			    (GET32(sample, hdr->bbmlog_length) *
585			    sample->sectorsize + ss - 1) / ss);
586			SET32(meta, hdr->Diagnostic_Space,
587			    (GET32(sample, hdr->bbmlog_length) *
588			    sample->sectorsize + ss - 1) / ss);
589			SET32(meta, hdr->Vendor_Specific_Logs,
590			    (GET32(sample, hdr->bbmlog_length) *
591			    sample->sectorsize + ss - 1) / ss);
592		}
593	} else {
594		SET32(meta, hdr->Signature, DDF_HEADER_SIGNATURE);
595		snprintf(meta->hdr->DDF_Header_GUID, 25, "FreeBSD %08x%08x",
596		    (u_int)(ts.tv_sec - DECADE), arc4random());
597		memcpy(meta->hdr->DDF_rev, "02.00.00", 8);
598		SET32(meta, hdr->TimeStamp, (ts.tv_sec - DECADE));
599		SET32(meta, hdr->WorkSpace_Length, 16 * 1024 * 1024 / ss);
600		SET16(meta, hdr->Max_PD_Entries, DDF_MAX_DISKS - 1);
601		SET16(meta, hdr->Max_VD_Entries, DDF_MAX_VDISKS);
602		SET16(meta, hdr->Max_Partitions, DDF_MAX_PARTITIONS);
603		SET16(meta, hdr->Max_Primary_Element_Entries, DDF_MAX_DISKS);
604		SET16(meta, hdr->Configuration_Record_Length,
605		    (sizeof(struct ddf_vdc_record) +
606		     (4 + 8) * GET16(meta, hdr->Max_Primary_Element_Entries) +
607		     ss - 1) / ss);
608		SET32(meta, hdr->cd_length,
609		    (sizeof(struct ddf_cd_record) + ss - 1) / ss);
610		SET32(meta, hdr->pdr_length,
611		    (sizeof(struct ddf_pd_record) +
612		     sizeof(struct ddf_pd_entry) *
613		     GET16(meta, hdr->Max_PD_Entries) + ss - 1) / ss);
614		SET32(meta, hdr->vdr_length,
615		    (sizeof(struct ddf_vd_record) +
616		     sizeof(struct ddf_vd_entry) *
617		     GET16(meta, hdr->Max_VD_Entries) + ss - 1) / ss);
618		SET32(meta, hdr->cr_length,
619		    GET16(meta, hdr->Configuration_Record_Length) *
620		    (GET16(meta, hdr->Max_Partitions) + 1));
621		SET32(meta, hdr->pdd_length,
622		    (sizeof(struct ddf_pdd_record) + ss - 1) / ss);
623		SET32(meta, hdr->bbmlog_length, 0);
624		SET32(meta, hdr->Diagnostic_Space_Length, 0);
625		SET32(meta, hdr->Vendor_Specific_Logs_Length, 0);
626	}
627	pos = 1;
628	SET32(meta, hdr->cd_section, pos);
629	pos += GET32(meta, hdr->cd_length);
630	SET32(meta, hdr->pdr_section, pos);
631	pos += GET32(meta, hdr->pdr_length);
632	SET32(meta, hdr->vdr_section, pos);
633	pos += GET32(meta, hdr->vdr_length);
634	SET32(meta, hdr->cr_section, pos);
635	pos += GET32(meta, hdr->cr_length);
636	SET32(meta, hdr->pdd_section, pos);
637	pos += GET32(meta, hdr->pdd_length);
638	SET32(meta, hdr->bbmlog_section,
639	    GET32(meta, hdr->bbmlog_length) != 0 ? pos : 0xffffffff);
640	pos += GET32(meta, hdr->bbmlog_length);
641	SET32(meta, hdr->Diagnostic_Space,
642	    GET32(meta, hdr->Diagnostic_Space_Length) != 0 ? pos : 0xffffffff);
643	pos += GET32(meta, hdr->Diagnostic_Space_Length);
644	SET32(meta, hdr->Vendor_Specific_Logs,
645	    GET32(meta, hdr->Vendor_Specific_Logs_Length) != 0 ? pos : 0xffffffff);
646	pos += GET32(meta, hdr->Vendor_Specific_Logs_Length);
647	SET64(meta, hdr->Primary_Header_LBA,
648	    anchorlba - pos - 16);
649	SET64(meta, hdr->Secondary_Header_LBA,
650	    0xffffffffffffffffULL);
651	SET64(meta, hdr->WorkSpace_LBA,
652	    anchorlba + 1 - 32 * 1024 * 1024 / ss);
653
654	/* Controller Data */
655	size = GET32(meta, hdr->cd_length) * ss;
656	meta->cdr = malloc(size, M_MD_DDF, M_WAITOK);
657	memset(meta->cdr, 0xff, size);
658	SET32(meta, cdr->Signature, DDF_CONTROLLER_DATA_SIGNATURE);
659	memcpy(meta->cdr->Controller_GUID, "FreeBSD GEOM RAID SERIAL", 24);
660	memcpy(meta->cdr->Product_ID, "FreeBSD GEOMRAID", 16);
661
662	/* Physical Drive Records. */
663	size = GET32(meta, hdr->pdr_length) * ss;
664	meta->pdr = malloc(size, M_MD_DDF, M_WAITOK);
665	memset(meta->pdr, 0xff, size);
666	SET32(meta, pdr->Signature, DDF_PDR_SIGNATURE);
667	SET16(meta, pdr->Populated_PDEs, 1);
668	SET16(meta, pdr->Max_PDE_Supported,
669	    GET16(meta, hdr->Max_PD_Entries));
670
671	pde = &meta->pdr->entry[0];
672	len = sizeof(serial_buffer);
673	error = g_io_getattr("GEOM::ident", disk->d_consumer, &len, serial_buffer);
674	if (error == 0 && (len = strlen (serial_buffer)) >= 6 && len <= 20)
675		snprintf(pde->PD_GUID, 25, "DISK%20s", serial_buffer);
676	else
677		snprintf(pde->PD_GUID, 25, "DISK%04d%02d%02d%08x%04x",
678		    ct.year, ct.mon, ct.day,
679		    arc4random(), arc4random() & 0xffff);
680	SET32D(meta, pde->PD_Reference, arc4random());
681	SET16D(meta, pde->PD_Type, DDF_PDE_GUID_FORCE);
682	SET16D(meta, pde->PD_State, 0);
683	SET64D(meta, pde->Configured_Size,
684	    anchorlba + 1 - 32 * 1024 * 1024 / ss);
685	SET16D(meta, pde->Block_Size, ss);
686
687	/* Virtual Drive Records. */
688	size = GET32(meta, hdr->vdr_length) * ss;
689	meta->vdr = malloc(size, M_MD_DDF, M_WAITOK);
690	memset(meta->vdr, 0xff, size);
691	SET32(meta, vdr->Signature, DDF_VD_RECORD_SIGNATURE);
692	SET32(meta, vdr->Populated_VDEs, 0);
693	SET16(meta, vdr->Max_VDE_Supported,
694	    GET16(meta, hdr->Max_VD_Entries));
695
696	/* Configuration Records. */
697	size = GET32(meta, hdr->cr_length) * ss;
698	meta->cr = malloc(size, M_MD_DDF, M_WAITOK);
699	memset(meta->cr, 0xff, size);
700
701	/* Physical Disk Data. */
702	size = GET32(meta, hdr->pdd_length) * ss;
703	meta->pdd = malloc(size, M_MD_DDF, M_WAITOK);
704	memset(meta->pdd, 0xff, size);
705	SET32(meta, pdd->Signature, DDF_PDD_SIGNATURE);
706	memcpy(meta->pdd->PD_GUID, pde->PD_GUID, 24);
707	SET32(meta, pdd->PD_Reference, GET32D(meta, pde->PD_Reference));
708	SET8(meta, pdd->Forced_Ref_Flag, DDF_PDD_FORCED_REF);
709	SET8(meta, pdd->Forced_PD_GUID_Flag, DDF_PDD_FORCED_GUID);
710
711	/* Bad Block Management Log. */
712	if (GET32(meta, hdr->bbmlog_length) != 0) {
713		size = GET32(meta, hdr->bbmlog_length) * ss;
714		meta->bbm = malloc(size, M_MD_DDF, M_WAITOK);
715		memset(meta->bbm, 0xff, size);
716		SET32(meta, bbm->Signature, DDF_BBML_SIGNATURE);
717		SET32(meta, bbm->Entry_Count, 0);
718		SET32(meta, bbm->Spare_Block_Count, 0);
719	}
720}
721
722static void
723ddf_meta_copy(struct ddf_meta *dst, struct ddf_meta *src)
724{
725	struct ddf_header *hdr;
726	u_int ss;
727
728	hdr = src->hdr;
729	dst->bigendian = src->bigendian;
730	ss = dst->sectorsize = src->sectorsize;
731	dst->hdr = malloc(ss, M_MD_DDF, M_WAITOK);
732	memcpy(dst->hdr, src->hdr, ss);
733	dst->cdr = malloc(GET32(src, hdr->cd_length) * ss, M_MD_DDF, M_WAITOK);
734	memcpy(dst->cdr, src->cdr, GET32(src, hdr->cd_length) * ss);
735	dst->pdr = malloc(GET32(src, hdr->pdr_length) * ss, M_MD_DDF, M_WAITOK);
736	memcpy(dst->pdr, src->pdr, GET32(src, hdr->pdr_length) * ss);
737	dst->vdr = malloc(GET32(src, hdr->vdr_length) * ss, M_MD_DDF, M_WAITOK);
738	memcpy(dst->vdr, src->vdr, GET32(src, hdr->vdr_length) * ss);
739	dst->cr = malloc(GET32(src, hdr->cr_length) * ss, M_MD_DDF, M_WAITOK);
740	memcpy(dst->cr, src->cr, GET32(src, hdr->cr_length) * ss);
741	dst->pdd = malloc(GET32(src, hdr->pdd_length) * ss, M_MD_DDF, M_WAITOK);
742	memcpy(dst->pdd, src->pdd, GET32(src, hdr->pdd_length) * ss);
743	if (src->bbm != NULL) {
744		dst->bbm = malloc(GET32(src, hdr->bbmlog_length) * ss, M_MD_DDF, M_WAITOK);
745		memcpy(dst->bbm, src->bbm, GET32(src, hdr->bbmlog_length) * ss);
746	}
747}
748
749static void
750ddf_meta_update(struct ddf_meta *meta, struct ddf_meta *src)
751{
752	struct ddf_pd_entry *pde, *spde;
753	int i, j;
754
755	for (i = 0; i < GET16(src, pdr->Populated_PDEs); i++) {
756		spde = &src->pdr->entry[i];
757		if (isff(spde->PD_GUID, 24))
758			continue;
759		j = ddf_meta_find_pd(meta, NULL,
760		    src->pdr->entry[i].PD_Reference);
761		if (j < 0) {
762			j = ddf_meta_find_pd(meta, NULL, 0xffffffff);
763			pde = &meta->pdr->entry[j];
764			memcpy(pde, spde, sizeof(*pde));
765		} else {
766			pde = &meta->pdr->entry[j];
767			SET16D(meta, pde->PD_State,
768			    GET16D(meta, pde->PD_State) |
769			    GET16D(src, pde->PD_State));
770		}
771	}
772}
773
774static void
775ddf_meta_free(struct ddf_meta *meta)
776{
777
778	if (meta->hdr != NULL) {
779		free(meta->hdr, M_MD_DDF);
780		meta->hdr = NULL;
781	}
782	if (meta->cdr != NULL) {
783		free(meta->cdr, M_MD_DDF);
784		meta->cdr = NULL;
785	}
786	if (meta->pdr != NULL) {
787		free(meta->pdr, M_MD_DDF);
788		meta->pdr = NULL;
789	}
790	if (meta->vdr != NULL) {
791		free(meta->vdr, M_MD_DDF);
792		meta->vdr = NULL;
793	}
794	if (meta->cr != NULL) {
795		free(meta->cr, M_MD_DDF);
796		meta->cr = NULL;
797	}
798	if (meta->pdd != NULL) {
799		free(meta->pdd, M_MD_DDF);
800		meta->pdd = NULL;
801	}
802	if (meta->bbm != NULL) {
803		free(meta->bbm, M_MD_DDF);
804		meta->bbm = NULL;
805	}
806}
807
808static void
809ddf_vol_meta_create(struct ddf_vol_meta *meta, struct ddf_meta *sample)
810{
811	struct timespec ts;
812	struct clocktime ct;
813	struct ddf_header *hdr;
814	u_int ss, size;
815
816	hdr = sample->hdr;
817	meta->bigendian = sample->bigendian;
818	ss = meta->sectorsize = sample->sectorsize;
819	meta->hdr = malloc(ss, M_MD_DDF, M_WAITOK);
820	memcpy(meta->hdr, sample->hdr, ss);
821	meta->cdr = malloc(GET32(sample, hdr->cd_length) * ss, M_MD_DDF, M_WAITOK);
822	memcpy(meta->cdr, sample->cdr, GET32(sample, hdr->cd_length) * ss);
823	meta->vde = malloc(sizeof(struct ddf_vd_entry), M_MD_DDF, M_WAITOK);
824	memset(meta->vde, 0xff, sizeof(struct ddf_vd_entry));
825	getnanotime(&ts);
826	clock_ts_to_ct(&ts, &ct);
827	snprintf(meta->vde->VD_GUID, 25, "FreeBSD%04d%02d%02d%08x%01x",
828	    ct.year, ct.mon, ct.day,
829	    arc4random(), arc4random() & 0xf);
830	size = GET16(sample, hdr->Configuration_Record_Length) * ss;
831	meta->vdc = malloc(size, M_MD_DDF, M_WAITOK);
832	memset(meta->vdc, 0xff, size);
833	SET32(meta, vdc->Signature, DDF_VDCR_SIGNATURE);
834	memcpy(meta->vdc->VD_GUID, meta->vde->VD_GUID, 24);
835	SET32(meta, vdc->Sequence_Number, 0);
836}
837
838static void
839ddf_vol_meta_update(struct ddf_vol_meta *dst, struct ddf_meta *src,
840    uint8_t *GUID, int started)
841{
842	struct ddf_header *hdr;
843	struct ddf_vd_entry *vde;
844	struct ddf_vdc_record *vdc;
845	int vnew, bvnew, bvd, size;
846	u_int ss;
847
848	hdr = src->hdr;
849	vde = &src->vdr->entry[ddf_meta_find_vd(src, GUID)];
850	vdc = ddf_meta_find_vdc(src, GUID);
851	bvd = GET8D(src, vdc->Secondary_Element_Seq);
852	size = GET16(src, hdr->Configuration_Record_Length) * src->sectorsize;
853
854	if (dst->vdc == NULL ||
855	    (!started && ((int32_t)(GET32D(src, vdc->Sequence_Number) -
856	    GET32(dst, vdc->Sequence_Number))) > 0))
857		vnew = 1;
858	else
859		vnew = 0;
860
861	if (dst->bvdc[bvd] == NULL ||
862	    (!started && ((int32_t)(GET32D(src, vdc->Sequence_Number) -
863	    GET32(dst, bvdc[bvd]->Sequence_Number))) > 0))
864		bvnew = 1;
865	else
866		bvnew = 0;
867
868	if (vnew) {
869		dst->bigendian = src->bigendian;
870		ss = dst->sectorsize = src->sectorsize;
871		if (dst->hdr != NULL)
872			free(dst->hdr, M_MD_DDF);
873		dst->hdr = malloc(ss, M_MD_DDF, M_WAITOK);
874		memcpy(dst->hdr, src->hdr, ss);
875		if (dst->cdr != NULL)
876			free(dst->cdr, M_MD_DDF);
877		dst->cdr = malloc(GET32(src, hdr->cd_length) * ss, M_MD_DDF, M_WAITOK);
878		memcpy(dst->cdr, src->cdr, GET32(src, hdr->cd_length) * ss);
879		if (dst->vde != NULL)
880			free(dst->vde, M_MD_DDF);
881		dst->vde = malloc(sizeof(struct ddf_vd_entry), M_MD_DDF, M_WAITOK);
882		memcpy(dst->vde, vde, sizeof(struct ddf_vd_entry));
883		if (dst->vdc != NULL)
884			free(dst->vdc, M_MD_DDF);
885		dst->vdc = malloc(size, M_MD_DDF, M_WAITOK);
886		memcpy(dst->vdc, vdc, size);
887	}
888	if (bvnew) {
889		if (dst->bvdc[bvd] != NULL)
890			free(dst->bvdc[bvd], M_MD_DDF);
891		dst->bvdc[bvd] = malloc(size, M_MD_DDF, M_WAITOK);
892		memcpy(dst->bvdc[bvd], vdc, size);
893	}
894}
895
896static void
897ddf_vol_meta_free(struct ddf_vol_meta *meta)
898{
899	int i;
900
901	if (meta->hdr != NULL) {
902		free(meta->hdr, M_MD_DDF);
903		meta->hdr = NULL;
904	}
905	if (meta->cdr != NULL) {
906		free(meta->cdr, M_MD_DDF);
907		meta->cdr = NULL;
908	}
909	if (meta->vde != NULL) {
910		free(meta->vde, M_MD_DDF);
911		meta->vde = NULL;
912	}
913	if (meta->vdc != NULL) {
914		free(meta->vdc, M_MD_DDF);
915		meta->vdc = NULL;
916	}
917	for (i = 0; i < DDF_MAX_DISKS_HARD; i++) {
918		if (meta->bvdc[i] != NULL) {
919			free(meta->bvdc[i], M_MD_DDF);
920			meta->bvdc[i] = NULL;
921		}
922	}
923}
924
925static int
926ddf_meta_unused_range(struct ddf_meta *meta, off_t *off, off_t *size)
927{
928	struct ddf_vdc_record *vdc;
929	off_t beg[32], end[32], beg1, end1;
930	uint64_t *offp;
931	int i, j, n, num, pos;
932	uint32_t ref;
933
934	*off = 0;
935	*size = 0;
936	ref = GET32(meta, pdd->PD_Reference);
937	pos = ddf_meta_find_pd(meta, NULL, ref);
938	beg[0] = 0;
939	end[0] = GET64(meta, pdr->entry[pos].Configured_Size);
940	n = 1;
941	num = GETCRNUM(meta);
942	for (i = 0; i < num; i++) {
943		vdc = GETVDCPTR(meta, i);
944		if (GET32D(meta, vdc->Signature) != DDF_VDCR_SIGNATURE)
945			continue;
946		for (pos = 0; pos < GET16D(meta, vdc->Primary_Element_Count); pos++)
947			if (GET32D(meta, vdc->Physical_Disk_Sequence[pos]) == ref)
948				break;
949		if (pos == GET16D(meta, vdc->Primary_Element_Count))
950			continue;
951		offp = (uint64_t *)&(vdc->Physical_Disk_Sequence[
952		    GET16(meta, hdr->Max_Primary_Element_Entries)]);
953		beg1 = GET64P(meta, offp + pos);
954		end1 = beg1 + GET64D(meta, vdc->Block_Count);
955		for (j = 0; j < n; j++) {
956			if (beg[j] >= end1 || end[j] <= beg1 )
957				continue;
958			if (beg[j] < beg1 && end[j] > end1) {
959				beg[n] = end1;
960				end[n] = end[j];
961				end[j] = beg1;
962				n++;
963			} else if (beg[j] < beg1)
964				end[j] = beg1;
965			else
966				beg[j] = end1;
967		}
968	}
969	for (j = 0; j < n; j++) {
970		if (end[j] - beg[j] > *size) {
971			*off = beg[j];
972			*size = end[j] - beg[j];
973		}
974	}
975	return ((*size > 0) ? 1 : 0);
976}
977
978static void
979ddf_meta_get_name(struct ddf_meta *meta, int num, char *buf)
980{
981	const char *b;
982	int i;
983
984	b = meta->vdr->entry[num].VD_Name;
985	for (i = 15; i >= 0; i--)
986		if (b[i] != 0x20)
987			break;
988	memcpy(buf, b, i + 1);
989	buf[i + 1] = 0;
990}
991
992static void
993ddf_meta_put_name(struct ddf_vol_meta *meta, char *buf)
994{
995	int len;
996
997	len = min(strlen(buf), 16);
998	memset(meta->vde->VD_Name, 0x20, 16);
999	memcpy(meta->vde->VD_Name, buf, len);
1000}
1001
1002static int
1003ddf_meta_read(struct g_consumer *cp, struct ddf_meta *meta)
1004{
1005	struct g_provider *pp;
1006	struct ddf_header *ahdr, *hdr;
1007	char *abuf, *buf;
1008	off_t plba, slba, lba;
1009	int error, len, i;
1010	u_int ss;
1011	uint32_t val;
1012
1013	ddf_meta_free(meta);
1014	pp = cp->provider;
1015	ss = meta->sectorsize = pp->sectorsize;
1016	/* Read anchor block. */
1017	abuf = g_read_data(cp, pp->mediasize - ss, ss, &error);
1018	if (abuf == NULL) {
1019		G_RAID_DEBUG(1, "Cannot read metadata from %s (error=%d).",
1020		    pp->name, error);
1021		return (error);
1022	}
1023	ahdr = (struct ddf_header *)abuf;
1024
1025	/* Check if this is an DDF RAID struct */
1026	if (be32dec(&ahdr->Signature) == DDF_HEADER_SIGNATURE)
1027		meta->bigendian = 1;
1028	else if (le32dec(&ahdr->Signature) == DDF_HEADER_SIGNATURE)
1029		meta->bigendian = 0;
1030	else {
1031		G_RAID_DEBUG(1, "DDF signature check failed on %s", pp->name);
1032		error = EINVAL;
1033		goto done;
1034	}
1035	if (ahdr->Header_Type != DDF_HEADER_ANCHOR) {
1036		G_RAID_DEBUG(1, "DDF header type check failed on %s", pp->name);
1037		error = EINVAL;
1038		goto done;
1039	}
1040	meta->hdr = ahdr;
1041	plba = GET64(meta, hdr->Primary_Header_LBA);
1042	slba = GET64(meta, hdr->Secondary_Header_LBA);
1043	val = GET32(meta, hdr->CRC);
1044	SET32(meta, hdr->CRC, 0xffffffff);
1045	meta->hdr = NULL;
1046	if (crc32(ahdr, ss) != val) {
1047		G_RAID_DEBUG(1, "DDF CRC mismatch on %s", pp->name);
1048		error = EINVAL;
1049		goto done;
1050	}
1051	if ((plba + 6) * ss >= pp->mediasize) {
1052		G_RAID_DEBUG(1, "DDF primary header LBA is wrong on %s", pp->name);
1053		error = EINVAL;
1054		goto done;
1055	}
1056	if (slba != -1 && (slba + 6) * ss >= pp->mediasize) {
1057		G_RAID_DEBUG(1, "DDF secondary header LBA is wrong on %s", pp->name);
1058		error = EINVAL;
1059		goto done;
1060	}
1061	lba = plba;
1062
1063doread:
1064	error = 0;
1065	ddf_meta_free(meta);
1066
1067	/* Read header block. */
1068	buf = g_read_data(cp, lba * ss, ss, &error);
1069	if (buf == NULL) {
1070readerror:
1071		G_RAID_DEBUG(1, "DDF %s metadata read error on %s (error=%d).",
1072		    (lba == plba) ? "primary" : "secondary", pp->name, error);
1073		if (lba == plba && slba != -1) {
1074			lba = slba;
1075			goto doread;
1076		}
1077		G_RAID_DEBUG(1, "DDF metadata read error on %s.", pp->name);
1078		goto done;
1079	}
1080	meta->hdr = malloc(ss, M_MD_DDF, M_WAITOK);
1081	memcpy(meta->hdr, buf, ss);
1082	g_free(buf);
1083	hdr = meta->hdr;
1084	val = GET32(meta, hdr->CRC);
1085	SET32(meta, hdr->CRC, 0xffffffff);
1086	if (hdr->Signature != ahdr->Signature ||
1087	    crc32(meta->hdr, ss) != val ||
1088	    memcmp(hdr->DDF_Header_GUID, ahdr->DDF_Header_GUID, 24) ||
1089	    GET64(meta, hdr->Primary_Header_LBA) != plba ||
1090	    GET64(meta, hdr->Secondary_Header_LBA) != slba) {
1091hdrerror:
1092		G_RAID_DEBUG(1, "DDF %s metadata check failed on %s",
1093		    (lba == plba) ? "primary" : "secondary", pp->name);
1094		if (lba == plba && slba != -1) {
1095			lba = slba;
1096			goto doread;
1097		}
1098		G_RAID_DEBUG(1, "DDF metadata check failed on %s", pp->name);
1099		error = EINVAL;
1100		goto done;
1101	}
1102	if ((lba == plba && hdr->Header_Type != DDF_HEADER_PRIMARY) ||
1103	    (lba == slba && hdr->Header_Type != DDF_HEADER_SECONDARY))
1104		goto hdrerror;
1105	len = 1;
1106	len = max(len, GET32(meta, hdr->cd_section) + GET32(meta, hdr->cd_length));
1107	len = max(len, GET32(meta, hdr->pdr_section) + GET32(meta, hdr->pdr_length));
1108	len = max(len, GET32(meta, hdr->vdr_section) + GET32(meta, hdr->vdr_length));
1109	len = max(len, GET32(meta, hdr->cr_section) + GET32(meta, hdr->cr_length));
1110	len = max(len, GET32(meta, hdr->pdd_section) + GET32(meta, hdr->pdd_length));
1111	if ((val = GET32(meta, hdr->bbmlog_section)) != 0xffffffff)
1112		len = max(len, val + GET32(meta, hdr->bbmlog_length));
1113	if ((val = GET32(meta, hdr->Diagnostic_Space)) != 0xffffffff)
1114		len = max(len, val + GET32(meta, hdr->Diagnostic_Space_Length));
1115	if ((val = GET32(meta, hdr->Vendor_Specific_Logs)) != 0xffffffff)
1116		len = max(len, val + GET32(meta, hdr->Vendor_Specific_Logs_Length));
1117	if ((plba + len) * ss >= pp->mediasize)
1118		goto hdrerror;
1119	if (slba != -1 && (slba + len) * ss >= pp->mediasize)
1120		goto hdrerror;
1121	/* Workaround for Adaptec implementation. */
1122	if (GET16(meta, hdr->Max_Primary_Element_Entries) == 0xffff) {
1123		SET16(meta, hdr->Max_Primary_Element_Entries,
1124		    min(GET16(meta, hdr->Max_PD_Entries),
1125		    (GET16(meta, hdr->Configuration_Record_Length) * ss - 512) / 12));
1126	}
1127
1128	/* Read controller data. */
1129	buf = g_read_data(cp, (lba + GET32(meta, hdr->cd_section)) * ss,
1130	    GET32(meta, hdr->cd_length) * ss, &error);
1131	if (buf == NULL)
1132		goto readerror;
1133	meta->cdr = malloc(GET32(meta, hdr->cd_length) * ss, M_MD_DDF, M_WAITOK);
1134	memcpy(meta->cdr, buf, GET32(meta, hdr->cd_length) * ss);
1135	g_free(buf);
1136	if (GET32(meta, cdr->Signature) != DDF_CONTROLLER_DATA_SIGNATURE)
1137		goto hdrerror;
1138
1139	/* Read physical disk records. */
1140	buf = g_read_data(cp, (lba + GET32(meta, hdr->pdr_section)) * ss,
1141	    GET32(meta, hdr->pdr_length) * ss, &error);
1142	if (buf == NULL)
1143		goto readerror;
1144	meta->pdr = malloc(GET32(meta, hdr->pdr_length) * ss, M_MD_DDF, M_WAITOK);
1145	memcpy(meta->pdr, buf, GET32(meta, hdr->pdr_length) * ss);
1146	g_free(buf);
1147	if (GET32(meta, pdr->Signature) != DDF_PDR_SIGNATURE)
1148		goto hdrerror;
1149
1150	/* Read virtual disk records. */
1151	buf = g_read_data(cp, (lba + GET32(meta, hdr->vdr_section)) * ss,
1152	    GET32(meta, hdr->vdr_length) * ss, &error);
1153	if (buf == NULL)
1154		goto readerror;
1155	meta->vdr = malloc(GET32(meta, hdr->vdr_length) * ss, M_MD_DDF, M_WAITOK);
1156	memcpy(meta->vdr, buf, GET32(meta, hdr->vdr_length) * ss);
1157	g_free(buf);
1158	if (GET32(meta, vdr->Signature) != DDF_VD_RECORD_SIGNATURE)
1159		goto hdrerror;
1160
1161	/* Read configuration records. */
1162	buf = g_read_data(cp, (lba + GET32(meta, hdr->cr_section)) * ss,
1163	    GET32(meta, hdr->cr_length) * ss, &error);
1164	if (buf == NULL)
1165		goto readerror;
1166	meta->cr = malloc(GET32(meta, hdr->cr_length) * ss, M_MD_DDF, M_WAITOK);
1167	memcpy(meta->cr, buf, GET32(meta, hdr->cr_length) * ss);
1168	g_free(buf);
1169
1170	/* Read physical disk data. */
1171	buf = g_read_data(cp, (lba + GET32(meta, hdr->pdd_section)) * ss,
1172	    GET32(meta, hdr->pdd_length) * ss, &error);
1173	if (buf == NULL)
1174		goto readerror;
1175	meta->pdd = malloc(GET32(meta, hdr->pdd_length) * ss, M_MD_DDF, M_WAITOK);
1176	memcpy(meta->pdd, buf, GET32(meta, hdr->pdd_length) * ss);
1177	g_free(buf);
1178	if (GET32(meta, pdd->Signature) != DDF_PDD_SIGNATURE)
1179		goto hdrerror;
1180	i = ddf_meta_find_pd(meta, NULL, GET32(meta, pdd->PD_Reference));
1181	if (i < 0)
1182		goto hdrerror;
1183
1184	/* Read BBM Log. */
1185	if (GET32(meta, hdr->bbmlog_section) != 0xffffffff &&
1186	    GET32(meta, hdr->bbmlog_length) != 0) {
1187		buf = g_read_data(cp, (lba + GET32(meta, hdr->bbmlog_section)) * ss,
1188		    GET32(meta, hdr->bbmlog_length) * ss, &error);
1189		if (buf == NULL)
1190			goto readerror;
1191		meta->bbm = malloc(GET32(meta, hdr->bbmlog_length) * ss, M_MD_DDF, M_WAITOK);
1192		memcpy(meta->bbm, buf, GET32(meta, hdr->bbmlog_length) * ss);
1193		g_free(buf);
1194		if (GET32(meta, bbm->Signature) != DDF_BBML_SIGNATURE)
1195			goto hdrerror;
1196	}
1197
1198done:
1199	free(abuf, M_MD_DDF);
1200	if (error != 0)
1201		ddf_meta_free(meta);
1202	return (error);
1203}
1204
1205static int
1206ddf_meta_write(struct g_consumer *cp, struct ddf_meta *meta)
1207{
1208	struct g_provider *pp;
1209	struct ddf_vdc_record *vdc;
1210	off_t alba, plba, slba, lba;
1211	u_int ss, size;
1212	int error, i, num;
1213
1214	pp = cp->provider;
1215	ss = pp->sectorsize;
1216	lba = alba = pp->mediasize / ss - 1;
1217	plba = GET64(meta, hdr->Primary_Header_LBA);
1218	slba = GET64(meta, hdr->Secondary_Header_LBA);
1219
1220next:
1221	SET8(meta, hdr->Header_Type, (lba == alba) ? DDF_HEADER_ANCHOR :
1222	    (lba == plba) ? DDF_HEADER_PRIMARY : DDF_HEADER_SECONDARY);
1223	SET32(meta, hdr->CRC, 0xffffffff);
1224	SET32(meta, hdr->CRC, crc32(meta->hdr, ss));
1225	error = g_write_data(cp, lba * ss, meta->hdr, ss);
1226	if (error != 0) {
1227err:
1228		G_RAID_DEBUG(1, "Cannot write metadata to %s (error=%d).",
1229		    pp->name, error);
1230		if (lba != alba)
1231			goto done;
1232	}
1233	if (lba == alba) {
1234		lba = plba;
1235		goto next;
1236	}
1237
1238	size = GET32(meta, hdr->cd_length) * ss;
1239	SET32(meta, cdr->CRC, 0xffffffff);
1240	SET32(meta, cdr->CRC, crc32(meta->cdr, size));
1241	error = g_write_data(cp, (lba + GET32(meta, hdr->cd_section)) * ss,
1242	    meta->cdr, size);
1243	if (error != 0)
1244		goto err;
1245
1246	size = GET32(meta, hdr->pdr_length) * ss;
1247	SET32(meta, pdr->CRC, 0xffffffff);
1248	SET32(meta, pdr->CRC, crc32(meta->pdr, size));
1249	error = g_write_data(cp, (lba + GET32(meta, hdr->pdr_section)) * ss,
1250	    meta->pdr, size);
1251	if (error != 0)
1252		goto err;
1253
1254	size = GET32(meta, hdr->vdr_length) * ss;
1255	SET32(meta, vdr->CRC, 0xffffffff);
1256	SET32(meta, vdr->CRC, crc32(meta->vdr, size));
1257	error = g_write_data(cp, (lba + GET32(meta, hdr->vdr_section)) * ss,
1258	    meta->vdr, size);
1259	if (error != 0)
1260		goto err;
1261
1262	size = GET16(meta, hdr->Configuration_Record_Length) * ss;
1263	num = GETCRNUM(meta);
1264	for (i = 0; i < num; i++) {
1265		vdc = GETVDCPTR(meta, i);
1266		SET32D(meta, vdc->CRC, 0xffffffff);
1267		SET32D(meta, vdc->CRC, crc32(vdc, size));
1268	}
1269	error = g_write_data(cp, (lba + GET32(meta, hdr->cr_section)) * ss,
1270	    meta->cr, size * num);
1271	if (error != 0)
1272		goto err;
1273
1274	size = GET32(meta, hdr->pdd_length) * ss;
1275	SET32(meta, pdd->CRC, 0xffffffff);
1276	SET32(meta, pdd->CRC, crc32(meta->pdd, size));
1277	error = g_write_data(cp, (lba + GET32(meta, hdr->pdd_section)) * ss,
1278	    meta->pdd, size);
1279	if (error != 0)
1280		goto err;
1281
1282	if (GET32(meta, hdr->bbmlog_length) != 0) {
1283		size = GET32(meta, hdr->bbmlog_length) * ss;
1284		SET32(meta, bbm->CRC, 0xffffffff);
1285		SET32(meta, bbm->CRC, crc32(meta->bbm, size));
1286		error = g_write_data(cp,
1287		    (lba + GET32(meta, hdr->bbmlog_section)) * ss,
1288		    meta->bbm, size);
1289		if (error != 0)
1290			goto err;
1291	}
1292
1293done:
1294	if (lba == plba && slba != -1) {
1295		lba = slba;
1296		goto next;
1297	}
1298
1299	return (error);
1300}
1301
1302static int
1303ddf_meta_erase(struct g_consumer *cp)
1304{
1305	struct g_provider *pp;
1306	char *buf;
1307	int error;
1308
1309	pp = cp->provider;
1310	buf = malloc(pp->sectorsize, M_MD_DDF, M_WAITOK | M_ZERO);
1311	error = g_write_data(cp, pp->mediasize - pp->sectorsize,
1312	    buf, pp->sectorsize);
1313	if (error != 0) {
1314		G_RAID_DEBUG(1, "Cannot erase metadata on %s (error=%d).",
1315		    pp->name, error);
1316	}
1317	free(buf, M_MD_DDF);
1318	return (error);
1319}
1320
/*
 * Compiled out: writer for spare-disk metadata.
 *
 * NOTE(review): this code does not match the rest of the file.  It passes
 * three arguments to ddf_meta_write(), which takes two here, and it fills
 * header members (ddf_id, dummy_0, integrity, disk, ...) that no other code
 * in this file touches.  It looks like a leftover from another metadata
 * module and would need a rewrite before being enabled -- confirm.
 */
#if 0
static int
ddf_meta_write_spare(struct g_consumer *cp)
{
	struct ddf_header *meta;
	int error;

	meta = malloc(sizeof(*meta), M_MD_DDF, M_WAITOK | M_ZERO);
	memcpy(&meta->ddf_id[0], DDF_MAGIC, sizeof(DDF_MAGIC) - 1);
	meta->dummy_0 = 0x00020000;
	meta->integrity = DDF_I_VALID;
	meta->disk.flags = DDF_F_SPARE | DDF_F_ONLINE | DDF_F_VALID;
	meta->disk.number = 0xff;
	arc4rand(&meta->disk.id, sizeof(meta->disk.id), 0);
	meta->disk_sectors = cp->provider->mediasize / cp->provider->sectorsize;
	meta->disk_sectors -= 131072;
	meta->rebuild_lba = UINT32_MAX;
	error = ddf_meta_write(cp, &meta, 1);
	free(meta, M_MD_DDF);
	return (error);
}
#endif
1343
1344static struct g_raid_volume *
1345g_raid_md_ddf_get_volume(struct g_raid_softc *sc, uint8_t *GUID)
1346{
1347	struct g_raid_volume	*vol;
1348	struct g_raid_md_ddf_pervolume *pv;
1349
1350	TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
1351		pv = vol->v_md_data;
1352		if (memcmp(pv->pv_meta.vde->VD_GUID, GUID, 24) == 0)
1353			break;
1354	}
1355	return (vol);
1356}
1357
1358static struct g_raid_disk *
1359g_raid_md_ddf_get_disk(struct g_raid_softc *sc, uint8_t *GUID, uint32_t id)
1360{
1361	struct g_raid_disk	*disk;
1362	struct g_raid_md_ddf_perdisk *pd;
1363	struct ddf_meta *meta;
1364
1365	TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
1366		pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data;
1367		meta = &pd->pd_meta;
1368		if (GUID != NULL) {
1369			if (memcmp(meta->pdd->PD_GUID, GUID, 24) == 0)
1370				break;
1371		} else {
1372			if (GET32(meta, pdd->PD_Reference) == id)
1373				break;
1374		}
1375	}
1376	return (disk);
1377}
1378
1379static int
1380g_raid_md_ddf_purge_volumes(struct g_raid_softc *sc)
1381{
1382	struct g_raid_volume	*vol, *tvol;
1383	struct g_raid_md_ddf_pervolume *pv;
1384	int i, res;
1385
1386	res = 0;
1387	TAILQ_FOREACH_SAFE(vol, &sc->sc_volumes, v_next, tvol) {
1388		pv = vol->v_md_data;
1389		if (vol->v_stopping)
1390			continue;
1391		for (i = 0; i < vol->v_disks_count; i++) {
1392			if (vol->v_subdisks[i].sd_state != G_RAID_SUBDISK_S_NONE)
1393				break;
1394		}
1395		if (i >= vol->v_disks_count) {
1396			g_raid_destroy_volume(vol);
1397			res = 1;
1398		}
1399	}
1400	return (res);
1401}
1402
/*
 * Purge disks that no longer carry metadata for any live volume.
 *
 * Currently a stub: the implementation below is compiled out with #if 0 and
 * the function always returns 0 ("nothing purged").
 *
 * NOTE(review): the disabled body treats pd_meta as an array of
 * per-subdisk metadata pointers and uses pd_subdisks / DDF_MAX_SUBDISKS,
 * while the live code in this file uses pd_meta as a single struct ddf_meta.
 * It would need a rewrite before being enabled -- confirm.
 */
static int
g_raid_md_ddf_purge_disks(struct g_raid_softc *sc)
{
#if 0
	struct g_raid_disk	*disk, *tdisk;
	struct g_raid_volume	*vol;
	struct g_raid_md_ddf_perdisk *pd;
	int i, j, res;

	res = 0;
	TAILQ_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) {
		if (disk->d_state == G_RAID_DISK_S_SPARE)
			continue;
		pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data;

		/* Scan for deleted volumes. */
		for (i = 0; i < pd->pd_subdisks; ) {
			vol = g_raid_md_ddf_get_volume(sc,
			    pd->pd_meta[i]->volume_id);
			if (vol != NULL && !vol->v_stopping) {
				i++;
				continue;
			}
			free(pd->pd_meta[i], M_MD_DDF);
			for (j = i; j < pd->pd_subdisks - 1; j++)
				pd->pd_meta[j] = pd->pd_meta[j + 1];
			pd->pd_meta[DDF_MAX_SUBDISKS - 1] = NULL;
			pd->pd_subdisks--;
			pd->pd_updated = 1;
		}

		/* If there is no metadata left - erase and delete disk. */
		if (pd->pd_subdisks == 0) {
			ddf_meta_erase(disk->d_consumer);
			g_raid_destroy_disk(disk);
			res = 1;
		}
	}
	return (res);
#endif
	/* Live code path: nothing is ever purged. */
	return (0);
}
1445
1446static int
1447g_raid_md_ddf_supported(int level, int qual, int disks, int force)
1448{
1449
1450	if (disks > DDF_MAX_DISKS_HARD)
1451		return (0);
1452	switch (level) {
1453	case G_RAID_VOLUME_RL_RAID0:
1454		if (qual != G_RAID_VOLUME_RLQ_NONE)
1455			return (0);
1456		if (disks < 1)
1457			return (0);
1458		if (!force && disks < 2)
1459			return (0);
1460		break;
1461	case G_RAID_VOLUME_RL_RAID1:
1462		if (disks < 1)
1463			return (0);
1464		if (qual == G_RAID_VOLUME_RLQ_R1SM) {
1465			if (!force && disks != 2)
1466				return (0);
1467		} else if (qual == G_RAID_VOLUME_RLQ_R1MM) {
1468			if (!force && disks != 3)
1469				return (0);
1470		} else
1471			return (0);
1472		break;
1473	case G_RAID_VOLUME_RL_RAID3:
1474		if (qual != G_RAID_VOLUME_RLQ_R3P0 &&
1475		    qual != G_RAID_VOLUME_RLQ_R3PN)
1476			return (0);
1477		if (disks < 3)
1478			return (0);
1479		break;
1480	case G_RAID_VOLUME_RL_RAID4:
1481		if (qual != G_RAID_VOLUME_RLQ_R4P0 &&
1482		    qual != G_RAID_VOLUME_RLQ_R4PN)
1483			return (0);
1484		if (disks < 3)
1485			return (0);
1486		break;
1487	case G_RAID_VOLUME_RL_RAID5:
1488		if (qual != G_RAID_VOLUME_RLQ_R5RA &&
1489		    qual != G_RAID_VOLUME_RLQ_R5RS &&
1490		    qual != G_RAID_VOLUME_RLQ_R5LA &&
1491		    qual != G_RAID_VOLUME_RLQ_R5LS)
1492			return (0);
1493		if (disks < 3)
1494			return (0);
1495		break;
1496	case G_RAID_VOLUME_RL_RAID6:
1497		if (qual != G_RAID_VOLUME_RLQ_R6RA &&
1498		    qual != G_RAID_VOLUME_RLQ_R6RS &&
1499		    qual != G_RAID_VOLUME_RLQ_R6LA &&
1500		    qual != G_RAID_VOLUME_RLQ_R6LS)
1501			return (0);
1502		if (disks < 4)
1503			return (0);
1504		break;
1505	case G_RAID_VOLUME_RL_RAIDMDF:
1506		if (qual != G_RAID_VOLUME_RLQ_RMDFRA &&
1507		    qual != G_RAID_VOLUME_RLQ_RMDFRS &&
1508		    qual != G_RAID_VOLUME_RLQ_RMDFLA &&
1509		    qual != G_RAID_VOLUME_RLQ_RMDFLS)
1510			return (0);
1511		if (disks < 5)
1512			return (0);
1513		break;
1514	case G_RAID_VOLUME_RL_RAID1E:
1515		if (qual != G_RAID_VOLUME_RLQ_R1EA &&
1516		    qual != G_RAID_VOLUME_RLQ_R1EO)
1517			return (0);
1518		if (disks < 3)
1519			return (0);
1520		break;
1521	case G_RAID_VOLUME_RL_SINGLE:
1522		if (qual != G_RAID_VOLUME_RLQ_NONE)
1523			return (0);
1524		if (disks != 1)
1525			return (0);
1526		break;
1527	case G_RAID_VOLUME_RL_CONCAT:
1528		if (qual != G_RAID_VOLUME_RLQ_NONE)
1529			return (0);
1530		if (disks < 2)
1531			return (0);
1532		break;
1533	case G_RAID_VOLUME_RL_RAID5E:
1534		if (qual != G_RAID_VOLUME_RLQ_R5ERA &&
1535		    qual != G_RAID_VOLUME_RLQ_R5ERS &&
1536		    qual != G_RAID_VOLUME_RLQ_R5ELA &&
1537		    qual != G_RAID_VOLUME_RLQ_R5ELS)
1538			return (0);
1539		if (disks < 4)
1540			return (0);
1541		break;
1542	case G_RAID_VOLUME_RL_RAID5EE:
1543		if (qual != G_RAID_VOLUME_RLQ_R5EERA &&
1544		    qual != G_RAID_VOLUME_RLQ_R5EERS &&
1545		    qual != G_RAID_VOLUME_RLQ_R5EELA &&
1546		    qual != G_RAID_VOLUME_RLQ_R5EELS)
1547			return (0);
1548		if (disks < 4)
1549			return (0);
1550		break;
1551	case G_RAID_VOLUME_RL_RAID5R:
1552		if (qual != G_RAID_VOLUME_RLQ_R5RRA &&
1553		    qual != G_RAID_VOLUME_RLQ_R5RRS &&
1554		    qual != G_RAID_VOLUME_RLQ_R5RLA &&
1555		    qual != G_RAID_VOLUME_RLQ_R5RLS)
1556			return (0);
1557		if (disks < 3)
1558			return (0);
1559		break;
1560	default:
1561		return (0);
1562	}
1563	return (1);
1564}
1565
1566static int
1567g_raid_md_ddf_start_disk(struct g_raid_disk *disk, struct g_raid_volume *vol)
1568{
1569	struct g_raid_softc *sc;
1570	struct g_raid_subdisk *sd;
1571	struct g_raid_md_ddf_perdisk *pd;
1572	struct g_raid_md_ddf_pervolume *pv;
1573	struct g_raid_md_ddf_object *mdi;
1574	struct ddf_vol_meta *vmeta;
1575	struct ddf_meta *pdmeta, *gmeta;
1576	struct ddf_vdc_record *vdc1;
1577	off_t size, eoff = 0, esize = 0;
1578	uint64_t *val2;
1579	int disk_pos, md_disk_bvd = -1, md_disk_pos = -1, md_pde_pos;
1580	int i, resurrection = 0;
1581	uint32_t reference;
1582
1583	sc = disk->d_softc;
1584	mdi = (struct g_raid_md_ddf_object *)sc->sc_md;
1585	pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data;
1586	pdmeta = &pd->pd_meta;
1587	reference = GET32(&pd->pd_meta, pdd->PD_Reference);
1588
1589	pv = vol->v_md_data;
1590	vmeta = &pv->pv_meta;
1591	gmeta = &mdi->mdio_meta;
1592
1593	/* Find disk position in metadata by it's reference. */
1594	disk_pos = ddf_meta_find_disk(vmeta, reference,
1595	    &md_disk_bvd, &md_disk_pos);
1596	md_pde_pos = ddf_meta_find_pd(gmeta, NULL, reference);
1597
1598	if (disk_pos < 0) {
1599		G_RAID_DEBUG1(1, sc, "Disk %s is not part of the volume %s",
1600		    g_raid_get_diskname(disk), vol->v_name);
1601
1602		/* Failed stale disk is useless for us. */
1603		if ((GET16(gmeta, pdr->entry[md_pde_pos].PD_State) & DDF_PDE_PFA) != 0) {
1604			g_raid_change_disk_state(disk, G_RAID_DISK_S_STALE_FAILED);
1605			return (0);
1606		}
1607
1608		/* If disk has some metadata for this volume - erase. */
1609		if (pdmeta->cr != NULL &&
1610		    (vdc1 = ddf_meta_find_vdc(pdmeta, vmeta->vdc->VD_GUID)) != NULL) {
1611			SET32D(pdmeta, vdc1->Signature, 0xffffffff);
1612		}
1613
1614		/* If we are in the start process, that's all for now. */
1615		if (!pv->pv_started)
1616			goto nofit;
1617		/*
1618		 * If we have already started - try to get use of the disk.
1619		 * Try to replace OFFLINE disks first, then FAILED.
1620		 */
1621		if (ddf_meta_count_vdc(&pd->pd_meta, NULL) >=
1622			GET16(&pd->pd_meta, hdr->Max_Partitions)) {
1623			G_RAID_DEBUG1(1, sc, "No free partitions on disk %s",
1624			    g_raid_get_diskname(disk));
1625			goto nofit;
1626		}
1627		ddf_meta_unused_range(&pd->pd_meta, &eoff, &esize);
1628		if (esize == 0) {
1629			G_RAID_DEBUG1(1, sc, "No free space on disk %s",
1630			    g_raid_get_diskname(disk));
1631			goto nofit;
1632		}
1633		size = INT64_MAX;
1634		for (i = 0; i < vol->v_disks_count; i++) {
1635			sd = &vol->v_subdisks[i];
1636			if (sd->sd_state != G_RAID_SUBDISK_S_NONE)
1637				size = sd->sd_size;
1638			if (sd->sd_state <= G_RAID_SUBDISK_S_FAILED &&
1639			    (disk_pos < 0 ||
1640			     vol->v_subdisks[i].sd_state < sd->sd_state))
1641				disk_pos = i;
1642		}
1643		if (disk_pos >= 0 &&
1644		    vol->v_raid_level != G_RAID_VOLUME_RL_CONCAT &&
1645		    (off_t)esize * 512 < size) {
1646			G_RAID_DEBUG1(1, sc, "Disk %s free space "
1647			    "is too small (%ju < %ju)",
1648			    g_raid_get_diskname(disk),
1649			    (off_t)esize * 512, size);
1650			disk_pos = -1;
1651		}
1652		if (disk_pos >= 0) {
1653			if (vol->v_raid_level != G_RAID_VOLUME_RL_CONCAT)
1654				esize = size / 512;
1655			md_disk_bvd = disk_pos / GET16(vmeta, vdc->Primary_Element_Count); // XXX
1656			md_disk_pos = disk_pos % GET16(vmeta, vdc->Primary_Element_Count); // XXX
1657		} else {
1658nofit:
1659			if (ddf_meta_count_vdc(&pd->pd_meta, NULL) == 0) {
1660				g_raid_change_disk_state(disk,
1661				    G_RAID_DISK_S_SPARE);
1662			}
1663			return (0);
1664		}
1665		G_RAID_DEBUG1(1, sc, "Disk %s takes pos %d in the volume %s",
1666		    g_raid_get_diskname(disk), disk_pos, vol->v_name);
1667		resurrection = 1;
1668	}
1669
1670	sd = &vol->v_subdisks[disk_pos];
1671
1672	if (resurrection && sd->sd_disk != NULL) {
1673		g_raid_change_disk_state(sd->sd_disk,
1674		    G_RAID_DISK_S_STALE_FAILED);
1675		TAILQ_REMOVE(&sd->sd_disk->d_subdisks,
1676		    sd, sd_next);
1677	}
1678	vol->v_subdisks[disk_pos].sd_disk = disk;
1679	TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next);
1680
1681	/* Welcome the new disk. */
1682	if (resurrection)
1683		g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE);
1684	else if (GET8(gmeta, pdr->entry[md_pde_pos].PD_State) & DDF_PDE_PFA)
1685		g_raid_change_disk_state(disk, G_RAID_DISK_S_FAILED);
1686	else
1687		g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE);
1688
1689	if (resurrection) {
1690		sd->sd_offset = (off_t)eoff * 512;
1691		sd->sd_size = (off_t)esize * 512;
1692	} else if (pdmeta->cr != NULL &&
1693	    (vdc1 = ddf_meta_find_vdc(pdmeta, vmeta->vdc->VD_GUID)) != NULL) {
1694		val2 = (uint64_t *)&(vdc1->Physical_Disk_Sequence[GET16(vmeta, hdr->Max_Primary_Element_Entries)]);
1695		sd->sd_offset = (off_t)GET64P(pdmeta, val2 + md_disk_pos) * 512;
1696		sd->sd_size = (off_t)GET64D(pdmeta, vdc1->Block_Count) * 512;
1697	}
1698
1699	if (resurrection) {
1700		/* Stale disk, almost same as new. */
1701		g_raid_change_subdisk_state(sd,
1702		    G_RAID_SUBDISK_S_NEW);
1703	} else if (GET8(gmeta, pdr->entry[md_pde_pos].PD_State) & DDF_PDE_PFA) {
1704		/* Failed disk. */
1705		g_raid_change_subdisk_state(sd,
1706		    G_RAID_SUBDISK_S_FAILED);
1707	} else if ((GET8(gmeta, pdr->entry[md_pde_pos].PD_State) &
1708	     (DDF_PDE_FAILED | DDF_PDE_REBUILD)) != 0) {
1709		/* Rebuilding disk. */
1710		g_raid_change_subdisk_state(sd,
1711		    G_RAID_SUBDISK_S_REBUILD);
1712		sd->sd_rebuild_pos = 0;
1713	} else if ((GET8(vmeta, vde->VD_State) & DDF_VDE_DIRTY) != 0 ||
1714	    (GET8(vmeta, vde->Init_State) & DDF_VDE_INIT_MASK) !=
1715	     DDF_VDE_INIT_FULL) {
1716		/* Stale disk or dirty volume (unclean shutdown). */
1717		g_raid_change_subdisk_state(sd,
1718		    G_RAID_SUBDISK_S_STALE);
1719	} else {
1720		/* Up to date disk. */
1721		g_raid_change_subdisk_state(sd,
1722		    G_RAID_SUBDISK_S_ACTIVE);
1723	}
1724	g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW,
1725	    G_RAID_EVENT_SUBDISK);
1726
1727	return (resurrection);
1728}
1729
1730static void
1731g_raid_md_ddf_refill(struct g_raid_softc *sc)
1732{
1733	struct g_raid_volume *vol;
1734	struct g_raid_subdisk *sd;
1735	struct g_raid_disk *disk;
1736	struct g_raid_md_object *md;
1737	struct g_raid_md_ddf_perdisk *pd;
1738	struct g_raid_md_ddf_pervolume *pv;
1739	int update, updated, i, bad;
1740
1741	md = sc->sc_md;
1742restart:
1743	updated = 0;
1744	TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
1745		pv = vol->v_md_data;
1746		if (!pv->pv_started || vol->v_stopping)
1747			continue;
1748
1749		/* Search for subdisk that needs replacement. */
1750		bad = 0;
1751		for (i = 0; i < vol->v_disks_count; i++) {
1752			sd = &vol->v_subdisks[i];
1753			if (sd->sd_state == G_RAID_SUBDISK_S_NONE ||
1754			    sd->sd_state == G_RAID_SUBDISK_S_FAILED)
1755			        bad = 1;
1756		}
1757		if (!bad)
1758			continue;
1759
1760		G_RAID_DEBUG1(1, sc, "Volume %s is not complete, "
1761		    "trying to refill.", vol->v_name);
1762
1763		TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
1764			/* Skip failed. */
1765			if (disk->d_state < G_RAID_DISK_S_SPARE)
1766				continue;
1767			/* Skip already used by this volume. */
1768			for (i = 0; i < vol->v_disks_count; i++) {
1769				sd = &vol->v_subdisks[i];
1770				if (sd->sd_disk == disk)
1771					break;
1772			}
1773			if (i < vol->v_disks_count)
1774				continue;
1775
1776			/* Try to use disk if it has empty extents. */
1777			pd = disk->d_md_data;
1778			if (ddf_meta_count_vdc(&pd->pd_meta, NULL) <
1779			    GET16(&pd->pd_meta, hdr->Max_Partitions)) {
1780				update = g_raid_md_ddf_start_disk(disk, vol);
1781			} else
1782				update = 0;
1783			if (update) {
1784				updated = 1;
1785				g_raid_md_write_ddf(md, vol, NULL, disk);
1786				break;
1787			}
1788		}
1789	}
1790	if (updated)
1791		goto restart;
1792}
1793
/*
 * Start volume `vol` using the metadata collected so far.
 *
 * The volume geometry (RAID level and qualifier, sector and strip size,
 * disk count, media size) and the offset/size of every subdisk are taken
 * from the volume's configuration records.  The volume is then started,
 * every disk already known to the softc is put into its position, the
 * metadata is written back, remaining gaps are refilled from other disks
 * and the START event is sent to the volume.
 */
static void
g_raid_md_ddf_start(struct g_raid_volume *vol)
{
	struct g_raid_softc *sc;
	struct g_raid_subdisk *sd;
	struct g_raid_disk *disk;
	struct g_raid_md_object *md;
	struct g_raid_md_ddf_pervolume *pv;
	struct g_raid_md_ddf_object *mdi;
	struct ddf_vol_meta *vmeta;
	struct ddf_vdc_record *vdc;
	uint64_t *val2;
	int i, j, bvd;

	sc = vol->v_softc;
	md = sc->sc_md;
	mdi = (struct g_raid_md_ddf_object *)md;
	pv = vol->v_md_data;
	vmeta = &pv->pv_meta;
	vdc = vmeta->vdc;

	vol->v_raid_level = GET8(vmeta, vdc->Primary_RAID_Level);
	vol->v_raid_level_qualifier = GET8(vmeta, vdc->RLQ);
	/*
	 * RAID1 with more than one secondary element and secondary level 0
	 * is presented as RAID1E.
	 */
	if (GET8(vmeta, vdc->Secondary_Element_Count) > 1 &&
	    vol->v_raid_level == G_RAID_VOLUME_RL_RAID1 &&
	    GET8(vmeta, vdc->Secondary_RAID_Level) == 0)
		vol->v_raid_level = G_RAID_VOLUME_RL_RAID1E;
	vol->v_sectorsize = GET16(vmeta, vdc->Block_Size);
	/* A Block_Size of 0xffff falls back to the metadata sector size. */
	if (vol->v_sectorsize == 0xffff)
		vol->v_sectorsize = vmeta->sectorsize;
	/* Stripe_Size is used as a power-of-two shift of the sector size. */
	vol->v_strip_size = vol->v_sectorsize << GET8(vmeta, vdc->Stripe_Size);
	vol->v_disks_count = GET16(vmeta, vdc->Primary_Element_Count) *
	    GET8(vmeta, vdc->Secondary_Element_Count);
	vol->v_mediasize = GET64(vmeta, vdc->VD_Size) * vol->v_sectorsize;
	/*
	 * Walk all subdisks; `bvd` selects the per-BVD configuration record
	 * and `j` the position inside it.
	 */
	for (i = 0, j = 0, bvd = 0; i < vol->v_disks_count; i++, j++) {
		if (j == GET16(vmeta, vdc->Primary_Element_Count)) {
			j = 0;
			bvd++;
		}
		sd = &vol->v_subdisks[i];
		if (vmeta->bvdc[bvd] == NULL) {
			/* No record for this BVD yet: use volume-wide values. */
			sd->sd_offset = 0;
			sd->sd_size = GET64(vmeta, vdc->Block_Count) *
			    vol->v_sectorsize;
			continue;
		}
		/*
		 * The array of 64-bit starting offsets follows the
		 * Max_Primary_Element_Entries-sized disk sequence array.
		 */
		val2 = (uint64_t *)&(vmeta->bvdc[bvd]->Physical_Disk_Sequence[
		    GET16(vmeta, hdr->Max_Primary_Element_Entries)]);
		sd->sd_offset = GET64P(vmeta, val2 + j) * vol->v_sectorsize;
		sd->sd_size = GET64(vmeta, bvdc[bvd]->Block_Count) *
		    vol->v_sectorsize;
	}
	g_raid_start_volume(vol);

	/* Make all disks found till the moment take their places. */
	for (i = 0, j = 0, bvd = 0; i < vol->v_disks_count; i++, j++) {
		if (j == GET16(vmeta, vdc->Primary_Element_Count)) {
			j = 0;
			bvd++;
		}
		if (vmeta->bvdc[bvd] == NULL)
			continue;
		/* Disks are looked up by PD reference, not by GUID. */
		disk = g_raid_md_ddf_get_disk(sc, NULL,
		    GET32(vmeta, bvdc[bvd]->Physical_Disk_Sequence[j]));
		if (disk != NULL)
			g_raid_md_ddf_start_disk(disk, vol);
	}

	/* Mark the volume started and cancel the start timeout callout. */
	pv->pv_started = 1;
	mdi->mdio_starting--;
	callout_stop(&pv->pv_start_co);
	G_RAID_DEBUG1(0, sc, "Volume started.");
	g_raid_md_write_ddf(md, vol, NULL, NULL);

	/* Pickup any STALE/SPARE disks to refill array if needed. */
	g_raid_md_ddf_refill(sc);

	g_raid_event_send(vol, G_RAID_VOLUME_E_START, G_RAID_EVENT_VOLUME);
}
1873
1874static void
1875g_raid_ddf_go(void *arg)
1876{
1877	struct g_raid_volume *vol;
1878	struct g_raid_softc *sc;
1879	struct g_raid_md_ddf_pervolume *pv;
1880
1881	vol = arg;
1882	pv = vol->v_md_data;
1883	sc = vol->v_softc;
1884	if (!pv->pv_started) {
1885		G_RAID_DEBUG1(0, sc, "Force volume start due to timeout.");
1886		g_raid_event_send(vol, G_RAID_VOLUME_E_STARTMD,
1887		    G_RAID_EVENT_VOLUME);
1888	}
1889}
1890
1891static void
1892g_raid_md_ddf_new_disk(struct g_raid_disk *disk)
1893{
1894	struct g_raid_softc *sc;
1895	struct g_raid_md_object *md;
1896	struct g_raid_md_ddf_perdisk *pd;
1897	struct g_raid_md_ddf_pervolume *pv;
1898	struct g_raid_md_ddf_object *mdi;
1899	struct g_raid_volume *vol;
1900	struct ddf_meta *pdmeta;
1901	struct ddf_vol_meta *vmeta;
1902	struct ddf_vdc_record *vdc;
1903	struct ddf_vd_entry *vde;
1904	int i, j, k, num, have, need, needthis, cnt, spare;
1905	uint32_t val;
1906	char buf[17];
1907
1908	sc = disk->d_softc;
1909	md = sc->sc_md;
1910	mdi = (struct g_raid_md_ddf_object *)md;
1911	pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data;
1912	pdmeta = &pd->pd_meta;
1913	spare = -1;
1914
1915	if (mdi->mdio_meta.hdr == NULL)
1916		ddf_meta_copy(&mdi->mdio_meta, pdmeta);
1917	else
1918		ddf_meta_update(&mdi->mdio_meta, pdmeta);
1919
1920	num = GETCRNUM(pdmeta);
1921	for (j = 0; j < num; j++) {
1922		vdc = GETVDCPTR(pdmeta, j);
1923		val = GET32D(pdmeta, vdc->Signature);
1924
1925		if (val == DDF_SA_SIGNATURE && spare == -1)
1926			spare = 1;
1927
1928		if (val != DDF_VDCR_SIGNATURE)
1929			continue;
1930		spare = 0;
1931		k = ddf_meta_find_vd(pdmeta, vdc->VD_GUID);
1932		if (k < 0)
1933			continue;
1934		vde = &pdmeta->vdr->entry[k];
1935
1936		/* Look for volume with matching ID. */
1937		vol = g_raid_md_ddf_get_volume(sc, vdc->VD_GUID);
1938		if (vol == NULL) {
1939			ddf_meta_get_name(pdmeta, k, buf);
1940			vol = g_raid_create_volume(sc, buf,
1941			    GET16D(pdmeta, vde->VD_Number));
1942			pv = malloc(sizeof(*pv), M_MD_DDF, M_WAITOK | M_ZERO);
1943			vol->v_md_data = pv;
1944			callout_init(&pv->pv_start_co, 1);
1945			callout_reset(&pv->pv_start_co,
1946			    g_raid_start_timeout * hz,
1947			    g_raid_ddf_go, vol);
1948			mdi->mdio_starting++;
1949		} else
1950			pv = vol->v_md_data;
1951
1952		/* If we haven't started yet - check metadata freshness. */
1953		vmeta = &pv->pv_meta;
1954		ddf_vol_meta_update(vmeta, pdmeta, vdc->VD_GUID, pv->pv_started);
1955	}
1956
1957	if (spare == 1) {
1958		g_raid_change_disk_state(disk, G_RAID_DISK_S_SPARE);
1959		g_raid_md_ddf_refill(sc);
1960	}
1961
1962	TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
1963		pv = vol->v_md_data;
1964		vmeta = &pv->pv_meta;
1965
1966		/* If we collected all needed disks - start array. */
1967		need = 0;
1968		needthis = 0;
1969		have = 0;
1970		for (k = 0; k < GET8(vmeta, vdc->Secondary_Element_Count); k++) {
1971			if (vmeta->bvdc[k] == NULL) {
1972				need += GET16(vmeta, vdc->Primary_Element_Count);
1973				continue;
1974			}
1975			cnt = GET16(vmeta, bvdc[k]->Primary_Element_Count);
1976			need += cnt;
1977			for (i = 0; i < cnt; i++) {
1978				val = GET32(vmeta, bvdc[k]->Physical_Disk_Sequence[i]);
1979				if (GET32(pdmeta, pdd->PD_Reference) == val)
1980					needthis++;
1981				else if (g_raid_md_ddf_get_disk(sc, NULL, val) != NULL)
1982					have++;
1983			}
1984		}
1985		if (!needthis)
1986			continue;
1987		if (pv->pv_started) {
1988			if (g_raid_md_ddf_start_disk(disk, vol))
1989				g_raid_md_write_ddf(md, vol, NULL, NULL);
1990		} else {
1991			G_RAID_DEBUG1(1, sc, "Volume %s now has %d of %d disks",
1992			    vol->v_name, have + needthis, need);
1993			if (have + needthis == need)
1994				g_raid_md_ddf_start(vol);
1995		}
1996	}
1997}
1998
1999static int
2000g_raid_md_create_ddf(struct g_raid_md_object *md, struct g_class *mp,
2001    struct g_geom **gp)
2002{
2003	struct g_geom *geom;
2004	struct g_raid_softc *sc;
2005
2006	/* Search for existing node. */
2007	LIST_FOREACH(geom, &mp->geom, geom) {
2008		sc = geom->softc;
2009		if (sc == NULL)
2010			continue;
2011		if (sc->sc_stopping != 0)
2012			continue;
2013		if (sc->sc_md->mdo_class != md->mdo_class)
2014			continue;
2015		break;
2016	}
2017	if (geom != NULL) {
2018		*gp = geom;
2019		return (G_RAID_MD_TASTE_EXISTING);
2020	}
2021
2022	/* Create new one if not found. */
2023	sc = g_raid_create_node(mp, "DDF", md);
2024	if (sc == NULL)
2025		return (G_RAID_MD_TASTE_FAIL);
2026	md->mdo_softc = sc;
2027	*gp = sc->sc_geom;
2028	return (G_RAID_MD_TASTE_NEW);
2029}
2030
2031static int
2032g_raid_md_taste_ddf(struct g_raid_md_object *md, struct g_class *mp,
2033                              struct g_consumer *cp, struct g_geom **gp)
2034{
2035	struct g_consumer *rcp;
2036	struct g_provider *pp;
2037	struct g_raid_softc *sc;
2038	struct g_raid_disk *disk;
2039	struct ddf_meta meta;
2040	struct g_raid_md_ddf_perdisk *pd;
2041	struct g_geom *geom;
2042	int error, result, len;
2043	char name[16];
2044
2045	G_RAID_DEBUG(1, "Tasting DDF on %s", cp->provider->name);
2046	pp = cp->provider;
2047
2048	/* Read metadata from device. */
2049	if (g_access(cp, 1, 0, 0) != 0)
2050		return (G_RAID_MD_TASTE_FAIL);
2051	g_topology_unlock();
2052	bzero(&meta, sizeof(meta));
2053	error = ddf_meta_read(cp, &meta);
2054	g_topology_lock();
2055	g_access(cp, -1, 0, 0);
2056	if (error != 0)
2057		return (G_RAID_MD_TASTE_FAIL);
2058
2059	/* Metadata valid. Print it. */
2060	g_raid_md_ddf_print(&meta);
2061
2062	/* Search for matching node. */
2063	sc = NULL;
2064	LIST_FOREACH(geom, &mp->geom, geom) {
2065		sc = geom->softc;
2066		if (sc == NULL)
2067			continue;
2068		if (sc->sc_stopping != 0)
2069			continue;
2070		if (sc->sc_md->mdo_class != md->mdo_class)
2071			continue;
2072		break;
2073	}
2074
2075	/* Found matching node. */
2076	if (geom != NULL) {
2077		G_RAID_DEBUG(1, "Found matching array %s", sc->sc_name);
2078		result = G_RAID_MD_TASTE_EXISTING;
2079
2080	} else { /* Not found matching node -- create one. */
2081		result = G_RAID_MD_TASTE_NEW;
2082		snprintf(name, sizeof(name), "DDF");
2083		sc = g_raid_create_node(mp, name, md);
2084		md->mdo_softc = sc;
2085		geom = sc->sc_geom;
2086	}
2087
2088	rcp = g_new_consumer(geom);
2089	g_attach(rcp, pp);
2090	if (g_access(rcp, 1, 1, 1) != 0)
2091		; //goto fail1;
2092
2093	g_topology_unlock();
2094	sx_xlock(&sc->sc_lock);
2095
2096	pd = malloc(sizeof(*pd), M_MD_DDF, M_WAITOK | M_ZERO);
2097	pd->pd_meta = meta;
2098	disk = g_raid_create_disk(sc);
2099	disk->d_md_data = (void *)pd;
2100	disk->d_consumer = rcp;
2101	rcp->private = disk;
2102
2103	/* Read kernel dumping information. */
2104	disk->d_kd.offset = 0;
2105	disk->d_kd.length = OFF_MAX;
2106	len = sizeof(disk->d_kd);
2107	error = g_io_getattr("GEOM::kerneldump", rcp, &len, &disk->d_kd);
2108	if (disk->d_kd.di.dumper == NULL)
2109		G_RAID_DEBUG1(2, sc, "Dumping not supported by %s: %d.",
2110		    rcp->provider->name, error);
2111
2112	g_raid_md_ddf_new_disk(disk);
2113
2114	sx_xunlock(&sc->sc_lock);
2115	g_topology_lock();
2116	*gp = geom;
2117	return (result);
2118}
2119
2120static int
2121g_raid_md_event_ddf(struct g_raid_md_object *md,
2122    struct g_raid_disk *disk, u_int event)
2123{
2124	struct g_raid_softc *sc;
2125
2126	sc = md->mdo_softc;
2127	if (disk == NULL)
2128		return (-1);
2129	switch (event) {
2130	case G_RAID_DISK_E_DISCONNECTED:
2131		/* Delete disk. */
2132		g_raid_change_disk_state(disk, G_RAID_DISK_S_NONE);
2133		g_raid_destroy_disk(disk);
2134		g_raid_md_ddf_purge_volumes(sc);
2135
2136		/* Write updated metadata to all disks. */
2137		g_raid_md_write_ddf(md, NULL, NULL, NULL);
2138
2139		/* Check if anything left. */
2140		if (g_raid_ndisks(sc, -1) == 0)
2141			g_raid_destroy_node(sc, 0);
2142		else
2143			g_raid_md_ddf_refill(sc);
2144		return (0);
2145	}
2146	return (-2);
2147}
2148
2149static int
2150g_raid_md_volume_event_ddf(struct g_raid_md_object *md,
2151    struct g_raid_volume *vol, u_int event)
2152{
2153	struct g_raid_md_ddf_pervolume *pv;
2154
2155	pv = (struct g_raid_md_ddf_pervolume *)vol->v_md_data;
2156	switch (event) {
2157	case G_RAID_VOLUME_E_STARTMD:
2158		if (!pv->pv_started)
2159			g_raid_md_ddf_start(vol);
2160		return (0);
2161	}
2162	return (-2);
2163}
2164
2165static int
2166g_raid_md_ctl_ddf(struct g_raid_md_object *md,
2167    struct gctl_req *req)
2168{
2169	struct g_raid_softc *sc;
2170	struct g_raid_volume *vol, *vol1;
2171	struct g_raid_subdisk *sd;
2172	struct g_raid_disk *disk, *disks[DDF_MAX_DISKS_HARD];
2173	struct g_raid_md_ddf_perdisk *pd;
2174	struct g_raid_md_ddf_pervolume *pv;
2175	struct g_raid_md_ddf_object *mdi;
2176	struct g_consumer *cp;
2177	struct g_provider *pp;
2178	char arg[16];
2179	const char *verb, *volname, *levelname, *diskname;
2180	char *tmp;
2181	int *nargs, *force;
2182	off_t size, sectorsize, strip, offs[DDF_MAX_DISKS_HARD], esize;
2183	intmax_t *sizearg, *striparg;
2184	int i, numdisks, len, level, qual;
2185	int error;
2186
2187	sc = md->mdo_softc;
2188	mdi = (struct g_raid_md_ddf_object *)md;
2189	verb = gctl_get_param(req, "verb", NULL);
2190	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
2191	error = 0;
2192
2193	if (strcmp(verb, "label") == 0) {
2194
2195		if (*nargs < 4) {
2196			gctl_error(req, "Invalid number of arguments.");
2197			return (-1);
2198		}
2199		volname = gctl_get_asciiparam(req, "arg1");
2200		if (volname == NULL) {
2201			gctl_error(req, "No volume name.");
2202			return (-2);
2203		}
2204		levelname = gctl_get_asciiparam(req, "arg2");
2205		if (levelname == NULL) {
2206			gctl_error(req, "No RAID level.");
2207			return (-3);
2208		}
2209		if (g_raid_volume_str2level(levelname, &level, &qual)) {
2210			gctl_error(req, "Unknown RAID level '%s'.", levelname);
2211			return (-4);
2212		}
2213		numdisks = *nargs - 3;
2214		force = gctl_get_paraml(req, "force", sizeof(*force));
2215		if (!g_raid_md_ddf_supported(level, qual, numdisks,
2216		    force ? *force : 0)) {
2217			gctl_error(req, "Unsupported RAID level "
2218			    "(0x%02x/0x%02x), or number of disks (%d).",
2219			    level, qual, numdisks);
2220			return (-5);
2221		}
2222
2223		/* Search for disks, connect them and probe. */
2224		size = INT64_MAX;
2225		sectorsize = 0;
2226		bzero(disks, sizeof(disks));
2227		bzero(offs, sizeof(offs));
2228		for (i = 0; i < numdisks; i++) {
2229			snprintf(arg, sizeof(arg), "arg%d", i + 3);
2230			diskname = gctl_get_asciiparam(req, arg);
2231			if (diskname == NULL) {
2232				gctl_error(req, "No disk name (%s).", arg);
2233				error = -6;
2234				break;
2235			}
2236			if (strcmp(diskname, "NONE") == 0)
2237				continue;
2238
2239			TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
2240				if (disk->d_consumer != NULL &&
2241				    disk->d_consumer->provider != NULL &&
2242				    strcmp(disk->d_consumer->provider->name,
2243				     diskname) == 0)
2244					break;
2245			}
2246			if (disk != NULL) {
2247				if (disk->d_state != G_RAID_DISK_S_ACTIVE) {
2248					gctl_error(req, "Disk '%s' is in a "
2249					    "wrong state (%s).", diskname,
2250					    g_raid_disk_state2str(disk->d_state));
2251					error = -7;
2252					break;
2253				}
2254				pd = disk->d_md_data;
2255				if (ddf_meta_count_vdc(&pd->pd_meta, NULL) >=
2256				    GET16(&pd->pd_meta, hdr->Max_Partitions)) {
2257					gctl_error(req, "No free partitions "
2258					    "on disk '%s'.",
2259					    diskname);
2260					error = -7;
2261					break;
2262				}
2263				pp = disk->d_consumer->provider;
2264				disks[i] = disk;
2265				ddf_meta_unused_range(&pd->pd_meta,
2266				    &offs[i], &esize);
2267				size = MIN(size, (off_t)esize * pp->sectorsize);
2268				sectorsize = MAX(sectorsize, pp->sectorsize);
2269				continue;
2270			}
2271
2272			g_topology_lock();
2273			cp = g_raid_open_consumer(sc, diskname);
2274			if (cp == NULL) {
2275				gctl_error(req, "Can't open disk '%s'.",
2276				    diskname);
2277				g_topology_unlock();
2278				error = -8;
2279				break;
2280			}
2281			pp = cp->provider;
2282			pd = malloc(sizeof(*pd), M_MD_DDF, M_WAITOK | M_ZERO);
2283			disk = g_raid_create_disk(sc);
2284			disk->d_md_data = (void *)pd;
2285			disk->d_consumer = cp;
2286			disks[i] = disk;
2287			cp->private = disk;
2288			ddf_meta_create(disk, &mdi->mdio_meta);
2289			if (mdi->mdio_meta.hdr == NULL)
2290				ddf_meta_copy(&mdi->mdio_meta, &pd->pd_meta);
2291			else
2292				ddf_meta_update(&mdi->mdio_meta, &pd->pd_meta);
2293			g_topology_unlock();
2294
2295			/* Read kernel dumping information. */
2296			disk->d_kd.offset = 0;
2297			disk->d_kd.length = OFF_MAX;
2298			len = sizeof(disk->d_kd);
2299			g_io_getattr("GEOM::kerneldump", cp, &len, &disk->d_kd);
2300			if (disk->d_kd.di.dumper == NULL)
2301				G_RAID_DEBUG1(2, sc,
2302				    "Dumping not supported by %s.",
2303				    cp->provider->name);
2304
2305			/* Reserve some space for metadata. */
2306			size = MIN(size, pp->mediasize - 131072llu * pp->sectorsize);
2307			sectorsize = MAX(sectorsize, pp->sectorsize);
2308		}
2309		if (error != 0) {
2310			for (i = 0; i < numdisks; i++) {
2311				if (disks[i] != NULL &&
2312				    disks[i]->d_state == G_RAID_DISK_S_NONE)
2313					g_raid_destroy_disk(disks[i]);
2314			}
2315			return (error);
2316		}
2317
2318		if (sectorsize <= 0) {
2319			gctl_error(req, "Can't get sector size.");
2320			return (-8);
2321		}
2322
2323		/* Handle size argument. */
2324		len = sizeof(*sizearg);
2325		sizearg = gctl_get_param(req, "size", &len);
2326		if (sizearg != NULL && len == sizeof(*sizearg) &&
2327		    *sizearg > 0) {
2328			if (*sizearg > size) {
2329				gctl_error(req, "Size too big %lld > %lld.",
2330				    (long long)*sizearg, (long long)size);
2331				return (-9);
2332			}
2333			size = *sizearg;
2334		}
2335
2336		/* Handle strip argument. */
2337		strip = 131072;
2338		len = sizeof(*striparg);
2339		striparg = gctl_get_param(req, "strip", &len);
2340		if (striparg != NULL && len == sizeof(*striparg) &&
2341		    *striparg > 0) {
2342			if (*striparg < sectorsize) {
2343				gctl_error(req, "Strip size too small.");
2344				return (-10);
2345			}
2346			if (*striparg % sectorsize != 0) {
2347				gctl_error(req, "Incorrect strip size.");
2348				return (-11);
2349			}
2350			strip = *striparg;
2351		}
2352
2353		/* Round size down to strip or sector. */
2354		if (level == G_RAID_VOLUME_RL_RAID1 ||
2355		    level == G_RAID_VOLUME_RL_RAID3 ||
2356		    level == G_RAID_VOLUME_RL_SINGLE ||
2357		    level == G_RAID_VOLUME_RL_CONCAT)
2358			size -= (size % sectorsize);
2359		else if (level == G_RAID_VOLUME_RL_RAID1E &&
2360		    (numdisks & 1) != 0)
2361			size -= (size % (2 * strip));
2362		else
2363			size -= (size % strip);
2364		if (size <= 0) {
2365			gctl_error(req, "Size too small.");
2366			return (-13);
2367		}
2368
2369		/* We have all we need, create things: volume, ... */
2370		pv = malloc(sizeof(*pv), M_MD_DDF, M_WAITOK | M_ZERO);
2371		ddf_vol_meta_create(&pv->pv_meta, &mdi->mdio_meta);
2372		pv->pv_started = 1;
2373		vol = g_raid_create_volume(sc, volname, -1);
2374		vol->v_md_data = pv;
2375		vol->v_raid_level = level;
2376		vol->v_raid_level_qualifier = qual;
2377		vol->v_strip_size = strip;
2378		vol->v_disks_count = numdisks;
2379		if (level == G_RAID_VOLUME_RL_RAID0 ||
2380		    level == G_RAID_VOLUME_RL_CONCAT ||
2381		    level == G_RAID_VOLUME_RL_SINGLE)
2382			vol->v_mediasize = size * numdisks;
2383		else if (level == G_RAID_VOLUME_RL_RAID1)
2384			vol->v_mediasize = size;
2385		else if (level == G_RAID_VOLUME_RL_RAID3 ||
2386		    level == G_RAID_VOLUME_RL_RAID4 ||
2387		    level == G_RAID_VOLUME_RL_RAID5 ||
2388		    level == G_RAID_VOLUME_RL_RAID5R)
2389			vol->v_mediasize = size * (numdisks - 1);
2390		else if (level == G_RAID_VOLUME_RL_RAID6 ||
2391		    level == G_RAID_VOLUME_RL_RAID5E ||
2392		    level == G_RAID_VOLUME_RL_RAID5EE)
2393			vol->v_mediasize = size * (numdisks - 2);
2394		else if (level == G_RAID_VOLUME_RL_RAIDMDF)
2395			vol->v_mediasize = size * (numdisks - 3);
2396		else { /* RAID1E */
2397			vol->v_mediasize = ((size * numdisks) / strip / 2) *
2398			    strip;
2399		}
2400		vol->v_sectorsize = sectorsize;
2401		g_raid_start_volume(vol);
2402
2403		/* , and subdisks. */
2404		for (i = 0; i < numdisks; i++) {
2405			disk = disks[i];
2406			sd = &vol->v_subdisks[i];
2407			sd->sd_disk = disk;
2408			sd->sd_offset = (off_t)offs[i] * 512;
2409			sd->sd_size = size;
2410			if (disk == NULL)
2411				continue;
2412			TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next);
2413			g_raid_change_disk_state(disk,
2414			    G_RAID_DISK_S_ACTIVE);
2415			g_raid_change_subdisk_state(sd,
2416			    G_RAID_SUBDISK_S_ACTIVE);
2417			g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW,
2418			    G_RAID_EVENT_SUBDISK);
2419		}
2420
2421		/* Write metadata based on created entities. */
2422		G_RAID_DEBUG1(0, sc, "Array started.");
2423		g_raid_md_write_ddf(md, vol, NULL, NULL);
2424
2425		/* Pickup any STALE/SPARE disks to refill array if needed. */
2426		g_raid_md_ddf_refill(sc);
2427
2428		g_raid_event_send(vol, G_RAID_VOLUME_E_START,
2429		    G_RAID_EVENT_VOLUME);
2430		return (0);
2431	}
2432	if (strcmp(verb, "add") == 0) {
2433
2434		gctl_error(req, "`add` command is not applicable, "
2435		    "use `label` instead.");
2436		return (-99);
2437	}
2438	if (strcmp(verb, "delete") == 0) {
2439
2440		/* Full node destruction. */
2441		if (*nargs == 1) {
2442			/* Check if some volume is still open. */
2443			force = gctl_get_paraml(req, "force", sizeof(*force));
2444			if (force != NULL && *force == 0 &&
2445			    g_raid_nopens(sc) != 0) {
2446				gctl_error(req, "Some volume is still open.");
2447				return (-4);
2448			}
2449
2450			TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
2451				if (disk->d_consumer)
2452					ddf_meta_erase(disk->d_consumer);
2453			}
2454			g_raid_destroy_node(sc, 0);
2455			return (0);
2456		}
2457
2458		/* Destroy specified volume. If it was last - all node. */
2459		if (*nargs != 2) {
2460			gctl_error(req, "Invalid number of arguments.");
2461			return (-1);
2462		}
2463		volname = gctl_get_asciiparam(req, "arg1");
2464		if (volname == NULL) {
2465			gctl_error(req, "No volume name.");
2466			return (-2);
2467		}
2468
2469		/* Search for volume. */
2470		TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
2471			if (strcmp(vol->v_name, volname) == 0)
2472				break;
2473		}
2474		if (vol == NULL) {
2475			i = strtol(volname, &tmp, 10);
2476			if (verb != volname && tmp[0] == 0) {
2477				TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
2478					if (vol->v_global_id == i)
2479						break;
2480				}
2481			}
2482		}
2483		if (vol == NULL) {
2484			gctl_error(req, "Volume '%s' not found.", volname);
2485			return (-3);
2486		}
2487
2488		/* Check if volume is still open. */
2489		force = gctl_get_paraml(req, "force", sizeof(*force));
2490		if (force != NULL && *force == 0 &&
2491		    vol->v_provider_open != 0) {
2492			gctl_error(req, "Volume is still open.");
2493			return (-4);
2494		}
2495
2496		/* Destroy volume and potentially node. */
2497		i = 0;
2498		TAILQ_FOREACH(vol1, &sc->sc_volumes, v_next)
2499			i++;
2500		if (i >= 2) {
2501			g_raid_destroy_volume(vol);
2502			g_raid_md_ddf_purge_disks(sc);
2503			g_raid_md_write_ddf(md, NULL, NULL, NULL);
2504		} else {
2505			TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
2506				if (disk->d_consumer)
2507					ddf_meta_erase(disk->d_consumer);
2508			}
2509			g_raid_destroy_node(sc, 0);
2510		}
2511		return (0);
2512	}
2513	if (strcmp(verb, "remove") == 0 ||
2514	    strcmp(verb, "fail") == 0) {
2515		if (*nargs < 2) {
2516			gctl_error(req, "Invalid number of arguments.");
2517			return (-1);
2518		}
2519		for (i = 1; i < *nargs; i++) {
2520			snprintf(arg, sizeof(arg), "arg%d", i);
2521			diskname = gctl_get_asciiparam(req, arg);
2522			if (diskname == NULL) {
2523				gctl_error(req, "No disk name (%s).", arg);
2524				error = -2;
2525				break;
2526			}
2527			if (strncmp(diskname, "/dev/", 5) == 0)
2528				diskname += 5;
2529
2530			TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
2531				if (disk->d_consumer != NULL &&
2532				    disk->d_consumer->provider != NULL &&
2533				    strcmp(disk->d_consumer->provider->name,
2534				     diskname) == 0)
2535					break;
2536			}
2537			if (disk == NULL) {
2538				gctl_error(req, "Disk '%s' not found.",
2539				    diskname);
2540				error = -3;
2541				break;
2542			}
2543
2544			if (strcmp(verb, "fail") == 0) {
2545				g_raid_md_fail_disk_ddf(md, NULL, disk);
2546				continue;
2547			}
2548
2549			/* Erase metadata on deleting disk and destroy it. */
2550			ddf_meta_erase(disk->d_consumer);
2551			g_raid_destroy_disk(disk);
2552		}
2553		g_raid_md_ddf_purge_volumes(sc);
2554
2555		/* Write updated metadata to remaining disks. */
2556		g_raid_md_write_ddf(md, NULL, NULL, NULL);
2557
2558		/* Check if anything left. */
2559		if (g_raid_ndisks(sc, -1) == 0)
2560			g_raid_destroy_node(sc, 0);
2561		else
2562			g_raid_md_ddf_refill(sc);
2563		return (error);
2564	}
2565	if (strcmp(verb, "insert") == 0) {
2566		if (*nargs < 2) {
2567			gctl_error(req, "Invalid number of arguments.");
2568			return (-1);
2569		}
2570		for (i = 1; i < *nargs; i++) {
2571			/* Get disk name. */
2572			snprintf(arg, sizeof(arg), "arg%d", i);
2573			diskname = gctl_get_asciiparam(req, arg);
2574			if (diskname == NULL) {
2575				gctl_error(req, "No disk name (%s).", arg);
2576				error = -3;
2577				break;
2578			}
2579
2580			/* Try to find provider with specified name. */
2581			g_topology_lock();
2582			cp = g_raid_open_consumer(sc, diskname);
2583			if (cp == NULL) {
2584				gctl_error(req, "Can't open disk '%s'.",
2585				    diskname);
2586				g_topology_unlock();
2587				error = -4;
2588				break;
2589			}
2590			pp = cp->provider;
2591			g_topology_unlock();
2592
2593			pd = malloc(sizeof(*pd), M_MD_DDF, M_WAITOK | M_ZERO);
2594
2595			disk = g_raid_create_disk(sc);
2596			disk->d_consumer = cp;
2597			disk->d_md_data = (void *)pd;
2598			cp->private = disk;
2599
2600			/* Read kernel dumping information. */
2601			disk->d_kd.offset = 0;
2602			disk->d_kd.length = OFF_MAX;
2603			len = sizeof(disk->d_kd);
2604			g_io_getattr("GEOM::kerneldump", cp, &len, &disk->d_kd);
2605			if (disk->d_kd.di.dumper == NULL)
2606				G_RAID_DEBUG1(2, sc,
2607				    "Dumping not supported by %s.",
2608				    cp->provider->name);
2609
2610			/* Welcome the "new" disk. */
2611			g_raid_change_disk_state(disk, G_RAID_DISK_S_SPARE);
2612			ddf_meta_create(disk, &mdi->mdio_meta);
2613			if (mdi->mdio_meta.hdr == NULL)
2614				ddf_meta_copy(&mdi->mdio_meta, &pd->pd_meta);
2615			else
2616				ddf_meta_update(&mdi->mdio_meta, &pd->pd_meta);
2617//			ddf_meta_write_spare(cp);
2618			g_raid_md_ddf_refill(sc);
2619		}
2620		return (error);
2621	}
2622	return (-100);
2623}
2624
2625static int
2626g_raid_md_write_ddf(struct g_raid_md_object *md, struct g_raid_volume *tvol,
2627    struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk)
2628{
2629	struct g_raid_softc *sc;
2630	struct g_raid_volume *vol;
2631	struct g_raid_subdisk *sd;
2632	struct g_raid_disk *disk;
2633	struct g_raid_md_ddf_perdisk *pd;
2634	struct g_raid_md_ddf_pervolume *pv;
2635	struct g_raid_md_ddf_object *mdi;
2636	struct ddf_meta *gmeta;
2637	struct ddf_vol_meta *vmeta;
2638	struct ddf_vdc_record *vdc;
2639	uint64_t *val2;
2640	int i, j, pos, bvd, size;
2641
2642	sc = md->mdo_softc;
2643	mdi = (struct g_raid_md_ddf_object *)md;
2644	gmeta = &mdi->mdio_meta;
2645
2646	if (sc->sc_stopping == G_RAID_DESTROY_HARD)
2647		return (0);
2648
2649	/*
2650	 * Clear disk flags to let only really needed ones to be reset.
2651	 * Do it only if there are no volumes in starting state now,
2652	 * as they can update disk statuses yet and we may kill innocent.
2653	 */
2654	if (mdi->mdio_starting == 0) {
2655		for (i = 0; i < GET16(gmeta, pdr->Populated_PDEs); i++) {
2656			if (isff(gmeta->pdr->entry[i].PD_GUID, 24))
2657				continue;
2658			SET16(gmeta, pdr->entry[i].PD_Type,
2659			    GET16(gmeta, pdr->entry[i].PD_Type) &
2660			    ~DDF_PDE_PARTICIPATING);
2661			if ((GET16(gmeta, pdr->entry[i].PD_State) &
2662			    DDF_PDE_PFA) == 0)
2663				SET16(gmeta, pdr->entry[i].PD_State, 0);
2664		}
2665	}
2666
2667	/* Generate/update new per-volume metadata. */
2668	TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
2669		pv = (struct g_raid_md_ddf_pervolume *)vol->v_md_data;
2670		if (vol->v_stopping || !pv->pv_started)
2671			continue;
2672		vmeta = &pv->pv_meta;
2673
2674		SET32(vmeta, vdc->Sequence_Number,
2675		    GET32(vmeta, vdc->Sequence_Number) + 1);
2676		if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E &&
2677		    vol->v_disks_count % 2 == 0)
2678			SET16(vmeta, vdc->Primary_Element_Count, 2);
2679		else
2680			SET16(vmeta, vdc->Primary_Element_Count,
2681			    vol->v_disks_count);
2682		SET8(vmeta, vdc->Stripe_Size,
2683		    ffs(vol->v_strip_size / vol->v_sectorsize) - 1);
2684		if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E &&
2685		    vol->v_disks_count % 2 == 0) {
2686			SET8(vmeta, vdc->Primary_RAID_Level,
2687			    DDF_VDCR_RAID1);
2688			SET8(vmeta, vdc->RLQ, 0);
2689			SET8(vmeta, vdc->Secondary_Element_Count,
2690			    vol->v_disks_count / 2);
2691			SET8(vmeta, vdc->Secondary_RAID_Level, 0);
2692		} else {
2693			SET8(vmeta, vdc->Primary_RAID_Level,
2694			    vol->v_raid_level);
2695			SET8(vmeta, vdc->RLQ,
2696			    vol->v_raid_level_qualifier);
2697			SET8(vmeta, vdc->Secondary_Element_Count, 1);
2698			SET8(vmeta, vdc->Secondary_RAID_Level, 0);
2699		}
2700		SET8(vmeta, vdc->Secondary_Element_Seq, 0);
2701		SET64(vmeta, vdc->Block_Count, 0);
2702		SET64(vmeta, vdc->VD_Size, vol->v_mediasize / vol->v_sectorsize);
2703		SET16(vmeta, vdc->Block_Size, vol->v_sectorsize);
2704
2705		SET16(vmeta, vde->VD_Number, vol->v_global_id);
2706		if (vol->v_state <= G_RAID_VOLUME_S_BROKEN)
2707			SET8(vmeta, vde->VD_State, DDF_VDE_FAILED);
2708		else if (vol->v_state <= G_RAID_VOLUME_S_DEGRADED)
2709			SET8(vmeta, vde->VD_State, DDF_VDE_DEGRADED);
2710		else if (vol->v_state <= G_RAID_VOLUME_S_SUBOPTIMAL)
2711			SET8(vmeta, vde->VD_State, DDF_VDE_PARTIAL);
2712		else
2713			SET8(vmeta, vde->VD_State, DDF_VDE_OPTIMAL);
2714		if (vol->v_dirty ||
2715		    g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_STALE) > 0 ||
2716		    g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_RESYNC) > 0)
2717			SET8(vmeta, vde->VD_State,
2718			    GET8(vmeta, vde->VD_State) | DDF_VDE_DIRTY);
2719		SET8(vmeta, vde->Init_State, DDF_VDE_INIT_FULL); // XXX
2720		ddf_meta_put_name(vmeta, vol->v_name);
2721
2722		for (i = 0; i < vol->v_disks_count; i++) {
2723			sd = &vol->v_subdisks[i];
2724			bvd = i / GET16(vmeta, vdc->Primary_Element_Count);
2725			pos = i % GET16(vmeta, vdc->Primary_Element_Count);
2726			disk = sd->sd_disk;
2727			if (disk != NULL) {
2728				pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data;
2729				if (vmeta->bvdc[bvd] == NULL) {
2730					size = GET16(vmeta,
2731					    hdr->Configuration_Record_Length) *
2732					    vmeta->sectorsize;
2733					vmeta->bvdc[bvd] = malloc(size,
2734					    M_MD_DDF, M_WAITOK);
2735					memset(vmeta->bvdc[bvd], 0xff, size);
2736				}
2737				memcpy(vmeta->bvdc[bvd], vmeta->vdc,
2738				    sizeof(struct ddf_vdc_record));
2739				SET8(vmeta, bvdc[bvd]->Secondary_Element_Seq, bvd);
2740				SET64(vmeta, bvdc[bvd]->Block_Count,
2741				    sd->sd_size / vol->v_sectorsize);
2742				SET32(vmeta, bvdc[bvd]->Physical_Disk_Sequence[pos],
2743				    GET32(&pd->pd_meta, pdd->PD_Reference));
2744				val2 = (uint64_t *)&(vmeta->bvdc[bvd]->Physical_Disk_Sequence[
2745				    GET16(vmeta, hdr->Max_Primary_Element_Entries)]);
2746				SET64P(vmeta, val2 + pos,
2747				    sd->sd_offset / vol->v_sectorsize);
2748			}
2749			if (vmeta->bvdc[bvd] == NULL)
2750				continue;
2751
2752			j = ddf_meta_find_pd(gmeta, NULL,
2753			    GET32(vmeta, bvdc[bvd]->Physical_Disk_Sequence[pos]));
2754			if (j < 0)
2755				continue;
2756			SET32(gmeta, pdr->entry[j].PD_Type,
2757			    GET32(gmeta, pdr->entry[j].PD_Type) |
2758			    DDF_PDE_PARTICIPATING);
2759			if (sd->sd_state == G_RAID_SUBDISK_S_NONE)
2760				SET32(gmeta, pdr->entry[j].PD_State,
2761				    GET32(gmeta, pdr->entry[j].PD_State) |
2762				    DDF_PDE_FAILED | DDF_PDE_MISSING);
2763			else if (sd->sd_state == G_RAID_SUBDISK_S_FAILED)
2764				SET32(gmeta, pdr->entry[j].PD_State,
2765				    GET32(gmeta, pdr->entry[j].PD_State) |
2766				    DDF_PDE_FAILED | DDF_PDE_PFA);
2767			else if (sd->sd_state <= G_RAID_SUBDISK_S_REBUILD)
2768				SET32(gmeta, pdr->entry[j].PD_State,
2769				    GET32(gmeta, pdr->entry[j].PD_State) |
2770				    DDF_PDE_FAILED);
2771			else
2772				SET32(gmeta, pdr->entry[j].PD_State,
2773				    GET32(gmeta, pdr->entry[j].PD_State) |
2774				    DDF_PDE_ONLINE);
2775		}
2776	}
2777
2778	/* Remove disks without "participating" flag (unused). */
2779	for (i = 0, j = -1; i < GET16(gmeta, pdr->Populated_PDEs); i++) {
2780		if (isff(gmeta->pdr->entry[i].PD_GUID, 24))
2781			continue;
2782		if (GET16(gmeta, pdr->entry[i].PD_Type) & DDF_PDE_PARTICIPATING)
2783			j = i;
2784		else
2785			memset(&gmeta->pdr->entry[i], 0xff,
2786			    sizeof(struct ddf_pd_entry));
2787	}
2788	SET16(gmeta, pdr->Populated_PDEs, j + 1);
2789
2790	/* Update per-disk metadata and write them. */
2791	TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
2792		pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data;
2793		if (disk->d_state != G_RAID_DISK_S_ACTIVE)
2794			continue;
2795		/* Update PDR. */
2796		memcpy(pd->pd_meta.pdr, gmeta->pdr,
2797		    GET32(&pd->pd_meta, hdr->pdr_length) *
2798		    pd->pd_meta.sectorsize);
2799		/* Update VDR. */
2800		SET16(&pd->pd_meta, vdr->Populated_VDEs, 0);
2801		TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
2802			if (vol->v_stopping)
2803				continue;
2804			pv = (struct g_raid_md_ddf_pervolume *)vol->v_md_data;
2805			i = ddf_meta_find_vd(&pd->pd_meta,
2806			    pv->pv_meta.vde->VD_GUID);
2807			if (i < 0)
2808				i = ddf_meta_find_vd(&pd->pd_meta, NULL);
2809			if (i >= 0)
2810				memcpy(&pd->pd_meta.vdr->entry[i],
2811				    pv->pv_meta.vde,
2812				    sizeof(struct ddf_vd_entry));
2813		}
2814		/* Update VDC. */
2815		if (mdi->mdio_starting == 0) {
2816			/* Remove all VDCs to restore needed later. */
2817			j = GETCRNUM(&pd->pd_meta);
2818			for (i = 0; i < j; i++) {
2819				vdc = GETVDCPTR(&pd->pd_meta, i);
2820				if (GET32D(&pd->pd_meta, vdc->Signature) !=
2821				    DDF_VDCR_SIGNATURE)
2822					continue;
2823				SET32D(&pd->pd_meta, vdc->Signature, 0xffffffff);
2824			}
2825		}
2826		TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) {
2827			vol = sd->sd_volume;
2828			if (vol->v_stopping)
2829				continue;
2830			pv = (struct g_raid_md_ddf_pervolume *)vol->v_md_data;
2831			vmeta = &pv->pv_meta;
2832			vdc = ddf_meta_find_vdc(&pd->pd_meta,
2833			    vmeta->vde->VD_GUID);
2834			if (vdc == NULL)
2835				vdc = ddf_meta_find_vdc(&pd->pd_meta, NULL);
2836			if (vdc != NULL) {
2837				bvd = sd->sd_pos / GET16(vmeta,
2838				    vdc->Primary_Element_Count);
2839				memcpy(vdc, vmeta->bvdc[bvd],
2840				    GET16(&pd->pd_meta,
2841				    hdr->Configuration_Record_Length) *
2842				    pd->pd_meta.sectorsize);
2843			}
2844		}
2845		G_RAID_DEBUG(1, "Writing DDF metadata to %s",
2846		    g_raid_get_diskname(disk));
2847		g_raid_md_ddf_print(&pd->pd_meta);
2848		ddf_meta_write(disk->d_consumer, &pd->pd_meta);
2849	}
2850	return (0);
2851}
2852
2853static int
2854g_raid_md_fail_disk_ddf(struct g_raid_md_object *md,
2855    struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk)
2856{
2857	struct g_raid_softc *sc;
2858	struct g_raid_md_ddf_perdisk *pd;
2859	struct g_raid_subdisk *sd;
2860	int i;
2861
2862	sc = md->mdo_softc;
2863	pd = (struct g_raid_md_ddf_perdisk *)tdisk->d_md_data;
2864
2865	/* We can't fail disk that is not a part of array now. */
2866	if (tdisk->d_state != G_RAID_DISK_S_ACTIVE)
2867		return (-1);
2868
2869	/*
2870	 * Mark disk as failed in metadata and try to write that metadata
2871	 * to the disk itself to prevent it's later resurrection as STALE.
2872	 */
2873	G_RAID_DEBUG(1, "Writing DDF metadata to %s",
2874	    g_raid_get_diskname(tdisk));
2875	i = ddf_meta_find_pd(&pd->pd_meta, NULL, GET32(&pd->pd_meta, pdd->PD_Reference));
2876	SET16(&pd->pd_meta, pdr->entry[i].PD_State, DDF_PDE_FAILED | DDF_PDE_PFA);
2877	if (tdisk->d_consumer != NULL)
2878		ddf_meta_write(tdisk->d_consumer, &pd->pd_meta);
2879
2880	/* Change states. */
2881	g_raid_change_disk_state(tdisk, G_RAID_DISK_S_FAILED);
2882	TAILQ_FOREACH(sd, &tdisk->d_subdisks, sd_next) {
2883		g_raid_change_subdisk_state(sd,
2884		    G_RAID_SUBDISK_S_FAILED);
2885		g_raid_event_send(sd, G_RAID_SUBDISK_E_FAILED,
2886		    G_RAID_EVENT_SUBDISK);
2887	}
2888
2889	/* Write updated metadata to remaining disks. */
2890	g_raid_md_write_ddf(md, NULL, NULL, tdisk);
2891
2892	g_raid_md_ddf_refill(sc);
2893	return (0);
2894}
2895
2896static int
2897g_raid_md_free_disk_ddf(struct g_raid_md_object *md,
2898    struct g_raid_disk *disk)
2899{
2900	struct g_raid_md_ddf_perdisk *pd;
2901
2902	pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data;
2903	ddf_meta_free(&pd->pd_meta);
2904	free(pd, M_MD_DDF);
2905	disk->d_md_data = NULL;
2906	return (0);
2907}
2908
2909static int
2910g_raid_md_free_volume_ddf(struct g_raid_md_object *md,
2911    struct g_raid_volume *vol)
2912{
2913	struct g_raid_md_ddf_object *mdi;
2914	struct g_raid_md_ddf_pervolume *pv;
2915
2916	mdi = (struct g_raid_md_ddf_object *)md;
2917	pv = (struct g_raid_md_ddf_pervolume *)vol->v_md_data;
2918	ddf_vol_meta_free(&pv->pv_meta);
2919	if (!pv->pv_started) {
2920		pv->pv_started = 1;
2921		mdi->mdio_starting--;
2922		callout_stop(&pv->pv_start_co);
2923	}
2924	return (0);
2925}
2926
2927static int
2928g_raid_md_free_ddf(struct g_raid_md_object *md)
2929{
2930	struct g_raid_md_ddf_object *mdi;
2931
2932	mdi = (struct g_raid_md_ddf_object *)md;
2933	if (!mdi->mdio_started) {
2934		mdi->mdio_started = 0;
2935		callout_stop(&mdi->mdio_start_co);
2936		G_RAID_DEBUG1(1, md->mdo_softc,
2937		    "root_mount_rel %p", mdi->mdio_rootmount);
2938		root_mount_rel(mdi->mdio_rootmount);
2939		mdi->mdio_rootmount = NULL;
2940	}
2941	ddf_meta_free(&mdi->mdio_meta);
2942	return (0);
2943}
2944
2945G_RAID_MD_DECLARE(g_raid_md_ddf);
2946