md_ddf.c revision a82e3a8b2430553d1a48209e110921023d727e45
1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2012 Alexander Motin <mav@FreeBSD.org>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD$");
31
32#include <sys/param.h>
33#include <sys/bio.h>
34#include <sys/endian.h>
35#include <sys/kernel.h>
36#include <sys/kobj.h>
37#include <sys/limits.h>
38#include <sys/lock.h>
39#include <sys/malloc.h>
40#include <sys/mutex.h>
41#include <sys/systm.h>
42#include <sys/time.h>
43#include <sys/clock.h>
44#include <geom/geom.h>
45#include "geom/raid/g_raid.h"
46#include "geom/raid/md_ddf.h"
47#include "g_raid_md_if.h"
48
49static MALLOC_DEFINE(M_MD_DDF, "md_ddf_data", "GEOM_RAID DDF metadata");
50
51#define	DDF_MAX_DISKS_HARD	128
52
53#define	DDF_MAX_DISKS	16
54#define	DDF_MAX_VDISKS	7
55#define	DDF_MAX_PARTITIONS	1
56
57#define DECADE (3600*24*(365*10+2))	/* 10 years in seconds. */
58
59struct ddf_meta {
60	u_int	sectorsize;
61	u_int	bigendian;
62	struct ddf_header *hdr;
63	struct ddf_cd_record *cdr;
64	struct ddf_pd_record *pdr;
65	struct ddf_vd_record *vdr;
66	void *cr;
67	struct ddf_pdd_record *pdd;
68	struct ddf_bbm_log *bbm;
69};
70
71struct ddf_vol_meta {
72	u_int	sectorsize;
73	u_int	bigendian;
74	struct ddf_header *hdr;
75	struct ddf_cd_record *cdr;
76	struct ddf_vd_entry *vde;
77	struct ddf_vdc_record *vdc;
78	struct ddf_vdc_record *bvdc[DDF_MAX_DISKS_HARD];
79};
80
81struct g_raid_md_ddf_perdisk {
82	struct ddf_meta	 pd_meta;
83};
84
85struct g_raid_md_ddf_pervolume {
86	struct ddf_vol_meta		 pv_meta;
87	int				 pv_started;
88	struct callout			 pv_start_co;	/* STARTING state timer. */
89};
90
91struct g_raid_md_ddf_object {
92	struct g_raid_md_object	 mdio_base;
93	u_int			 mdio_bigendian;
94	struct ddf_meta		 mdio_meta;
95	int			 mdio_starting;
96	struct callout		 mdio_start_co;	/* STARTING state timer. */
97	int			 mdio_started;
98	struct root_hold_token	*mdio_rootmount; /* Root mount delay token. */
99};
100
101static g_raid_md_create_req_t g_raid_md_create_req_ddf;
102static g_raid_md_taste_t g_raid_md_taste_ddf;
103static g_raid_md_event_t g_raid_md_event_ddf;
104static g_raid_md_volume_event_t g_raid_md_volume_event_ddf;
105static g_raid_md_ctl_t g_raid_md_ctl_ddf;
106static g_raid_md_write_t g_raid_md_write_ddf;
107static g_raid_md_fail_disk_t g_raid_md_fail_disk_ddf;
108static g_raid_md_free_disk_t g_raid_md_free_disk_ddf;
109static g_raid_md_free_volume_t g_raid_md_free_volume_ddf;
110static g_raid_md_free_t g_raid_md_free_ddf;
111
112static kobj_method_t g_raid_md_ddf_methods[] = {
113	KOBJMETHOD(g_raid_md_create_req,	g_raid_md_create_req_ddf),
114	KOBJMETHOD(g_raid_md_taste,	g_raid_md_taste_ddf),
115	KOBJMETHOD(g_raid_md_event,	g_raid_md_event_ddf),
116	KOBJMETHOD(g_raid_md_volume_event,	g_raid_md_volume_event_ddf),
117	KOBJMETHOD(g_raid_md_ctl,	g_raid_md_ctl_ddf),
118	KOBJMETHOD(g_raid_md_write,	g_raid_md_write_ddf),
119	KOBJMETHOD(g_raid_md_fail_disk,	g_raid_md_fail_disk_ddf),
120	KOBJMETHOD(g_raid_md_free_disk,	g_raid_md_free_disk_ddf),
121	KOBJMETHOD(g_raid_md_free_volume,	g_raid_md_free_volume_ddf),
122	KOBJMETHOD(g_raid_md_free,	g_raid_md_free_ddf),
123	{ 0, 0 }
124};
125
126static struct g_raid_md_class g_raid_md_ddf_class = {
127	"DDF",
128	g_raid_md_ddf_methods,
129	sizeof(struct g_raid_md_ddf_object),
130	.mdc_enable = 1,
131	.mdc_priority = 100
132};
133
/*
 * Endian-aware accessors for metadata fields.  'm' is any metadata
 * descriptor with a 'bigendian' member; that member selects the byte order.
 *
 *	GETn(m, f)  / SETn(m, f, v)	'f' is a member path rooted at 'm',
 *					e.g. GET32(meta, hdr->cr_length).
 *	GETnD(m, f) / SETnD(m, f, v)	'f' is an lvalue reached some other
 *					way, e.g. through a local pointer.
 *	GETnP(m, f) / SETnP(m, f, v)	'f' is a pointer to the field.
 *
 * 'm' is evaluated more than once by most of these, so it must not have
 * side effects.
 */
#define GET8(m, f)	((m)->f)
#define GET16(m, f)	((m)->bigendian ? be16dec(&(m)->f) : le16dec(&(m)->f))
#define GET32(m, f)	((m)->bigendian ? be32dec(&(m)->f) : le32dec(&(m)->f))
#define GET64(m, f)	((m)->bigendian ? be64dec(&(m)->f) : le64dec(&(m)->f))
#define GET8D(m, f)	(f)
#define GET16D(m, f)	((m)->bigendian ? be16dec(&(f)) : le16dec(&(f)))
#define GET32D(m, f)	((m)->bigendian ? be32dec(&(f)) : le32dec(&(f)))
#define GET64D(m, f)	((m)->bigendian ? be64dec(&(f)) : le64dec(&(f)))
#define GET8P(m, f)	(*(f))
#define GET16P(m, f)	((m)->bigendian ? be16dec(f) : le16dec(f))
#define GET32P(m, f)	((m)->bigendian ? be32dec(f) : le32dec(f))
#define GET64P(m, f)	((m)->bigendian ? be64dec(f) : le64dec(f))

#define SET8P(m, f, v)							\
	(*(f) = (v))
#define SET16P(m, f, v)							\
	do {								\
		if ((m)->bigendian)					\
			be16enc((f), (v));				\
		else							\
			le16enc((f), (v));				\
	} while (0)
#define SET32P(m, f, v)							\
	do {								\
		if ((m)->bigendian)					\
			be32enc((f), (v));				\
		else							\
			le32enc((f), (v));				\
	} while (0)
#define SET64P(m, f, v)							\
	do {								\
		if ((m)->bigendian)					\
			be64enc((f), (v));				\
		else							\
			le64enc((f), (v));				\
	} while (0)
#define SET8(m, f, v)	SET8P((m), &((m)->f), (v))
#define SET16(m, f, v)	SET16P((m), &((m)->f), (v))
#define SET32(m, f, v)	SET32P((m), &((m)->f), (v))
#define SET64(m, f, v)	SET64P((m), &((m)->f), (v))
#define SET8D(m, f, v)	SET8P((m), &(f), (v))
#define SET16D(m, f, v)	SET16P((m), &(f), (v))
#define SET32D(m, f, v)	SET32P((m), &(f), (v))
#define SET64D(m, f, v)	SET64P((m), &(f), (v))

/*
 * Number of configuration records in the configuration section: section
 * length divided by the length of one record.
 * NOTE(review): divides by zero if Configuration_Record_Length is 0;
 * presumably the loader rejects such metadata -- confirm.
 */
#define GETCRNUM(m)	(GET32((m), hdr->cr_length) /			\
	GET16((m), hdr->Configuration_Record_Length))

/* Pointer to the n-th configuration record, viewed as a VD configuration. */
#define GETVDCPTR(m, n)	((struct ddf_vdc_record *)((uint8_t *)(m)->cr +	\
	(n) * GET16((m), hdr->Configuration_Record_Length) *		\
	(m)->sectorsize))

/* Pointer to the n-th configuration record, viewed as a spare assignment. */
#define GETSAPTR(m, n)	((struct ddf_sa_record *)((uint8_t *)(m)->cr +	\
	(n) * GET16((m), hdr->Configuration_Record_Length) *		\
	(m)->sectorsize))
189
/*
 * Tell whether the first 'size' bytes of 'buf' are all 0xff.
 * Returns 1 if they are (also when 'size' is not positive), 0 otherwise.
 */
static int
isff(uint8_t *buf, int size)
{
	uint8_t *end;

	if (size <= 0)
		return (1);
	for (end = buf + size; buf != end; buf++) {
		if (*buf != 0xff)
			return (0);
	}
	return (1);
}
200
/*
 * Print a 24-byte GUID.  If every byte is either NUL or lies in the range
 * ' '..127 the GUID is printed as a quoted string, otherwise as 48 hex
 * digits.
 */
static void
print_guid(uint8_t *buf)
{
	uint8_t c;
	int i;

	/* Stop at the first byte that rules out the textual form. */
	for (i = 0; i < 24; i++) {
		c = buf[i];
		if (c != 0 && (c < ' ' || c > 127))
			break;
	}
	if (i == 24) {
		printf("'%.24s'", buf);
		return;
	}
	for (i = 0; i < 24; i++)
		printf("%02x", buf[i]);
}
220
221static void
222g_raid_md_ddf_print(struct ddf_meta *meta)
223{
224	struct ddf_vdc_record *vdc;
225	struct ddf_vuc_record *vuc;
226	struct ddf_sa_record *sa;
227	uint64_t *val2;
228	uint32_t val;
229	int i, j, k, num, num2;
230
231	if (g_raid_debug < 1)
232		return;
233
234	printf("********* DDF Metadata *********\n");
235	printf("**** Header ****\n");
236	printf("DDF_Header_GUID      ");
237	print_guid(meta->hdr->DDF_Header_GUID);
238	printf("\n");
239	printf("DDF_rev              %8.8s\n", (char *)&meta->hdr->DDF_rev[0]);
240	printf("Sequence_Number      0x%08x\n", GET32(meta, hdr->Sequence_Number));
241	printf("TimeStamp            0x%08x\n", GET32(meta, hdr->TimeStamp));
242	printf("Open_Flag            0x%02x\n", GET16(meta, hdr->Open_Flag));
243	printf("Foreign_Flag         0x%02x\n", GET16(meta, hdr->Foreign_Flag));
244	printf("Diskgrouping         0x%02x\n", GET16(meta, hdr->Diskgrouping));
245	printf("Primary_Header_LBA   %ju\n", GET64(meta, hdr->Primary_Header_LBA));
246	printf("Secondary_Header_LBA %ju\n", GET64(meta, hdr->Secondary_Header_LBA));
247	printf("WorkSpace_Length     %u\n", GET32(meta, hdr->WorkSpace_Length));
248	printf("WorkSpace_LBA        %ju\n", GET64(meta, hdr->WorkSpace_LBA));
249	printf("Max_PD_Entries       %u\n", GET16(meta, hdr->Max_PD_Entries));
250	printf("Max_VD_Entries       %u\n", GET16(meta, hdr->Max_VD_Entries));
251	printf("Max_Partitions       %u\n", GET16(meta, hdr->Max_Partitions));
252	printf("Configuration_Record_Length %u\n", GET16(meta, hdr->Configuration_Record_Length));
253	printf("Max_Primary_Element_Entries %u\n", GET16(meta, hdr->Max_Primary_Element_Entries));
254	printf("Controller Data      %u:%u\n", GET32(meta, hdr->cd_section), GET32(meta, hdr->cd_length));
255	printf("Physical Disk        %u:%u\n", GET32(meta, hdr->pdr_section), GET32(meta, hdr->pdr_length));
256	printf("Virtual Disk         %u:%u\n", GET32(meta, hdr->vdr_section), GET32(meta, hdr->vdr_length));
257	printf("Configuration Recs   %u:%u\n", GET32(meta, hdr->cr_section), GET32(meta, hdr->cr_length));
258	printf("Physical Disk Recs   %u:%u\n", GET32(meta, hdr->pdd_section), GET32(meta, hdr->pdd_length));
259	printf("BBM Log              %u:%u\n", GET32(meta, hdr->bbmlog_section), GET32(meta, hdr->bbmlog_length));
260	printf("Diagnostic Space     %u:%u\n", GET32(meta, hdr->Diagnostic_Space), GET32(meta, hdr->Diagnostic_Space_Length));
261	printf("Vendor_Specific_Logs %u:%u\n", GET32(meta, hdr->Vendor_Specific_Logs), GET32(meta, hdr->Vendor_Specific_Logs_Length));
262	printf("**** Controller Data ****\n");
263	printf("Controller_GUID      ");
264	print_guid(meta->cdr->Controller_GUID);
265	printf("\n");
266	printf("Controller_Type      0x%04x%04x 0x%04x%04x\n",
267	    GET16(meta, cdr->Controller_Type.Vendor_ID),
268	    GET16(meta, cdr->Controller_Type.Device_ID),
269	    GET16(meta, cdr->Controller_Type.SubVendor_ID),
270	    GET16(meta, cdr->Controller_Type.SubDevice_ID));
271	printf("Product_ID           '%.16s'\n", (char *)&meta->cdr->Product_ID[0]);
272	printf("**** Physical Disk Records ****\n");
273	printf("Populated_PDEs       %u\n", GET16(meta, pdr->Populated_PDEs));
274	printf("Max_PDE_Supported    %u\n", GET16(meta, pdr->Max_PDE_Supported));
275	for (j = 0; j < GET16(meta, pdr->Populated_PDEs); j++) {
276		if (isff(meta->pdr->entry[j].PD_GUID, 24))
277			continue;
278		if (GET32(meta, pdr->entry[j].PD_Reference) == 0xffffffff)
279			continue;
280		printf("PD_GUID              ");
281		print_guid(meta->pdr->entry[j].PD_GUID);
282		printf("\n");
283		printf("PD_Reference         0x%08x\n",
284		    GET32(meta, pdr->entry[j].PD_Reference));
285		printf("PD_Type              0x%04x\n",
286		    GET16(meta, pdr->entry[j].PD_Type));
287		printf("PD_State             0x%04x\n",
288		    GET16(meta, pdr->entry[j].PD_State));
289		printf("Configured_Size      %ju\n",
290		    GET64(meta, pdr->entry[j].Configured_Size));
291		printf("Block_Size           %u\n",
292		    GET16(meta, pdr->entry[j].Block_Size));
293	}
294	printf("**** Virtual Disk Records ****\n");
295	printf("Populated_VDEs       %u\n", GET16(meta, vdr->Populated_VDEs));
296	printf("Max_VDE_Supported    %u\n", GET16(meta, vdr->Max_VDE_Supported));
297	for (j = 0; j < GET16(meta, vdr->Populated_VDEs); j++) {
298		if (isff(meta->vdr->entry[j].VD_GUID, 24))
299			continue;
300		printf("VD_GUID              ");
301		print_guid(meta->vdr->entry[j].VD_GUID);
302		printf("\n");
303		printf("VD_Number            0x%04x\n",
304		    GET16(meta, vdr->entry[j].VD_Number));
305		printf("VD_Type              0x%04x\n",
306		    GET16(meta, vdr->entry[j].VD_Type));
307		printf("VD_State             0x%02x\n",
308		    GET8(meta, vdr->entry[j].VD_State));
309		printf("Init_State           0x%02x\n",
310		    GET8(meta, vdr->entry[j].Init_State));
311		printf("Drive_Failures_Remaining %u\n",
312		    GET8(meta, vdr->entry[j].Drive_Failures_Remaining));
313		printf("VD_Name              '%.16s'\n",
314		    (char *)&meta->vdr->entry[j].VD_Name);
315	}
316	printf("**** Configuration Records ****\n");
317	num = GETCRNUM(meta);
318	for (j = 0; j < num; j++) {
319		vdc = GETVDCPTR(meta, j);
320		val = GET32D(meta, vdc->Signature);
321		switch (val) {
322		case DDF_VDCR_SIGNATURE:
323			printf("** Virtual Disk Configuration **\n");
324			printf("VD_GUID              ");
325			print_guid(vdc->VD_GUID);
326			printf("\n");
327			printf("Timestamp            0x%08x\n",
328			    GET32D(meta, vdc->Timestamp));
329			printf("Sequence_Number      0x%08x\n",
330			    GET32D(meta, vdc->Sequence_Number));
331			printf("Primary_Element_Count %u\n",
332			    GET16D(meta, vdc->Primary_Element_Count));
333			printf("Stripe_Size          %u\n",
334			    GET8D(meta, vdc->Stripe_Size));
335			printf("Primary_RAID_Level   0x%02x\n",
336			    GET8D(meta, vdc->Primary_RAID_Level));
337			printf("RLQ                  0x%02x\n",
338			    GET8D(meta, vdc->RLQ));
339			printf("Secondary_Element_Count %u\n",
340			    GET8D(meta, vdc->Secondary_Element_Count));
341			printf("Secondary_Element_Seq %u\n",
342			    GET8D(meta, vdc->Secondary_Element_Seq));
343			printf("Secondary_RAID_Level 0x%02x\n",
344			    GET8D(meta, vdc->Secondary_RAID_Level));
345			printf("Block_Count          %ju\n",
346			    GET64D(meta, vdc->Block_Count));
347			printf("VD_Size              %ju\n",
348			    GET64D(meta, vdc->VD_Size));
349			printf("Block_Size           %u\n",
350			    GET16D(meta, vdc->Block_Size));
351			printf("Rotate_Parity_count  %u\n",
352			    GET8D(meta, vdc->Rotate_Parity_count));
353			printf("Associated_Spare_Disks");
354			for (i = 0; i < 8; i++) {
355				if (GET32D(meta, vdc->Associated_Spares[i]) != 0xffffffff)
356					printf(" 0x%08x", GET32D(meta, vdc->Associated_Spares[i]));
357			}
358			printf("\n");
359			printf("Cache_Flags          %016jx\n",
360			    GET64D(meta, vdc->Cache_Flags));
361			printf("BG_Rate              %u\n",
362			    GET8D(meta, vdc->BG_Rate));
363			printf("MDF_Parity_Disks     %u\n",
364			    GET8D(meta, vdc->MDF_Parity_Disks));
365			printf("MDF_Parity_Generator_Polynomial 0x%04x\n",
366			    GET16D(meta, vdc->MDF_Parity_Generator_Polynomial));
367			printf("MDF_Constant_Generation_Method 0x%02x\n",
368			    GET8D(meta, vdc->MDF_Constant_Generation_Method));
369			printf("Physical_Disks      ");
370			num2 = GET16D(meta, vdc->Primary_Element_Count);
371			val2 = (uint64_t *)&(vdc->Physical_Disk_Sequence[GET16(meta, hdr->Max_Primary_Element_Entries)]);
372			for (i = 0; i < num2; i++)
373				printf(" 0x%08x @ %ju",
374				    GET32D(meta, vdc->Physical_Disk_Sequence[i]),
375				    GET64P(meta, val2 + i));
376			printf("\n");
377			break;
378		case DDF_VUCR_SIGNATURE:
379			printf("** Vendor Unique Configuration **\n");
380			vuc = (struct ddf_vuc_record *)vdc;
381			printf("VD_GUID              ");
382			print_guid(vuc->VD_GUID);
383			printf("\n");
384			break;
385		case DDF_SA_SIGNATURE:
386			printf("** Spare Assignment Configuration **\n");
387			sa = (struct ddf_sa_record *)vdc;
388			printf("Timestamp            0x%08x\n",
389			    GET32D(meta, sa->Timestamp));
390			printf("Spare_Type           0x%02x\n",
391			    GET8D(meta, sa->Spare_Type));
392			printf("Populated_SAEs       %u\n",
393			    GET16D(meta, sa->Populated_SAEs));
394			printf("MAX_SAE_Supported    %u\n",
395			    GET16D(meta, sa->MAX_SAE_Supported));
396			for (i = 0; i < GET16D(meta, sa->Populated_SAEs); i++) {
397				if (isff(sa->entry[i].VD_GUID, 24))
398					continue;
399				printf("VD_GUID             ");
400				for (k = 0; k < 24; k++)
401					printf("%02x", sa->entry[i].VD_GUID[k]);
402				printf("\n");
403				printf("Secondary_Element   %u\n",
404				    GET16D(meta, sa->entry[i].Secondary_Element));
405			}
406			break;
407		case 0x00000000:
408		case 0xFFFFFFFF:
409			break;
410		default:
411			printf("Unknown configuration signature %08x\n", val);
412			break;
413		}
414	}
415	printf("**** Physical Disk Data ****\n");
416	printf("PD_GUID              ");
417	print_guid(meta->pdd->PD_GUID);
418	printf("\n");
419	printf("PD_Reference         0x%08x\n",
420	    GET32(meta, pdd->PD_Reference));
421	printf("Forced_Ref_Flag      0x%02x\n",
422	    GET8(meta, pdd->Forced_Ref_Flag));
423	printf("Forced_PD_GUID_Flag  0x%02x\n",
424	    GET8(meta, pdd->Forced_PD_GUID_Flag));
425}
426
427static int
428ddf_meta_find_pd(struct ddf_meta *meta, uint8_t *GUID, uint32_t PD_Reference)
429{
430	int i;
431
432	for (i = 0; i < GET16(meta, pdr->Populated_PDEs); i++) {
433		if (GUID != NULL) {
434			if (memcmp(meta->pdr->entry[i].PD_GUID, GUID, 24) == 0)
435				return (i);
436		} else if (PD_Reference != 0xffffffff) {
437			if (GET32(meta, pdr->entry[i].PD_Reference) == PD_Reference)
438				return (i);
439		} else
440			if (isff(meta->pdr->entry[i].PD_GUID, 24))
441				return (i);
442	}
443	if (GUID == NULL && PD_Reference == 0xffffffff) {
444		if (i >= GET16(meta, pdr->Max_PDE_Supported))
445			return (-1);
446		SET16(meta, pdr->Populated_PDEs, i + 1);
447		return (i);
448	}
449	return (-1);
450}
451
452static int
453ddf_meta_find_vd(struct ddf_meta *meta, uint8_t *GUID)
454{
455	int i;
456
457	for (i = 0; i < GET16(meta, vdr->Populated_VDEs); i++) {
458		if (GUID != NULL) {
459			if (memcmp(meta->vdr->entry[i].VD_GUID, GUID, 24) == 0)
460				return (i);
461		} else
462			if (isff(meta->vdr->entry[i].VD_GUID, 24))
463				return (i);
464	}
465	if (GUID == NULL) {
466		if (i >= GET16(meta, vdr->Max_VDE_Supported))
467			return (-1);
468		SET16(meta, vdr->Populated_VDEs, i + 1);
469		return (i);
470	}
471	return (-1);
472}
473
474static struct ddf_vdc_record *
475ddf_meta_find_vdc(struct ddf_meta *meta, uint8_t *GUID)
476{
477	struct ddf_vdc_record *vdc;
478	int i, num;
479
480	num = GETCRNUM(meta);
481	for (i = 0; i < num; i++) {
482		vdc = GETVDCPTR(meta, i);
483		if (GUID != NULL) {
484			if (GET32D(meta, vdc->Signature) == DDF_VDCR_SIGNATURE &&
485			    memcmp(vdc->VD_GUID, GUID, 24) == 0)
486				return (vdc);
487		} else
488			if (GET32D(meta, vdc->Signature) == 0xffffffff ||
489			    GET32D(meta, vdc->Signature) == 0)
490				return (vdc);
491	}
492	return (NULL);
493}
494
495static int
496ddf_meta_count_vdc(struct ddf_meta *meta, uint8_t *GUID)
497{
498	struct ddf_vdc_record *vdc;
499	int i, num, cnt;
500
501	cnt = 0;
502	num = GETCRNUM(meta);
503	for (i = 0; i < num; i++) {
504		vdc = GETVDCPTR(meta, i);
505		if (GET32D(meta, vdc->Signature) != DDF_VDCR_SIGNATURE)
506			continue;
507		if (GUID == NULL || memcmp(vdc->VD_GUID, GUID, 24) == 0)
508			cnt++;
509	}
510	return (cnt);
511}
512
513static int
514ddf_meta_find_disk(struct ddf_vol_meta *vmeta, uint32_t PD_Reference,
515    int *bvdp, int *posp)
516{
517	int i, bvd, pos;
518
519	i = 0;
520	for (bvd = 0; bvd < GET8(vmeta, vdc->Secondary_Element_Count); bvd++) {
521		if (vmeta->bvdc[bvd] == NULL) {
522			i += GET16(vmeta, vdc->Primary_Element_Count); // XXX
523			continue;
524		}
525		for (pos = 0; pos < GET16(vmeta, bvdc[bvd]->Primary_Element_Count);
526		    pos++, i++) {
527			if (GET32(vmeta, bvdc[bvd]->Physical_Disk_Sequence[pos]) ==
528			    PD_Reference) {
529				if (bvdp != NULL)
530					*bvdp = bvd;
531				if (posp != NULL)
532					*posp = pos;
533				return (i);
534			}
535		}
536	}
537	return (-1);
538}
539
540static struct ddf_sa_record *
541ddf_meta_find_sa(struct ddf_meta *meta, int create)
542{
543	struct ddf_sa_record *sa;
544	int i, num;
545
546	num = GETCRNUM(meta);
547	for (i = 0; i < num; i++) {
548		sa = GETSAPTR(meta, i);
549		if (GET32D(meta, sa->Signature) == DDF_SA_SIGNATURE)
550			return (sa);
551	}
552	if (create) {
553		for (i = 0; i < num; i++) {
554			sa = GETSAPTR(meta, i);
555			if (GET32D(meta, sa->Signature) == 0xffffffff ||
556			    GET32D(meta, sa->Signature) == 0)
557				return (sa);
558		}
559	}
560	return (NULL);
561}
562
563static void
564ddf_meta_create(struct g_raid_disk *disk, struct ddf_meta *sample)
565{
566	struct timespec ts;
567	struct clocktime ct;
568	struct g_raid_md_ddf_perdisk *pd;
569	struct g_raid_md_ddf_object *mdi;
570	struct ddf_meta *meta;
571	struct ddf_pd_entry *pde;
572	off_t anchorlba;
573	u_int ss, pos, size;
574	int len, error;
575	char serial_buffer[24];
576
577	if (sample->hdr == NULL)
578		sample = NULL;
579
580	mdi = (struct g_raid_md_ddf_object *)disk->d_softc->sc_md;
581	pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data;
582	meta = &pd->pd_meta;
583	ss = disk->d_consumer->provider->sectorsize;
584	anchorlba = disk->d_consumer->provider->mediasize / ss - 1;
585
586	meta->sectorsize = ss;
587	meta->bigendian = sample ? sample->bigendian : mdi->mdio_bigendian;
588	getnanotime(&ts);
589	clock_ts_to_ct(&ts, &ct);
590
591	/* Header */
592	meta->hdr = malloc(ss, M_MD_DDF, M_WAITOK);
593	memset(meta->hdr, 0xff, ss);
594	if (sample) {
595		memcpy(meta->hdr, sample->hdr, sizeof(struct ddf_header));
596		if (ss != sample->sectorsize) {
597			SET32(meta, hdr->WorkSpace_Length,
598			    howmany(GET32(sample, hdr->WorkSpace_Length) *
599			        sample->sectorsize, ss));
600			SET16(meta, hdr->Configuration_Record_Length,
601			    howmany(GET16(sample,
602			        hdr->Configuration_Record_Length) *
603				sample->sectorsize, ss));
604			SET32(meta, hdr->cd_length,
605			    howmany(GET32(sample, hdr->cd_length) *
606			        sample->sectorsize, ss));
607			SET32(meta, hdr->pdr_length,
608			    howmany(GET32(sample, hdr->pdr_length) *
609			        sample->sectorsize, ss));
610			SET32(meta, hdr->vdr_length,
611			    howmany(GET32(sample, hdr->vdr_length) *
612			        sample->sectorsize, ss));
613			SET32(meta, hdr->cr_length,
614			    howmany(GET32(sample, hdr->cr_length) *
615			        sample->sectorsize, ss));
616			SET32(meta, hdr->pdd_length,
617			    howmany(GET32(sample, hdr->pdd_length) *
618			        sample->sectorsize, ss));
619			SET32(meta, hdr->bbmlog_length,
620			    howmany(GET32(sample, hdr->bbmlog_length) *
621			        sample->sectorsize, ss));
622			SET32(meta, hdr->Diagnostic_Space,
623			    howmany(GET32(sample, hdr->bbmlog_length) *
624			        sample->sectorsize, ss));
625			SET32(meta, hdr->Vendor_Specific_Logs,
626			    howmany(GET32(sample, hdr->bbmlog_length) *
627			        sample->sectorsize, ss));
628		}
629	} else {
630		SET32(meta, hdr->Signature, DDF_HEADER_SIGNATURE);
631		snprintf(meta->hdr->DDF_Header_GUID, 25, "FreeBSD %08x%08x",
632		    (u_int)(ts.tv_sec - DECADE), arc4random());
633		memcpy(meta->hdr->DDF_rev, "02.00.00", 8);
634		SET32(meta, hdr->TimeStamp, (ts.tv_sec - DECADE));
635		SET32(meta, hdr->WorkSpace_Length, 16 * 1024 * 1024 / ss);
636		SET16(meta, hdr->Max_PD_Entries, DDF_MAX_DISKS - 1);
637		SET16(meta, hdr->Max_VD_Entries, DDF_MAX_VDISKS);
638		SET16(meta, hdr->Max_Partitions, DDF_MAX_PARTITIONS);
639		SET16(meta, hdr->Max_Primary_Element_Entries, DDF_MAX_DISKS);
640		SET16(meta, hdr->Configuration_Record_Length,
641		    howmany(sizeof(struct ddf_vdc_record) + (4 + 8) *
642		        GET16(meta, hdr->Max_Primary_Element_Entries), ss));
643		SET32(meta, hdr->cd_length,
644		    howmany(sizeof(struct ddf_cd_record), ss));
645		SET32(meta, hdr->pdr_length,
646		    howmany(sizeof(struct ddf_pd_record) +
647		        sizeof(struct ddf_pd_entry) * GET16(meta,
648			hdr->Max_PD_Entries), ss));
649		SET32(meta, hdr->vdr_length,
650		    howmany(sizeof(struct ddf_vd_record) +
651		        sizeof(struct ddf_vd_entry) *
652			GET16(meta, hdr->Max_VD_Entries), ss));
653		SET32(meta, hdr->cr_length,
654		    GET16(meta, hdr->Configuration_Record_Length) *
655		    (GET16(meta, hdr->Max_Partitions) + 1));
656		SET32(meta, hdr->pdd_length,
657		    howmany(sizeof(struct ddf_pdd_record), ss));
658		SET32(meta, hdr->bbmlog_length, 0);
659		SET32(meta, hdr->Diagnostic_Space_Length, 0);
660		SET32(meta, hdr->Vendor_Specific_Logs_Length, 0);
661	}
662	pos = 1;
663	SET32(meta, hdr->cd_section, pos);
664	pos += GET32(meta, hdr->cd_length);
665	SET32(meta, hdr->pdr_section, pos);
666	pos += GET32(meta, hdr->pdr_length);
667	SET32(meta, hdr->vdr_section, pos);
668	pos += GET32(meta, hdr->vdr_length);
669	SET32(meta, hdr->cr_section, pos);
670	pos += GET32(meta, hdr->cr_length);
671	SET32(meta, hdr->pdd_section, pos);
672	pos += GET32(meta, hdr->pdd_length);
673	SET32(meta, hdr->bbmlog_section,
674	    GET32(meta, hdr->bbmlog_length) != 0 ? pos : 0xffffffff);
675	pos += GET32(meta, hdr->bbmlog_length);
676	SET32(meta, hdr->Diagnostic_Space,
677	    GET32(meta, hdr->Diagnostic_Space_Length) != 0 ? pos : 0xffffffff);
678	pos += GET32(meta, hdr->Diagnostic_Space_Length);
679	SET32(meta, hdr->Vendor_Specific_Logs,
680	    GET32(meta, hdr->Vendor_Specific_Logs_Length) != 0 ? pos : 0xffffffff);
681	pos += min(GET32(meta, hdr->Vendor_Specific_Logs_Length), 1);
682	SET64(meta, hdr->Primary_Header_LBA,
683	    anchorlba - pos);
684	SET64(meta, hdr->Secondary_Header_LBA,
685	    0xffffffffffffffffULL);
686	SET64(meta, hdr->WorkSpace_LBA,
687	    anchorlba + 1 - 32 * 1024 * 1024 / ss);
688
689	/* Controller Data */
690	size = GET32(meta, hdr->cd_length) * ss;
691	meta->cdr = malloc(size, M_MD_DDF, M_WAITOK);
692	memset(meta->cdr, 0xff, size);
693	SET32(meta, cdr->Signature, DDF_CONTROLLER_DATA_SIGNATURE);
694	memcpy(meta->cdr->Controller_GUID, "FreeBSD GEOM RAID SERIAL", 24);
695	memcpy(meta->cdr->Product_ID, "FreeBSD GEOMRAID", 16);
696
697	/* Physical Drive Records. */
698	size = GET32(meta, hdr->pdr_length) * ss;
699	meta->pdr = malloc(size, M_MD_DDF, M_WAITOK);
700	memset(meta->pdr, 0xff, size);
701	SET32(meta, pdr->Signature, DDF_PDR_SIGNATURE);
702	SET16(meta, pdr->Populated_PDEs, 1);
703	SET16(meta, pdr->Max_PDE_Supported,
704	    GET16(meta, hdr->Max_PD_Entries));
705
706	pde = &meta->pdr->entry[0];
707	len = sizeof(serial_buffer);
708	error = g_io_getattr("GEOM::ident", disk->d_consumer, &len, serial_buffer);
709	if (error == 0 && (len = strlen (serial_buffer)) >= 6 && len <= 20)
710		snprintf(pde->PD_GUID, 25, "DISK%20s", serial_buffer);
711	else
712		snprintf(pde->PD_GUID, 25, "DISK%04d%02d%02d%08x%04x",
713		    ct.year, ct.mon, ct.day,
714		    arc4random(), arc4random() & 0xffff);
715	SET32D(meta, pde->PD_Reference, arc4random());
716	SET16D(meta, pde->PD_Type, DDF_PDE_GUID_FORCE);
717	SET16D(meta, pde->PD_State, 0);
718	SET64D(meta, pde->Configured_Size,
719	    anchorlba + 1 - 32 * 1024 * 1024 / ss);
720	SET16D(meta, pde->Block_Size, ss);
721
722	/* Virtual Drive Records. */
723	size = GET32(meta, hdr->vdr_length) * ss;
724	meta->vdr = malloc(size, M_MD_DDF, M_WAITOK);
725	memset(meta->vdr, 0xff, size);
726	SET32(meta, vdr->Signature, DDF_VD_RECORD_SIGNATURE);
727	SET32(meta, vdr->Populated_VDEs, 0);
728	SET16(meta, vdr->Max_VDE_Supported,
729	    GET16(meta, hdr->Max_VD_Entries));
730
731	/* Configuration Records. */
732	size = GET32(meta, hdr->cr_length) * ss;
733	meta->cr = malloc(size, M_MD_DDF, M_WAITOK);
734	memset(meta->cr, 0xff, size);
735
736	/* Physical Disk Data. */
737	size = GET32(meta, hdr->pdd_length) * ss;
738	meta->pdd = malloc(size, M_MD_DDF, M_WAITOK);
739	memset(meta->pdd, 0xff, size);
740	SET32(meta, pdd->Signature, DDF_PDD_SIGNATURE);
741	memcpy(meta->pdd->PD_GUID, pde->PD_GUID, 24);
742	SET32(meta, pdd->PD_Reference, GET32D(meta, pde->PD_Reference));
743	SET8(meta, pdd->Forced_Ref_Flag, DDF_PDD_FORCED_REF);
744	SET8(meta, pdd->Forced_PD_GUID_Flag, DDF_PDD_FORCED_GUID);
745
746	/* Bad Block Management Log. */
747	if (GET32(meta, hdr->bbmlog_length) != 0) {
748		size = GET32(meta, hdr->bbmlog_length) * ss;
749		meta->bbm = malloc(size, M_MD_DDF, M_WAITOK);
750		memset(meta->bbm, 0xff, size);
751		SET32(meta, bbm->Signature, DDF_BBML_SIGNATURE);
752		SET32(meta, bbm->Entry_Count, 0);
753		SET32(meta, bbm->Spare_Block_Count, 0);
754	}
755}
756
757static void
758ddf_meta_copy(struct ddf_meta *dst, struct ddf_meta *src)
759{
760	struct ddf_header *hdr;
761	u_int ss;
762
763	hdr = src->hdr;
764	dst->bigendian = src->bigendian;
765	ss = dst->sectorsize = src->sectorsize;
766	dst->hdr = malloc(ss, M_MD_DDF, M_WAITOK);
767	memcpy(dst->hdr, src->hdr, ss);
768	dst->cdr = malloc(GET32(src, hdr->cd_length) * ss, M_MD_DDF, M_WAITOK);
769	memcpy(dst->cdr, src->cdr, GET32(src, hdr->cd_length) * ss);
770	dst->pdr = malloc(GET32(src, hdr->pdr_length) * ss, M_MD_DDF, M_WAITOK);
771	memcpy(dst->pdr, src->pdr, GET32(src, hdr->pdr_length) * ss);
772	dst->vdr = malloc(GET32(src, hdr->vdr_length) * ss, M_MD_DDF, M_WAITOK);
773	memcpy(dst->vdr, src->vdr, GET32(src, hdr->vdr_length) * ss);
774	dst->cr = malloc(GET32(src, hdr->cr_length) * ss, M_MD_DDF, M_WAITOK);
775	memcpy(dst->cr, src->cr, GET32(src, hdr->cr_length) * ss);
776	dst->pdd = malloc(GET32(src, hdr->pdd_length) * ss, M_MD_DDF, M_WAITOK);
777	memcpy(dst->pdd, src->pdd, GET32(src, hdr->pdd_length) * ss);
778	if (src->bbm != NULL) {
779		dst->bbm = malloc(GET32(src, hdr->bbmlog_length) * ss, M_MD_DDF, M_WAITOK);
780		memcpy(dst->bbm, src->bbm, GET32(src, hdr->bbmlog_length) * ss);
781	}
782}
783
784static void
785ddf_meta_update(struct ddf_meta *meta, struct ddf_meta *src)
786{
787	struct ddf_pd_entry *pde, *spde;
788	int i, j;
789
790	for (i = 0; i < GET16(src, pdr->Populated_PDEs); i++) {
791		spde = &src->pdr->entry[i];
792		if (isff(spde->PD_GUID, 24))
793			continue;
794		j = ddf_meta_find_pd(meta, NULL,
795		    GET32(src, pdr->entry[i].PD_Reference));
796		if (j < 0) {
797			j = ddf_meta_find_pd(meta, NULL, 0xffffffff);
798			pde = &meta->pdr->entry[j];
799			memcpy(pde, spde, sizeof(*pde));
800		} else {
801			pde = &meta->pdr->entry[j];
802			SET16D(meta, pde->PD_State,
803			    GET16D(meta, pde->PD_State) |
804			    GET16D(src, pde->PD_State));
805		}
806	}
807}
808
809static void
810ddf_meta_free(struct ddf_meta *meta)
811{
812
813	if (meta->hdr != NULL) {
814		free(meta->hdr, M_MD_DDF);
815		meta->hdr = NULL;
816	}
817	if (meta->cdr != NULL) {
818		free(meta->cdr, M_MD_DDF);
819		meta->cdr = NULL;
820	}
821	if (meta->pdr != NULL) {
822		free(meta->pdr, M_MD_DDF);
823		meta->pdr = NULL;
824	}
825	if (meta->vdr != NULL) {
826		free(meta->vdr, M_MD_DDF);
827		meta->vdr = NULL;
828	}
829	if (meta->cr != NULL) {
830		free(meta->cr, M_MD_DDF);
831		meta->cr = NULL;
832	}
833	if (meta->pdd != NULL) {
834		free(meta->pdd, M_MD_DDF);
835		meta->pdd = NULL;
836	}
837	if (meta->bbm != NULL) {
838		free(meta->bbm, M_MD_DDF);
839		meta->bbm = NULL;
840	}
841}
842
843static void
844ddf_vol_meta_create(struct ddf_vol_meta *meta, struct ddf_meta *sample)
845{
846	struct timespec ts;
847	struct clocktime ct;
848	struct ddf_header *hdr;
849	u_int ss, size;
850
851	hdr = sample->hdr;
852	meta->bigendian = sample->bigendian;
853	ss = meta->sectorsize = sample->sectorsize;
854	meta->hdr = malloc(ss, M_MD_DDF, M_WAITOK);
855	memcpy(meta->hdr, sample->hdr, ss);
856	meta->cdr = malloc(GET32(sample, hdr->cd_length) * ss, M_MD_DDF, M_WAITOK);
857	memcpy(meta->cdr, sample->cdr, GET32(sample, hdr->cd_length) * ss);
858	meta->vde = malloc(sizeof(struct ddf_vd_entry), M_MD_DDF, M_WAITOK);
859	memset(meta->vde, 0xff, sizeof(struct ddf_vd_entry));
860	getnanotime(&ts);
861	clock_ts_to_ct(&ts, &ct);
862	snprintf(meta->vde->VD_GUID, 25, "FreeBSD%04d%02d%02d%08x%01x",
863	    ct.year, ct.mon, ct.day,
864	    arc4random(), arc4random() & 0xf);
865	size = GET16(sample, hdr->Configuration_Record_Length) * ss;
866	meta->vdc = malloc(size, M_MD_DDF, M_WAITOK);
867	memset(meta->vdc, 0xff, size);
868	SET32(meta, vdc->Signature, DDF_VDCR_SIGNATURE);
869	memcpy(meta->vdc->VD_GUID, meta->vde->VD_GUID, 24);
870	SET32(meta, vdc->Sequence_Number, 0);
871}
872
873static void
874ddf_vol_meta_update(struct ddf_vol_meta *dst, struct ddf_meta *src,
875    uint8_t *GUID, int started)
876{
877	struct ddf_header *hdr;
878	struct ddf_vd_entry *vde;
879	struct ddf_vdc_record *vdc;
880	int vnew, bvnew, bvd, size;
881	u_int ss;
882
883	hdr = src->hdr;
884	vde = &src->vdr->entry[ddf_meta_find_vd(src, GUID)];
885	vdc = ddf_meta_find_vdc(src, GUID);
886	if (GET8D(src, vdc->Secondary_Element_Count) == 1)
887		bvd = 0;
888	else
889		bvd = GET8D(src, vdc->Secondary_Element_Seq);
890	size = GET16(src, hdr->Configuration_Record_Length) * src->sectorsize;
891
892	if (dst->vdc == NULL ||
893	    (!started && ((int32_t)(GET32D(src, vdc->Sequence_Number) -
894	    GET32(dst, vdc->Sequence_Number))) > 0))
895		vnew = 1;
896	else
897		vnew = 0;
898
899	if (dst->bvdc[bvd] == NULL ||
900	    (!started && ((int32_t)(GET32D(src, vdc->Sequence_Number) -
901	    GET32(dst, bvdc[bvd]->Sequence_Number))) > 0))
902		bvnew = 1;
903	else
904		bvnew = 0;
905
906	if (vnew) {
907		dst->bigendian = src->bigendian;
908		ss = dst->sectorsize = src->sectorsize;
909		if (dst->hdr != NULL)
910			free(dst->hdr, M_MD_DDF);
911		dst->hdr = malloc(ss, M_MD_DDF, M_WAITOK);
912		memcpy(dst->hdr, src->hdr, ss);
913		if (dst->cdr != NULL)
914			free(dst->cdr, M_MD_DDF);
915		dst->cdr = malloc(GET32(src, hdr->cd_length) * ss, M_MD_DDF, M_WAITOK);
916		memcpy(dst->cdr, src->cdr, GET32(src, hdr->cd_length) * ss);
917		if (dst->vde != NULL)
918			free(dst->vde, M_MD_DDF);
919		dst->vde = malloc(sizeof(struct ddf_vd_entry), M_MD_DDF, M_WAITOK);
920		memcpy(dst->vde, vde, sizeof(struct ddf_vd_entry));
921		if (dst->vdc != NULL)
922			free(dst->vdc, M_MD_DDF);
923		dst->vdc = malloc(size, M_MD_DDF, M_WAITOK);
924		memcpy(dst->vdc, vdc, size);
925	}
926	if (bvnew) {
927		if (dst->bvdc[bvd] != NULL)
928			free(dst->bvdc[bvd], M_MD_DDF);
929		dst->bvdc[bvd] = malloc(size, M_MD_DDF, M_WAITOK);
930		memcpy(dst->bvdc[bvd], vdc, size);
931	}
932}
933
934static void
935ddf_vol_meta_free(struct ddf_vol_meta *meta)
936{
937	int i;
938
939	if (meta->hdr != NULL) {
940		free(meta->hdr, M_MD_DDF);
941		meta->hdr = NULL;
942	}
943	if (meta->cdr != NULL) {
944		free(meta->cdr, M_MD_DDF);
945		meta->cdr = NULL;
946	}
947	if (meta->vde != NULL) {
948		free(meta->vde, M_MD_DDF);
949		meta->vde = NULL;
950	}
951	if (meta->vdc != NULL) {
952		free(meta->vdc, M_MD_DDF);
953		meta->vdc = NULL;
954	}
955	for (i = 0; i < DDF_MAX_DISKS_HARD; i++) {
956		if (meta->bvdc[i] != NULL) {
957			free(meta->bvdc[i], M_MD_DDF);
958			meta->bvdc[i] = NULL;
959		}
960	}
961}
962
963static int
964ddf_meta_unused_range(struct ddf_meta *meta, off_t *off, off_t *size)
965{
966	struct ddf_vdc_record *vdc;
967	off_t beg[32], end[32], beg1, end1;
968	uint64_t *offp;
969	int i, j, n, num, pos;
970	uint32_t ref;
971
972	*off = 0;
973	*size = 0;
974	ref = GET32(meta, pdd->PD_Reference);
975	pos = ddf_meta_find_pd(meta, NULL, ref);
976	beg[0] = 0;
977	end[0] = GET64(meta, pdr->entry[pos].Configured_Size);
978	n = 1;
979	num = GETCRNUM(meta);
980	for (i = 0; i < num; i++) {
981		vdc = GETVDCPTR(meta, i);
982		if (GET32D(meta, vdc->Signature) != DDF_VDCR_SIGNATURE)
983			continue;
984		for (pos = 0; pos < GET16D(meta, vdc->Primary_Element_Count); pos++)
985			if (GET32D(meta, vdc->Physical_Disk_Sequence[pos]) == ref)
986				break;
987		if (pos == GET16D(meta, vdc->Primary_Element_Count))
988			continue;
989		offp = (uint64_t *)&(vdc->Physical_Disk_Sequence[
990		    GET16(meta, hdr->Max_Primary_Element_Entries)]);
991		beg1 = GET64P(meta, offp + pos);
992		end1 = beg1 + GET64D(meta, vdc->Block_Count);
993		for (j = 0; j < n; j++) {
994			if (beg[j] >= end1 || end[j] <= beg1 )
995				continue;
996			if (beg[j] < beg1 && end[j] > end1) {
997				beg[n] = end1;
998				end[n] = end[j];
999				end[j] = beg1;
1000				n++;
1001			} else if (beg[j] < beg1)
1002				end[j] = beg1;
1003			else
1004				beg[j] = end1;
1005		}
1006	}
1007	for (j = 0; j < n; j++) {
1008		if (end[j] - beg[j] > *size) {
1009			*off = beg[j];
1010			*size = end[j] - beg[j];
1011		}
1012	}
1013	return ((*size > 0) ? 1 : 0);
1014}
1015
1016static void
1017ddf_meta_get_name(struct ddf_meta *meta, int num, char *buf)
1018{
1019	const char *b;
1020	int i;
1021
1022	b = meta->vdr->entry[num].VD_Name;
1023	for (i = 15; i >= 0; i--)
1024		if (b[i] != 0x20)
1025			break;
1026	memcpy(buf, b, i + 1);
1027	buf[i + 1] = 0;
1028}
1029
1030static void
1031ddf_meta_put_name(struct ddf_vol_meta *meta, char *buf)
1032{
1033	int len;
1034
1035	len = min(strlen(buf), 16);
1036	memset(meta->vde->VD_Name, 0x20, 16);
1037	memcpy(meta->vde->VD_Name, buf, len);
1038}
1039
1040static int
1041ddf_meta_read(struct g_consumer *cp, struct ddf_meta *meta)
1042{
1043	struct g_provider *pp;
1044	struct ddf_header *ahdr, *hdr;
1045	char *abuf, *buf;
1046	off_t plba, slba, lba;
1047	int error, len, i;
1048	u_int ss;
1049	uint32_t val;
1050
1051	ddf_meta_free(meta);
1052	pp = cp->provider;
1053	ss = meta->sectorsize = pp->sectorsize;
1054	/* Read anchor block. */
1055	abuf = g_read_data(cp, pp->mediasize - ss, ss, &error);
1056	if (abuf == NULL) {
1057		G_RAID_DEBUG(1, "Cannot read metadata from %s (error=%d).",
1058		    pp->name, error);
1059		return (error);
1060	}
1061	ahdr = (struct ddf_header *)abuf;
1062
1063	/* Check if this is an DDF RAID struct */
1064	if (be32dec(&ahdr->Signature) == DDF_HEADER_SIGNATURE)
1065		meta->bigendian = 1;
1066	else if (le32dec(&ahdr->Signature) == DDF_HEADER_SIGNATURE)
1067		meta->bigendian = 0;
1068	else {
1069		G_RAID_DEBUG(1, "DDF signature check failed on %s", pp->name);
1070		error = EINVAL;
1071		goto done;
1072	}
1073	if (ahdr->Header_Type != DDF_HEADER_ANCHOR) {
1074		G_RAID_DEBUG(1, "DDF header type check failed on %s", pp->name);
1075		error = EINVAL;
1076		goto done;
1077	}
1078	meta->hdr = ahdr;
1079	plba = GET64(meta, hdr->Primary_Header_LBA);
1080	slba = GET64(meta, hdr->Secondary_Header_LBA);
1081	val = GET32(meta, hdr->CRC);
1082	SET32(meta, hdr->CRC, 0xffffffff);
1083	meta->hdr = NULL;
1084	if (crc32(ahdr, ss) != val) {
1085		G_RAID_DEBUG(1, "DDF CRC mismatch on %s", pp->name);
1086		error = EINVAL;
1087		goto done;
1088	}
1089	if ((plba + 6) * ss >= pp->mediasize) {
1090		G_RAID_DEBUG(1, "DDF primary header LBA is wrong on %s", pp->name);
1091		error = EINVAL;
1092		goto done;
1093	}
1094	if (slba != -1 && (slba + 6) * ss >= pp->mediasize) {
1095		G_RAID_DEBUG(1, "DDF secondary header LBA is wrong on %s", pp->name);
1096		error = EINVAL;
1097		goto done;
1098	}
1099	lba = plba;
1100
1101doread:
1102	error = 0;
1103	ddf_meta_free(meta);
1104
1105	/* Read header block. */
1106	buf = g_read_data(cp, lba * ss, ss, &error);
1107	if (buf == NULL) {
1108readerror:
1109		G_RAID_DEBUG(1, "DDF %s metadata read error on %s (error=%d).",
1110		    (lba == plba) ? "primary" : "secondary", pp->name, error);
1111		if (lba == plba && slba != -1) {
1112			lba = slba;
1113			goto doread;
1114		}
1115		G_RAID_DEBUG(1, "DDF metadata read error on %s.", pp->name);
1116		goto done;
1117	}
1118	meta->hdr = malloc(ss, M_MD_DDF, M_WAITOK);
1119	memcpy(meta->hdr, buf, ss);
1120	g_free(buf);
1121	hdr = meta->hdr;
1122	val = GET32(meta, hdr->CRC);
1123	SET32(meta, hdr->CRC, 0xffffffff);
1124	if (hdr->Signature != ahdr->Signature ||
1125	    crc32(meta->hdr, ss) != val ||
1126	    memcmp(hdr->DDF_Header_GUID, ahdr->DDF_Header_GUID, 24) ||
1127	    GET64(meta, hdr->Primary_Header_LBA) != plba ||
1128	    GET64(meta, hdr->Secondary_Header_LBA) != slba) {
1129hdrerror:
1130		G_RAID_DEBUG(1, "DDF %s metadata check failed on %s",
1131		    (lba == plba) ? "primary" : "secondary", pp->name);
1132		if (lba == plba && slba != -1) {
1133			lba = slba;
1134			goto doread;
1135		}
1136		G_RAID_DEBUG(1, "DDF metadata check failed on %s", pp->name);
1137		error = EINVAL;
1138		goto done;
1139	}
1140	if ((lba == plba && hdr->Header_Type != DDF_HEADER_PRIMARY) ||
1141	    (lba == slba && hdr->Header_Type != DDF_HEADER_SECONDARY))
1142		goto hdrerror;
1143	len = 1;
1144	len = max(len, GET32(meta, hdr->cd_section) + GET32(meta, hdr->cd_length));
1145	len = max(len, GET32(meta, hdr->pdr_section) + GET32(meta, hdr->pdr_length));
1146	len = max(len, GET32(meta, hdr->vdr_section) + GET32(meta, hdr->vdr_length));
1147	len = max(len, GET32(meta, hdr->cr_section) + GET32(meta, hdr->cr_length));
1148	len = max(len, GET32(meta, hdr->pdd_section) + GET32(meta, hdr->pdd_length));
1149	if ((val = GET32(meta, hdr->bbmlog_section)) != 0xffffffff)
1150		len = max(len, val + GET32(meta, hdr->bbmlog_length));
1151	if ((val = GET32(meta, hdr->Diagnostic_Space)) != 0xffffffff)
1152		len = max(len, val + GET32(meta, hdr->Diagnostic_Space_Length));
1153	if ((val = GET32(meta, hdr->Vendor_Specific_Logs)) != 0xffffffff)
1154		len = max(len, val + GET32(meta, hdr->Vendor_Specific_Logs_Length));
1155	if ((plba + len) * ss >= pp->mediasize)
1156		goto hdrerror;
1157	if (slba != -1 && (slba + len) * ss >= pp->mediasize)
1158		goto hdrerror;
1159	/* Workaround for Adaptec implementation. */
1160	if (GET16(meta, hdr->Max_Primary_Element_Entries) == 0xffff) {
1161		SET16(meta, hdr->Max_Primary_Element_Entries,
1162		    min(GET16(meta, hdr->Max_PD_Entries),
1163		    (GET16(meta, hdr->Configuration_Record_Length) * ss - 512) / 12));
1164	}
1165
1166	if (GET32(meta, hdr->cd_length) * ss >= MAXPHYS ||
1167	    GET32(meta, hdr->pdr_length) * ss >= MAXPHYS ||
1168	    GET32(meta, hdr->vdr_length) * ss >= MAXPHYS ||
1169	    GET32(meta, hdr->cr_length) * ss >= MAXPHYS ||
1170	    GET32(meta, hdr->pdd_length) * ss >= MAXPHYS ||
1171	    GET32(meta, hdr->bbmlog_length) * ss >= MAXPHYS) {
1172		G_RAID_DEBUG(1, "%s: Blocksize is too big.", pp->name);
1173		goto hdrerror;
1174	}
1175
1176	/* Read controller data. */
1177	buf = g_read_data(cp, (lba + GET32(meta, hdr->cd_section)) * ss,
1178	    GET32(meta, hdr->cd_length) * ss, &error);
1179	if (buf == NULL)
1180		goto readerror;
1181	meta->cdr = malloc(GET32(meta, hdr->cd_length) * ss, M_MD_DDF, M_WAITOK);
1182	memcpy(meta->cdr, buf, GET32(meta, hdr->cd_length) * ss);
1183	g_free(buf);
1184	if (GET32(meta, cdr->Signature) != DDF_CONTROLLER_DATA_SIGNATURE)
1185		goto hdrerror;
1186
1187	/* Read physical disk records. */
1188	buf = g_read_data(cp, (lba + GET32(meta, hdr->pdr_section)) * ss,
1189	    GET32(meta, hdr->pdr_length) * ss, &error);
1190	if (buf == NULL)
1191		goto readerror;
1192	meta->pdr = malloc(GET32(meta, hdr->pdr_length) * ss, M_MD_DDF, M_WAITOK);
1193	memcpy(meta->pdr, buf, GET32(meta, hdr->pdr_length) * ss);
1194	g_free(buf);
1195	if (GET32(meta, pdr->Signature) != DDF_PDR_SIGNATURE)
1196		goto hdrerror;
1197	/*
1198	 * Workaround for reading metadata corrupted due to graid bug.
1199	 * XXX: Remove this before we have disks above 128PB. :)
1200	 */
1201	if (meta->bigendian) {
1202		for (i = 0; i < GET16(meta, pdr->Populated_PDEs); i++) {
1203			if (isff(meta->pdr->entry[i].PD_GUID, 24))
1204				continue;
1205			if (GET32(meta, pdr->entry[i].PD_Reference) ==
1206			    0xffffffff)
1207				continue;
1208			if (GET64(meta, pdr->entry[i].Configured_Size) >=
1209			     (1ULL << 48)) {
1210				SET16(meta, pdr->entry[i].PD_State,
1211				    GET16(meta, pdr->entry[i].PD_State) &
1212				    ~DDF_PDE_FAILED);
1213				SET64(meta, pdr->entry[i].Configured_Size,
1214				    GET64(meta, pdr->entry[i].Configured_Size) &
1215				    ((1ULL << 48) - 1));
1216			}
1217		}
1218	}
1219
1220	/* Read virtual disk records. */
1221	buf = g_read_data(cp, (lba + GET32(meta, hdr->vdr_section)) * ss,
1222	    GET32(meta, hdr->vdr_length) * ss, &error);
1223	if (buf == NULL)
1224		goto readerror;
1225	meta->vdr = malloc(GET32(meta, hdr->vdr_length) * ss, M_MD_DDF, M_WAITOK);
1226	memcpy(meta->vdr, buf, GET32(meta, hdr->vdr_length) * ss);
1227	g_free(buf);
1228	if (GET32(meta, vdr->Signature) != DDF_VD_RECORD_SIGNATURE)
1229		goto hdrerror;
1230
1231	/* Read configuration records. */
1232	buf = g_read_data(cp, (lba + GET32(meta, hdr->cr_section)) * ss,
1233	    GET32(meta, hdr->cr_length) * ss, &error);
1234	if (buf == NULL)
1235		goto readerror;
1236	meta->cr = malloc(GET32(meta, hdr->cr_length) * ss, M_MD_DDF, M_WAITOK);
1237	memcpy(meta->cr, buf, GET32(meta, hdr->cr_length) * ss);
1238	g_free(buf);
1239
1240	/* Read physical disk data. */
1241	buf = g_read_data(cp, (lba + GET32(meta, hdr->pdd_section)) * ss,
1242	    GET32(meta, hdr->pdd_length) * ss, &error);
1243	if (buf == NULL)
1244		goto readerror;
1245	meta->pdd = malloc(GET32(meta, hdr->pdd_length) * ss, M_MD_DDF, M_WAITOK);
1246	memcpy(meta->pdd, buf, GET32(meta, hdr->pdd_length) * ss);
1247	g_free(buf);
1248	if (GET32(meta, pdd->Signature) != DDF_PDD_SIGNATURE)
1249		goto hdrerror;
1250	i = ddf_meta_find_pd(meta, NULL, GET32(meta, pdd->PD_Reference));
1251	if (i < 0)
1252		goto hdrerror;
1253
1254	/* Read BBM Log. */
1255	if (GET32(meta, hdr->bbmlog_section) != 0xffffffff &&
1256	    GET32(meta, hdr->bbmlog_length) != 0) {
1257		buf = g_read_data(cp, (lba + GET32(meta, hdr->bbmlog_section)) * ss,
1258		    GET32(meta, hdr->bbmlog_length) * ss, &error);
1259		if (buf == NULL)
1260			goto readerror;
1261		meta->bbm = malloc(GET32(meta, hdr->bbmlog_length) * ss, M_MD_DDF, M_WAITOK);
1262		memcpy(meta->bbm, buf, GET32(meta, hdr->bbmlog_length) * ss);
1263		g_free(buf);
1264		if (GET32(meta, bbm->Signature) != DDF_BBML_SIGNATURE)
1265			goto hdrerror;
1266	}
1267
1268done:
1269	g_free(abuf);
1270	if (error != 0)
1271		ddf_meta_free(meta);
1272	return (error);
1273}
1274
1275static int
1276ddf_meta_write(struct g_consumer *cp, struct ddf_meta *meta)
1277{
1278	struct g_provider *pp;
1279	struct ddf_vdc_record *vdc;
1280	off_t alba, plba, slba, lba;
1281	u_int ss, size;
1282	int error, i, num;
1283
1284	pp = cp->provider;
1285	ss = pp->sectorsize;
1286	lba = alba = pp->mediasize / ss - 1;
1287	plba = GET64(meta, hdr->Primary_Header_LBA);
1288	slba = GET64(meta, hdr->Secondary_Header_LBA);
1289
1290next:
1291	SET8(meta, hdr->Header_Type, (lba == alba) ? DDF_HEADER_ANCHOR :
1292	    (lba == plba) ? DDF_HEADER_PRIMARY : DDF_HEADER_SECONDARY);
1293	SET32(meta, hdr->CRC, 0xffffffff);
1294	SET32(meta, hdr->CRC, crc32(meta->hdr, ss));
1295	error = g_write_data(cp, lba * ss, meta->hdr, ss);
1296	if (error != 0) {
1297err:
1298		G_RAID_DEBUG(1, "Cannot write metadata to %s (error=%d).",
1299		    pp->name, error);
1300		if (lba != alba)
1301			goto done;
1302	}
1303	if (lba == alba) {
1304		lba = plba;
1305		goto next;
1306	}
1307
1308	size = GET32(meta, hdr->cd_length) * ss;
1309	SET32(meta, cdr->CRC, 0xffffffff);
1310	SET32(meta, cdr->CRC, crc32(meta->cdr, size));
1311	error = g_write_data(cp, (lba + GET32(meta, hdr->cd_section)) * ss,
1312	    meta->cdr, size);
1313	if (error != 0)
1314		goto err;
1315
1316	size = GET32(meta, hdr->pdr_length) * ss;
1317	SET32(meta, pdr->CRC, 0xffffffff);
1318	SET32(meta, pdr->CRC, crc32(meta->pdr, size));
1319	error = g_write_data(cp, (lba + GET32(meta, hdr->pdr_section)) * ss,
1320	    meta->pdr, size);
1321	if (error != 0)
1322		goto err;
1323
1324	size = GET32(meta, hdr->vdr_length) * ss;
1325	SET32(meta, vdr->CRC, 0xffffffff);
1326	SET32(meta, vdr->CRC, crc32(meta->vdr, size));
1327	error = g_write_data(cp, (lba + GET32(meta, hdr->vdr_section)) * ss,
1328	    meta->vdr, size);
1329	if (error != 0)
1330		goto err;
1331
1332	size = GET16(meta, hdr->Configuration_Record_Length) * ss;
1333	num = GETCRNUM(meta);
1334	for (i = 0; i < num; i++) {
1335		vdc = GETVDCPTR(meta, i);
1336		SET32D(meta, vdc->CRC, 0xffffffff);
1337		SET32D(meta, vdc->CRC, crc32(vdc, size));
1338	}
1339	error = g_write_data(cp, (lba + GET32(meta, hdr->cr_section)) * ss,
1340	    meta->cr, size * num);
1341	if (error != 0)
1342		goto err;
1343
1344	size = GET32(meta, hdr->pdd_length) * ss;
1345	SET32(meta, pdd->CRC, 0xffffffff);
1346	SET32(meta, pdd->CRC, crc32(meta->pdd, size));
1347	error = g_write_data(cp, (lba + GET32(meta, hdr->pdd_section)) * ss,
1348	    meta->pdd, size);
1349	if (error != 0)
1350		goto err;
1351
1352	if (GET32(meta, hdr->bbmlog_length) != 0) {
1353		size = GET32(meta, hdr->bbmlog_length) * ss;
1354		SET32(meta, bbm->CRC, 0xffffffff);
1355		SET32(meta, bbm->CRC, crc32(meta->bbm, size));
1356		error = g_write_data(cp,
1357		    (lba + GET32(meta, hdr->bbmlog_section)) * ss,
1358		    meta->bbm, size);
1359		if (error != 0)
1360			goto err;
1361	}
1362
1363done:
1364	if (lba == plba && slba != -1) {
1365		lba = slba;
1366		goto next;
1367	}
1368
1369	return (error);
1370}
1371
1372static int
1373ddf_meta_erase(struct g_consumer *cp)
1374{
1375	struct g_provider *pp;
1376	char *buf;
1377	int error;
1378
1379	pp = cp->provider;
1380	buf = malloc(pp->sectorsize, M_MD_DDF, M_WAITOK | M_ZERO);
1381	error = g_write_data(cp, pp->mediasize - pp->sectorsize,
1382	    buf, pp->sectorsize);
1383	if (error != 0) {
1384		G_RAID_DEBUG(1, "Cannot erase metadata on %s (error=%d).",
1385		    pp->name, error);
1386	}
1387	free(buf, M_MD_DDF);
1388	return (error);
1389}
1390
1391static struct g_raid_volume *
1392g_raid_md_ddf_get_volume(struct g_raid_softc *sc, uint8_t *GUID)
1393{
1394	struct g_raid_volume	*vol;
1395	struct g_raid_md_ddf_pervolume *pv;
1396
1397	TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
1398		pv = vol->v_md_data;
1399		if (memcmp(pv->pv_meta.vde->VD_GUID, GUID, 24) == 0)
1400			break;
1401	}
1402	return (vol);
1403}
1404
1405static struct g_raid_disk *
1406g_raid_md_ddf_get_disk(struct g_raid_softc *sc, uint8_t *GUID, uint32_t id)
1407{
1408	struct g_raid_disk	*disk;
1409	struct g_raid_md_ddf_perdisk *pd;
1410	struct ddf_meta *meta;
1411
1412	TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
1413		pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data;
1414		meta = &pd->pd_meta;
1415		if (GUID != NULL) {
1416			if (memcmp(meta->pdd->PD_GUID, GUID, 24) == 0)
1417				break;
1418		} else {
1419			if (GET32(meta, pdd->PD_Reference) == id)
1420				break;
1421		}
1422	}
1423	return (disk);
1424}
1425
1426static int
1427g_raid_md_ddf_purge_volumes(struct g_raid_softc *sc)
1428{
1429	struct g_raid_volume	*vol, *tvol;
1430	struct g_raid_md_ddf_pervolume *pv;
1431	int i, res;
1432
1433	res = 0;
1434	TAILQ_FOREACH_SAFE(vol, &sc->sc_volumes, v_next, tvol) {
1435		pv = vol->v_md_data;
1436		if (vol->v_stopping)
1437			continue;
1438		for (i = 0; i < vol->v_disks_count; i++) {
1439			if (vol->v_subdisks[i].sd_state != G_RAID_SUBDISK_S_NONE)
1440				break;
1441		}
1442		if (i >= vol->v_disks_count) {
1443			g_raid_destroy_volume(vol);
1444			res = 1;
1445		}
1446	}
1447	return (res);
1448}
1449
/*
 * Purge disks that no longer carry metadata for any volume.
 *
 * The implementation is compiled out (#if 0), so this function is
 * currently a stub that always returns 0.
 *
 * NOTE(review): the disabled code indexes pd_meta[] and uses
 * pd_subdisks, whereas the rest of this file treats pd_meta as a single
 * struct ddf_meta; it looks like a leftover from another metadata
 * module -- confirm before re-enabling.
 */
static int
g_raid_md_ddf_purge_disks(struct g_raid_softc *sc)
{
#if 0
	struct g_raid_disk	*disk, *tdisk;
	struct g_raid_volume	*vol;
	struct g_raid_md_ddf_perdisk *pd;
	int i, j, res;

	res = 0;
	TAILQ_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) {
		if (disk->d_state == G_RAID_DISK_S_SPARE)
			continue;
		pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data;

		/* Scan for deleted volumes. */
		for (i = 0; i < pd->pd_subdisks; ) {
			vol = g_raid_md_ddf_get_volume(sc,
			    pd->pd_meta[i]->volume_id);
			if (vol != NULL && !vol->v_stopping) {
				i++;
				continue;
			}
			free(pd->pd_meta[i], M_MD_DDF);
			for (j = i; j < pd->pd_subdisks - 1; j++)
				pd->pd_meta[j] = pd->pd_meta[j + 1];
			pd->pd_meta[DDF_MAX_SUBDISKS - 1] = NULL;
			pd->pd_subdisks--;
			pd->pd_updated = 1;
		}

		/* If there is no metadata left - erase and delete disk. */
		if (pd->pd_subdisks == 0) {
			ddf_meta_erase(disk->d_consumer);
			g_raid_destroy_disk(disk);
			res = 1;
		}
	}
	return (res);
#endif
	/* Nothing is purged while the code above is disabled. */
	return (0);
}
1492
1493static int
1494g_raid_md_ddf_supported(int level, int qual, int disks, int force)
1495{
1496
1497	if (disks > DDF_MAX_DISKS_HARD)
1498		return (0);
1499	switch (level) {
1500	case G_RAID_VOLUME_RL_RAID0:
1501		if (qual != G_RAID_VOLUME_RLQ_NONE)
1502			return (0);
1503		if (disks < 1)
1504			return (0);
1505		if (!force && disks < 2)
1506			return (0);
1507		break;
1508	case G_RAID_VOLUME_RL_RAID1:
1509		if (disks < 1)
1510			return (0);
1511		if (qual == G_RAID_VOLUME_RLQ_R1SM) {
1512			if (!force && disks != 2)
1513				return (0);
1514		} else if (qual == G_RAID_VOLUME_RLQ_R1MM) {
1515			if (!force && disks != 3)
1516				return (0);
1517		} else
1518			return (0);
1519		break;
1520	case G_RAID_VOLUME_RL_RAID3:
1521		if (qual != G_RAID_VOLUME_RLQ_R3P0 &&
1522		    qual != G_RAID_VOLUME_RLQ_R3PN)
1523			return (0);
1524		if (disks < 3)
1525			return (0);
1526		break;
1527	case G_RAID_VOLUME_RL_RAID4:
1528		if (qual != G_RAID_VOLUME_RLQ_R4P0 &&
1529		    qual != G_RAID_VOLUME_RLQ_R4PN)
1530			return (0);
1531		if (disks < 3)
1532			return (0);
1533		break;
1534	case G_RAID_VOLUME_RL_RAID5:
1535		if (qual != G_RAID_VOLUME_RLQ_R5RA &&
1536		    qual != G_RAID_VOLUME_RLQ_R5RS &&
1537		    qual != G_RAID_VOLUME_RLQ_R5LA &&
1538		    qual != G_RAID_VOLUME_RLQ_R5LS)
1539			return (0);
1540		if (disks < 3)
1541			return (0);
1542		break;
1543	case G_RAID_VOLUME_RL_RAID6:
1544		if (qual != G_RAID_VOLUME_RLQ_R6RA &&
1545		    qual != G_RAID_VOLUME_RLQ_R6RS &&
1546		    qual != G_RAID_VOLUME_RLQ_R6LA &&
1547		    qual != G_RAID_VOLUME_RLQ_R6LS)
1548			return (0);
1549		if (disks < 4)
1550			return (0);
1551		break;
1552	case G_RAID_VOLUME_RL_RAIDMDF:
1553		if (qual != G_RAID_VOLUME_RLQ_RMDFRA &&
1554		    qual != G_RAID_VOLUME_RLQ_RMDFRS &&
1555		    qual != G_RAID_VOLUME_RLQ_RMDFLA &&
1556		    qual != G_RAID_VOLUME_RLQ_RMDFLS)
1557			return (0);
1558		if (disks < 4)
1559			return (0);
1560		break;
1561	case G_RAID_VOLUME_RL_RAID1E:
1562		if (qual != G_RAID_VOLUME_RLQ_R1EA &&
1563		    qual != G_RAID_VOLUME_RLQ_R1EO)
1564			return (0);
1565		if (disks < 3)
1566			return (0);
1567		break;
1568	case G_RAID_VOLUME_RL_SINGLE:
1569		if (qual != G_RAID_VOLUME_RLQ_NONE)
1570			return (0);
1571		if (disks != 1)
1572			return (0);
1573		break;
1574	case G_RAID_VOLUME_RL_CONCAT:
1575		if (qual != G_RAID_VOLUME_RLQ_NONE)
1576			return (0);
1577		if (disks < 2)
1578			return (0);
1579		break;
1580	case G_RAID_VOLUME_RL_RAID5E:
1581		if (qual != G_RAID_VOLUME_RLQ_R5ERA &&
1582		    qual != G_RAID_VOLUME_RLQ_R5ERS &&
1583		    qual != G_RAID_VOLUME_RLQ_R5ELA &&
1584		    qual != G_RAID_VOLUME_RLQ_R5ELS)
1585			return (0);
1586		if (disks < 4)
1587			return (0);
1588		break;
1589	case G_RAID_VOLUME_RL_RAID5EE:
1590		if (qual != G_RAID_VOLUME_RLQ_R5EERA &&
1591		    qual != G_RAID_VOLUME_RLQ_R5EERS &&
1592		    qual != G_RAID_VOLUME_RLQ_R5EELA &&
1593		    qual != G_RAID_VOLUME_RLQ_R5EELS)
1594			return (0);
1595		if (disks < 4)
1596			return (0);
1597		break;
1598	case G_RAID_VOLUME_RL_RAID5R:
1599		if (qual != G_RAID_VOLUME_RLQ_R5RRA &&
1600		    qual != G_RAID_VOLUME_RLQ_R5RRS &&
1601		    qual != G_RAID_VOLUME_RLQ_R5RLA &&
1602		    qual != G_RAID_VOLUME_RLQ_R5RLS)
1603			return (0);
1604		if (disks < 3)
1605			return (0);
1606		break;
1607	default:
1608		return (0);
1609	}
1610	return (1);
1611}
1612
1613static int
1614g_raid_md_ddf_start_disk(struct g_raid_disk *disk, struct g_raid_volume *vol)
1615{
1616	struct g_raid_softc *sc;
1617	struct g_raid_subdisk *sd;
1618	struct g_raid_md_ddf_perdisk *pd;
1619	struct g_raid_md_ddf_pervolume *pv;
1620	struct g_raid_md_ddf_object *mdi;
1621	struct ddf_vol_meta *vmeta;
1622	struct ddf_meta *pdmeta, *gmeta;
1623	struct ddf_vdc_record *vdc1;
1624	struct ddf_sa_record *sa;
1625	off_t size, eoff = 0, esize = 0;
1626	uint64_t *val2;
1627	int disk_pos, md_disk_bvd = -1, md_disk_pos = -1, md_pde_pos;
1628	int i, resurrection = 0;
1629	uint32_t reference;
1630
1631	sc = disk->d_softc;
1632	mdi = (struct g_raid_md_ddf_object *)sc->sc_md;
1633	pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data;
1634	pdmeta = &pd->pd_meta;
1635	reference = GET32(&pd->pd_meta, pdd->PD_Reference);
1636
1637	pv = vol->v_md_data;
1638	vmeta = &pv->pv_meta;
1639	gmeta = &mdi->mdio_meta;
1640
1641	/* Find disk position in metadata by its reference. */
1642	disk_pos = ddf_meta_find_disk(vmeta, reference,
1643	    &md_disk_bvd, &md_disk_pos);
1644	md_pde_pos = ddf_meta_find_pd(gmeta, NULL, reference);
1645
1646	if (disk_pos < 0) {
1647		G_RAID_DEBUG1(1, sc,
1648		    "Disk %s is not a present part of the volume %s",
1649		    g_raid_get_diskname(disk), vol->v_name);
1650
1651		/* Failed stale disk is useless for us. */
1652		if ((GET16(gmeta, pdr->entry[md_pde_pos].PD_State) & DDF_PDE_PFA) != 0) {
1653			g_raid_change_disk_state(disk, G_RAID_DISK_S_STALE_FAILED);
1654			return (0);
1655		}
1656
1657		/* If disk has some metadata for this volume - erase. */
1658		if ((vdc1 = ddf_meta_find_vdc(pdmeta, vmeta->vdc->VD_GUID)) != NULL)
1659			SET32D(pdmeta, vdc1->Signature, 0xffffffff);
1660
1661		/* If we are in the start process, that's all for now. */
1662		if (!pv->pv_started)
1663			goto nofit;
1664		/*
1665		 * If we have already started - try to get use of the disk.
1666		 * Try to replace OFFLINE disks first, then FAILED.
1667		 */
1668		if (ddf_meta_count_vdc(&pd->pd_meta, NULL) >=
1669			GET16(&pd->pd_meta, hdr->Max_Partitions)) {
1670			G_RAID_DEBUG1(1, sc, "No free partitions on disk %s",
1671			    g_raid_get_diskname(disk));
1672			goto nofit;
1673		}
1674		ddf_meta_unused_range(&pd->pd_meta, &eoff, &esize);
1675		if (esize == 0) {
1676			G_RAID_DEBUG1(1, sc, "No free space on disk %s",
1677			    g_raid_get_diskname(disk));
1678			goto nofit;
1679		}
1680		eoff *= pd->pd_meta.sectorsize;
1681		esize *= pd->pd_meta.sectorsize;
1682		size = INT64_MAX;
1683		for (i = 0; i < vol->v_disks_count; i++) {
1684			sd = &vol->v_subdisks[i];
1685			if (sd->sd_state != G_RAID_SUBDISK_S_NONE)
1686				size = sd->sd_size;
1687			if (sd->sd_state <= G_RAID_SUBDISK_S_FAILED &&
1688			    (disk_pos < 0 ||
1689			     vol->v_subdisks[i].sd_state < sd->sd_state))
1690				disk_pos = i;
1691		}
1692		if (disk_pos >= 0 &&
1693		    vol->v_raid_level != G_RAID_VOLUME_RL_CONCAT &&
1694		    esize < size) {
1695			G_RAID_DEBUG1(1, sc, "Disk %s free space "
1696			    "is too small (%ju < %ju)",
1697			    g_raid_get_diskname(disk), esize, size);
1698			disk_pos = -1;
1699		}
1700		if (disk_pos >= 0) {
1701			if (vol->v_raid_level != G_RAID_VOLUME_RL_CONCAT)
1702				esize = size;
1703			md_disk_bvd = disk_pos / GET16(vmeta, vdc->Primary_Element_Count); // XXX
1704			md_disk_pos = disk_pos % GET16(vmeta, vdc->Primary_Element_Count); // XXX
1705		} else {
1706nofit:
1707			if (disk->d_state == G_RAID_DISK_S_NONE)
1708				g_raid_change_disk_state(disk,
1709				    G_RAID_DISK_S_STALE);
1710			return (0);
1711		}
1712
1713		/*
1714		 * If spare is committable, delete spare record.
1715		 * Othersize, mark it active and leave there.
1716		 */
1717		sa = ddf_meta_find_sa(&pd->pd_meta, 0);
1718		if (sa != NULL) {
1719			if ((GET8D(&pd->pd_meta, sa->Spare_Type) &
1720			    DDF_SAR_TYPE_REVERTIBLE) == 0) {
1721				SET32D(&pd->pd_meta, sa->Signature, 0xffffffff);
1722			} else {
1723				SET8D(&pd->pd_meta, sa->Spare_Type,
1724				    GET8D(&pd->pd_meta, sa->Spare_Type) |
1725				    DDF_SAR_TYPE_ACTIVE);
1726			}
1727		}
1728
1729		G_RAID_DEBUG1(1, sc, "Disk %s takes pos %d in the volume %s",
1730		    g_raid_get_diskname(disk), disk_pos, vol->v_name);
1731		resurrection = 1;
1732	}
1733
1734	sd = &vol->v_subdisks[disk_pos];
1735
1736	if (resurrection && sd->sd_disk != NULL) {
1737		g_raid_change_disk_state(sd->sd_disk,
1738		    G_RAID_DISK_S_STALE_FAILED);
1739		TAILQ_REMOVE(&sd->sd_disk->d_subdisks,
1740		    sd, sd_next);
1741	}
1742	vol->v_subdisks[disk_pos].sd_disk = disk;
1743	TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next);
1744
1745	/* Welcome the new disk. */
1746	if (resurrection)
1747		g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE);
1748	else if (GET16(gmeta, pdr->entry[md_pde_pos].PD_State) & DDF_PDE_PFA)
1749		g_raid_change_disk_state(disk, G_RAID_DISK_S_FAILED);
1750	else
1751		g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE);
1752
1753	if (resurrection) {
1754		sd->sd_offset = eoff;
1755		sd->sd_size = esize;
1756	} else if (pdmeta->cr != NULL &&
1757	    (vdc1 = ddf_meta_find_vdc(pdmeta, vmeta->vdc->VD_GUID)) != NULL) {
1758		val2 = (uint64_t *)&(vdc1->Physical_Disk_Sequence[GET16(vmeta, hdr->Max_Primary_Element_Entries)]);
1759		sd->sd_offset = (off_t)GET64P(pdmeta, val2 + md_disk_pos) * 512;
1760		sd->sd_size = (off_t)GET64D(pdmeta, vdc1->Block_Count) * 512;
1761	}
1762
1763	if (resurrection) {
1764		/* Stale disk, almost same as new. */
1765		g_raid_change_subdisk_state(sd,
1766		    G_RAID_SUBDISK_S_NEW);
1767	} else if (GET16(gmeta, pdr->entry[md_pde_pos].PD_State) & DDF_PDE_PFA) {
1768		/* Failed disk. */
1769		g_raid_change_subdisk_state(sd,
1770		    G_RAID_SUBDISK_S_FAILED);
1771	} else if ((GET16(gmeta, pdr->entry[md_pde_pos].PD_State) &
1772	     (DDF_PDE_FAILED | DDF_PDE_REBUILD)) != 0) {
1773		/* Rebuilding disk. */
1774		g_raid_change_subdisk_state(sd,
1775		    G_RAID_SUBDISK_S_REBUILD);
1776		sd->sd_rebuild_pos = 0;
1777	} else if ((GET8(vmeta, vde->VD_State) & DDF_VDE_DIRTY) != 0 ||
1778	    (GET8(vmeta, vde->Init_State) & DDF_VDE_INIT_MASK) !=
1779	     DDF_VDE_INIT_FULL) {
1780		/* Stale disk or dirty volume (unclean shutdown). */
1781		g_raid_change_subdisk_state(sd,
1782		    G_RAID_SUBDISK_S_STALE);
1783	} else {
1784		/* Up to date disk. */
1785		g_raid_change_subdisk_state(sd,
1786		    G_RAID_SUBDISK_S_ACTIVE);
1787	}
1788	g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW,
1789	    G_RAID_EVENT_SUBDISK);
1790
1791	return (resurrection);
1792}
1793
1794static void
1795g_raid_md_ddf_refill(struct g_raid_softc *sc)
1796{
1797	struct g_raid_volume *vol;
1798	struct g_raid_subdisk *sd;
1799	struct g_raid_disk *disk;
1800	struct g_raid_md_object *md;
1801	struct g_raid_md_ddf_perdisk *pd;
1802	struct g_raid_md_ddf_pervolume *pv;
1803	int update, updated, i, bad;
1804
1805	md = sc->sc_md;
1806restart:
1807	updated = 0;
1808	TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
1809		pv = vol->v_md_data;
1810		if (!pv->pv_started || vol->v_stopping)
1811			continue;
1812
1813		/* Search for subdisk that needs replacement. */
1814		bad = 0;
1815		for (i = 0; i < vol->v_disks_count; i++) {
1816			sd = &vol->v_subdisks[i];
1817			if (sd->sd_state == G_RAID_SUBDISK_S_NONE ||
1818			    sd->sd_state == G_RAID_SUBDISK_S_FAILED)
1819			        bad = 1;
1820		}
1821		if (!bad)
1822			continue;
1823
1824		G_RAID_DEBUG1(1, sc, "Volume %s is not complete, "
1825		    "trying to refill.", vol->v_name);
1826
1827		TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
1828			/* Skip failed. */
1829			if (disk->d_state < G_RAID_DISK_S_SPARE)
1830				continue;
1831			/* Skip already used by this volume. */
1832			for (i = 0; i < vol->v_disks_count; i++) {
1833				sd = &vol->v_subdisks[i];
1834				if (sd->sd_disk == disk)
1835					break;
1836			}
1837			if (i < vol->v_disks_count)
1838				continue;
1839
1840			/* Try to use disk if it has empty extents. */
1841			pd = disk->d_md_data;
1842			if (ddf_meta_count_vdc(&pd->pd_meta, NULL) <
1843			    GET16(&pd->pd_meta, hdr->Max_Partitions)) {
1844				update = g_raid_md_ddf_start_disk(disk, vol);
1845			} else
1846				update = 0;
1847			if (update) {
1848				updated = 1;
1849				g_raid_md_write_ddf(md, vol, NULL, disk);
1850				break;
1851			}
1852		}
1853	}
1854	if (updated)
1855		goto restart;
1856}
1857
1858static void
1859g_raid_md_ddf_start(struct g_raid_volume *vol)
1860{
1861	struct g_raid_softc *sc;
1862	struct g_raid_subdisk *sd;
1863	struct g_raid_disk *disk;
1864	struct g_raid_md_object *md;
1865	struct g_raid_md_ddf_perdisk *pd;
1866	struct g_raid_md_ddf_pervolume *pv;
1867	struct g_raid_md_ddf_object *mdi;
1868	struct ddf_vol_meta *vmeta;
1869	struct ddf_vdc_record *vdc;
1870	uint64_t *val2;
1871	int i, j, bvd;
1872
1873	sc = vol->v_softc;
1874	md = sc->sc_md;
1875	mdi = (struct g_raid_md_ddf_object *)md;
1876	pv = vol->v_md_data;
1877	vmeta = &pv->pv_meta;
1878	vdc = vmeta->vdc;
1879
1880	vol->v_raid_level = GET8(vmeta, vdc->Primary_RAID_Level);
1881	vol->v_raid_level_qualifier = GET8(vmeta, vdc->RLQ);
1882	if (GET8(vmeta, vdc->Secondary_Element_Count) > 1 &&
1883	    vol->v_raid_level == G_RAID_VOLUME_RL_RAID1 &&
1884	    GET8(vmeta, vdc->Secondary_RAID_Level) == 0)
1885		vol->v_raid_level = G_RAID_VOLUME_RL_RAID1E;
1886	vol->v_sectorsize = GET16(vmeta, vdc->Block_Size);
1887	if (vol->v_sectorsize == 0xffff)
1888		vol->v_sectorsize = vmeta->sectorsize;
1889	vol->v_strip_size = vol->v_sectorsize << GET8(vmeta, vdc->Stripe_Size);
1890	vol->v_disks_count = GET16(vmeta, vdc->Primary_Element_Count) *
1891	    GET8(vmeta, vdc->Secondary_Element_Count);
1892	vol->v_mdf_pdisks = GET8(vmeta, vdc->MDF_Parity_Disks);
1893	vol->v_mdf_polynomial = GET16(vmeta, vdc->MDF_Parity_Generator_Polynomial);
1894	vol->v_mdf_method = GET8(vmeta, vdc->MDF_Constant_Generation_Method);
1895	if (GET8(vmeta, vdc->Rotate_Parity_count) > 31)
1896		vol->v_rotate_parity = 1;
1897	else
1898		vol->v_rotate_parity = 1 << GET8(vmeta, vdc->Rotate_Parity_count);
1899	vol->v_mediasize = GET64(vmeta, vdc->VD_Size) * vol->v_sectorsize;
1900	for (i = 0, j = 0, bvd = 0; i < vol->v_disks_count; i++, j++) {
1901		if (j == GET16(vmeta, vdc->Primary_Element_Count)) {
1902			j = 0;
1903			bvd++;
1904		}
1905		sd = &vol->v_subdisks[i];
1906		if (vmeta->bvdc[bvd] == NULL) {
1907			sd->sd_offset = 0;
1908			sd->sd_size = GET64(vmeta, vdc->Block_Count) *
1909			    vol->v_sectorsize;
1910			continue;
1911		}
1912		val2 = (uint64_t *)&(vmeta->bvdc[bvd]->Physical_Disk_Sequence[
1913		    GET16(vmeta, hdr->Max_Primary_Element_Entries)]);
1914		sd->sd_offset = GET64P(vmeta, val2 + j) * vol->v_sectorsize;
1915		sd->sd_size = GET64(vmeta, bvdc[bvd]->Block_Count) *
1916		    vol->v_sectorsize;
1917	}
1918	g_raid_start_volume(vol);
1919
1920	/* Make all disks found till the moment take their places. */
1921	TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
1922		pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data;
1923		if (ddf_meta_find_vdc(&pd->pd_meta, vmeta->vdc->VD_GUID) != NULL)
1924			g_raid_md_ddf_start_disk(disk, vol);
1925	}
1926
1927	pv->pv_started = 1;
1928	mdi->mdio_starting--;
1929	callout_stop(&pv->pv_start_co);
1930	G_RAID_DEBUG1(0, sc, "Volume started.");
1931	g_raid_md_write_ddf(md, vol, NULL, NULL);
1932
1933	/* Pickup any STALE/SPARE disks to refill array if needed. */
1934	g_raid_md_ddf_refill(sc);
1935
1936	g_raid_event_send(vol, G_RAID_VOLUME_E_START, G_RAID_EVENT_VOLUME);
1937}
1938
1939static void
1940g_raid_ddf_go(void *arg)
1941{
1942	struct g_raid_volume *vol;
1943	struct g_raid_softc *sc;
1944	struct g_raid_md_ddf_pervolume *pv;
1945
1946	vol = arg;
1947	pv = vol->v_md_data;
1948	sc = vol->v_softc;
1949	if (!pv->pv_started) {
1950		G_RAID_DEBUG1(0, sc, "Force volume start due to timeout.");
1951		g_raid_event_send(vol, G_RAID_VOLUME_E_STARTMD,
1952		    G_RAID_EVENT_VOLUME);
1953	}
1954}
1955
1956static void
1957g_raid_md_ddf_new_disk(struct g_raid_disk *disk)
1958{
1959	struct g_raid_softc *sc;
1960	struct g_raid_md_object *md;
1961	struct g_raid_md_ddf_perdisk *pd;
1962	struct g_raid_md_ddf_pervolume *pv;
1963	struct g_raid_md_ddf_object *mdi;
1964	struct g_raid_volume *vol;
1965	struct ddf_meta *pdmeta;
1966	struct ddf_vol_meta *vmeta;
1967	struct ddf_vdc_record *vdc;
1968	struct ddf_vd_entry *vde;
1969	int i, j, k, num, have, need, cnt, spare;
1970	uint32_t val;
1971	char buf[17];
1972
1973	sc = disk->d_softc;
1974	md = sc->sc_md;
1975	mdi = (struct g_raid_md_ddf_object *)md;
1976	pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data;
1977	pdmeta = &pd->pd_meta;
1978	spare = -1;
1979
1980	if (mdi->mdio_meta.hdr == NULL)
1981		ddf_meta_copy(&mdi->mdio_meta, pdmeta);
1982	else
1983		ddf_meta_update(&mdi->mdio_meta, pdmeta);
1984
1985	num = GETCRNUM(pdmeta);
1986	for (j = 0; j < num; j++) {
1987		vdc = GETVDCPTR(pdmeta, j);
1988		val = GET32D(pdmeta, vdc->Signature);
1989
1990		if (val == DDF_SA_SIGNATURE && spare == -1)
1991			spare = 1;
1992
1993		if (val != DDF_VDCR_SIGNATURE)
1994			continue;
1995		spare = 0;
1996		k = ddf_meta_find_vd(pdmeta, vdc->VD_GUID);
1997		if (k < 0)
1998			continue;
1999		vde = &pdmeta->vdr->entry[k];
2000
2001		/* Look for volume with matching ID. */
2002		vol = g_raid_md_ddf_get_volume(sc, vdc->VD_GUID);
2003		if (vol == NULL) {
2004			ddf_meta_get_name(pdmeta, k, buf);
2005			vol = g_raid_create_volume(sc, buf,
2006			    GET16D(pdmeta, vde->VD_Number));
2007			pv = malloc(sizeof(*pv), M_MD_DDF, M_WAITOK | M_ZERO);
2008			vol->v_md_data = pv;
2009			callout_init(&pv->pv_start_co, 1);
2010			callout_reset(&pv->pv_start_co,
2011			    g_raid_start_timeout * hz,
2012			    g_raid_ddf_go, vol);
2013			mdi->mdio_starting++;
2014		} else
2015			pv = vol->v_md_data;
2016
2017		/* If we haven't started yet - check metadata freshness. */
2018		vmeta = &pv->pv_meta;
2019		ddf_vol_meta_update(vmeta, pdmeta, vdc->VD_GUID, pv->pv_started);
2020	}
2021
2022	if (spare == 1) {
2023		g_raid_change_disk_state(disk, G_RAID_DISK_S_SPARE);
2024		g_raid_md_ddf_refill(sc);
2025	}
2026
2027	TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
2028		pv = vol->v_md_data;
2029		vmeta = &pv->pv_meta;
2030
2031		if (ddf_meta_find_vdc(pdmeta, vmeta->vdc->VD_GUID) == NULL)
2032			continue;
2033
2034		if (pv->pv_started) {
2035			if (g_raid_md_ddf_start_disk(disk, vol))
2036				g_raid_md_write_ddf(md, vol, NULL, NULL);
2037			continue;
2038		}
2039
2040		/* If we collected all needed disks - start array. */
2041		need = 0;
2042		have = 0;
2043		for (k = 0; k < GET8(vmeta, vdc->Secondary_Element_Count); k++) {
2044			if (vmeta->bvdc[k] == NULL) {
2045				need += GET16(vmeta, vdc->Primary_Element_Count);
2046				continue;
2047			}
2048			cnt = GET16(vmeta, bvdc[k]->Primary_Element_Count);
2049			need += cnt;
2050			for (i = 0; i < cnt; i++) {
2051				val = GET32(vmeta, bvdc[k]->Physical_Disk_Sequence[i]);
2052				if (g_raid_md_ddf_get_disk(sc, NULL, val) != NULL)
2053					have++;
2054			}
2055		}
2056		G_RAID_DEBUG1(1, sc, "Volume %s now has %d of %d disks",
2057		    vol->v_name, have, need);
2058		if (have == need)
2059			g_raid_md_ddf_start(vol);
2060	}
2061}
2062
2063static int
2064g_raid_md_create_req_ddf(struct g_raid_md_object *md, struct g_class *mp,
2065    struct gctl_req *req, struct g_geom **gp)
2066{
2067	struct g_geom *geom;
2068	struct g_raid_softc *sc;
2069	struct g_raid_md_ddf_object *mdi, *mdi1;
2070	char name[16];
2071	const char *fmtopt;
2072	int be = 1;
2073
2074	mdi = (struct g_raid_md_ddf_object *)md;
2075	fmtopt = gctl_get_asciiparam(req, "fmtopt");
2076	if (fmtopt == NULL || strcasecmp(fmtopt, "BE") == 0)
2077		be = 1;
2078	else if (strcasecmp(fmtopt, "LE") == 0)
2079		be = 0;
2080	else {
2081		gctl_error(req, "Incorrect fmtopt argument.");
2082		return (G_RAID_MD_TASTE_FAIL);
2083	}
2084
2085	/* Search for existing node. */
2086	LIST_FOREACH(geom, &mp->geom, geom) {
2087		sc = geom->softc;
2088		if (sc == NULL)
2089			continue;
2090		if (sc->sc_stopping != 0)
2091			continue;
2092		if (sc->sc_md->mdo_class != md->mdo_class)
2093			continue;
2094		mdi1 = (struct g_raid_md_ddf_object *)sc->sc_md;
2095		if (mdi1->mdio_bigendian != be)
2096			continue;
2097		break;
2098	}
2099	if (geom != NULL) {
2100		*gp = geom;
2101		return (G_RAID_MD_TASTE_EXISTING);
2102	}
2103
2104	/* Create new one if not found. */
2105	mdi->mdio_bigendian = be;
2106	snprintf(name, sizeof(name), "DDF%s", be ? "" : "-LE");
2107	sc = g_raid_create_node(mp, name, md);
2108	if (sc == NULL)
2109		return (G_RAID_MD_TASTE_FAIL);
2110	md->mdo_softc = sc;
2111	*gp = sc->sc_geom;
2112	return (G_RAID_MD_TASTE_NEW);
2113}
2114
2115static int
2116g_raid_md_taste_ddf(struct g_raid_md_object *md, struct g_class *mp,
2117                              struct g_consumer *cp, struct g_geom **gp)
2118{
2119	struct g_consumer *rcp;
2120	struct g_provider *pp;
2121	struct g_raid_softc *sc;
2122	struct g_raid_disk *disk;
2123	struct ddf_meta meta;
2124	struct g_raid_md_ddf_perdisk *pd;
2125	struct g_raid_md_ddf_object *mdi;
2126	struct g_geom *geom;
2127	int error, result, be;
2128	char name[16];
2129
2130	G_RAID_DEBUG(1, "Tasting DDF on %s", cp->provider->name);
2131	mdi = (struct g_raid_md_ddf_object *)md;
2132	pp = cp->provider;
2133
2134	/* Read metadata from device. */
2135	g_topology_unlock();
2136	bzero(&meta, sizeof(meta));
2137	error = ddf_meta_read(cp, &meta);
2138	g_topology_lock();
2139	if (error != 0)
2140		return (G_RAID_MD_TASTE_FAIL);
2141	be = meta.bigendian;
2142
2143	/* Metadata valid. Print it. */
2144	g_raid_md_ddf_print(&meta);
2145
2146	/* Search for matching node. */
2147	sc = NULL;
2148	LIST_FOREACH(geom, &mp->geom, geom) {
2149		sc = geom->softc;
2150		if (sc == NULL)
2151			continue;
2152		if (sc->sc_stopping != 0)
2153			continue;
2154		if (sc->sc_md->mdo_class != md->mdo_class)
2155			continue;
2156		mdi = (struct g_raid_md_ddf_object *)sc->sc_md;
2157		if (mdi->mdio_bigendian != be)
2158			continue;
2159		break;
2160	}
2161
2162	/* Found matching node. */
2163	if (geom != NULL) {
2164		G_RAID_DEBUG(1, "Found matching array %s", sc->sc_name);
2165		result = G_RAID_MD_TASTE_EXISTING;
2166
2167	} else { /* Not found matching node -- create one. */
2168		result = G_RAID_MD_TASTE_NEW;
2169		mdi->mdio_bigendian = be;
2170		snprintf(name, sizeof(name), "DDF%s", be ? "" : "-LE");
2171		sc = g_raid_create_node(mp, name, md);
2172		md->mdo_softc = sc;
2173		geom = sc->sc_geom;
2174	}
2175
2176	/* There is no return after this point, so we close passed consumer. */
2177	g_access(cp, -1, 0, 0);
2178
2179	rcp = g_new_consumer(geom);
2180	rcp->flags |= G_CF_DIRECT_RECEIVE;
2181	g_attach(rcp, pp);
2182	if (g_access(rcp, 1, 1, 1) != 0)
2183		; //goto fail1;
2184
2185	g_topology_unlock();
2186	sx_xlock(&sc->sc_lock);
2187
2188	pd = malloc(sizeof(*pd), M_MD_DDF, M_WAITOK | M_ZERO);
2189	pd->pd_meta = meta;
2190	disk = g_raid_create_disk(sc);
2191	disk->d_md_data = (void *)pd;
2192	disk->d_consumer = rcp;
2193	rcp->private = disk;
2194
2195	g_raid_get_disk_info(disk);
2196
2197	g_raid_md_ddf_new_disk(disk);
2198
2199	sx_xunlock(&sc->sc_lock);
2200	g_topology_lock();
2201	*gp = geom;
2202	return (result);
2203}
2204
2205static int
2206g_raid_md_event_ddf(struct g_raid_md_object *md,
2207    struct g_raid_disk *disk, u_int event)
2208{
2209	struct g_raid_softc *sc;
2210
2211	sc = md->mdo_softc;
2212	if (disk == NULL)
2213		return (-1);
2214	switch (event) {
2215	case G_RAID_DISK_E_DISCONNECTED:
2216		/* Delete disk. */
2217		g_raid_change_disk_state(disk, G_RAID_DISK_S_NONE);
2218		g_raid_destroy_disk(disk);
2219		g_raid_md_ddf_purge_volumes(sc);
2220
2221		/* Write updated metadata to all disks. */
2222		g_raid_md_write_ddf(md, NULL, NULL, NULL);
2223
2224		/* Check if anything left. */
2225		if (g_raid_ndisks(sc, -1) == 0)
2226			g_raid_destroy_node(sc, 0);
2227		else
2228			g_raid_md_ddf_refill(sc);
2229		return (0);
2230	}
2231	return (-2);
2232}
2233
2234static int
2235g_raid_md_volume_event_ddf(struct g_raid_md_object *md,
2236    struct g_raid_volume *vol, u_int event)
2237{
2238	struct g_raid_md_ddf_pervolume *pv;
2239
2240	pv = (struct g_raid_md_ddf_pervolume *)vol->v_md_data;
2241	switch (event) {
2242	case G_RAID_VOLUME_E_STARTMD:
2243		if (!pv->pv_started)
2244			g_raid_md_ddf_start(vol);
2245		return (0);
2246	}
2247	return (-2);
2248}
2249
2250static int
2251g_raid_md_ctl_ddf(struct g_raid_md_object *md,
2252    struct gctl_req *req)
2253{
2254	struct g_raid_softc *sc;
2255	struct g_raid_volume *vol, *vol1;
2256	struct g_raid_subdisk *sd;
2257	struct g_raid_disk *disk, *disks[DDF_MAX_DISKS_HARD];
2258	struct g_raid_md_ddf_perdisk *pd;
2259	struct g_raid_md_ddf_pervolume *pv;
2260	struct g_raid_md_ddf_object *mdi;
2261	struct ddf_sa_record *sa;
2262	struct g_consumer *cp;
2263	struct g_provider *pp;
2264	char arg[16];
2265	const char *nodename, *verb, *volname, *levelname, *diskname;
2266	char *tmp;
2267	int *nargs, *force;
2268	off_t size, sectorsize, strip, offs[DDF_MAX_DISKS_HARD], esize;
2269	intmax_t *sizearg, *striparg;
2270	int i, numdisks, len, level, qual;
2271	int error;
2272
2273	sc = md->mdo_softc;
2274	mdi = (struct g_raid_md_ddf_object *)md;
2275	verb = gctl_get_param(req, "verb", NULL);
2276	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
2277	error = 0;
2278
2279	if (strcmp(verb, "label") == 0) {
2280
2281		if (*nargs < 4) {
2282			gctl_error(req, "Invalid number of arguments.");
2283			return (-1);
2284		}
2285		volname = gctl_get_asciiparam(req, "arg1");
2286		if (volname == NULL) {
2287			gctl_error(req, "No volume name.");
2288			return (-2);
2289		}
2290		levelname = gctl_get_asciiparam(req, "arg2");
2291		if (levelname == NULL) {
2292			gctl_error(req, "No RAID level.");
2293			return (-3);
2294		}
2295		if (g_raid_volume_str2level(levelname, &level, &qual)) {
2296			gctl_error(req, "Unknown RAID level '%s'.", levelname);
2297			return (-4);
2298		}
2299		numdisks = *nargs - 3;
2300		force = gctl_get_paraml(req, "force", sizeof(*force));
2301		if (!g_raid_md_ddf_supported(level, qual, numdisks,
2302		    force ? *force : 0)) {
2303			gctl_error(req, "Unsupported RAID level "
2304			    "(0x%02x/0x%02x), or number of disks (%d).",
2305			    level, qual, numdisks);
2306			return (-5);
2307		}
2308
2309		/* Search for disks, connect them and probe. */
2310		size = INT64_MAX;
2311		sectorsize = 0;
2312		bzero(disks, sizeof(disks));
2313		bzero(offs, sizeof(offs));
2314		for (i = 0; i < numdisks; i++) {
2315			snprintf(arg, sizeof(arg), "arg%d", i + 3);
2316			diskname = gctl_get_asciiparam(req, arg);
2317			if (diskname == NULL) {
2318				gctl_error(req, "No disk name (%s).", arg);
2319				error = -6;
2320				break;
2321			}
2322			if (strcmp(diskname, "NONE") == 0)
2323				continue;
2324
2325			TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
2326				if (disk->d_consumer != NULL &&
2327				    disk->d_consumer->provider != NULL &&
2328				    strcmp(disk->d_consumer->provider->name,
2329				     diskname) == 0)
2330					break;
2331			}
2332			if (disk != NULL) {
2333				if (disk->d_state != G_RAID_DISK_S_ACTIVE) {
2334					gctl_error(req, "Disk '%s' is in a "
2335					    "wrong state (%s).", diskname,
2336					    g_raid_disk_state2str(disk->d_state));
2337					error = -7;
2338					break;
2339				}
2340				pd = disk->d_md_data;
2341				if (ddf_meta_count_vdc(&pd->pd_meta, NULL) >=
2342				    GET16(&pd->pd_meta, hdr->Max_Partitions)) {
2343					gctl_error(req, "No free partitions "
2344					    "on disk '%s'.",
2345					    diskname);
2346					error = -7;
2347					break;
2348				}
2349				pp = disk->d_consumer->provider;
2350				disks[i] = disk;
2351				ddf_meta_unused_range(&pd->pd_meta,
2352				    &offs[i], &esize);
2353				offs[i] *= pp->sectorsize;
2354				size = MIN(size, (off_t)esize * pp->sectorsize);
2355				sectorsize = MAX(sectorsize, pp->sectorsize);
2356				continue;
2357			}
2358
2359			g_topology_lock();
2360			cp = g_raid_open_consumer(sc, diskname);
2361			if (cp == NULL) {
2362				gctl_error(req, "Can't open disk '%s'.",
2363				    diskname);
2364				g_topology_unlock();
2365				error = -8;
2366				break;
2367			}
2368			pp = cp->provider;
2369			pd = malloc(sizeof(*pd), M_MD_DDF, M_WAITOK | M_ZERO);
2370			disk = g_raid_create_disk(sc);
2371			disk->d_md_data = (void *)pd;
2372			disk->d_consumer = cp;
2373			disks[i] = disk;
2374			cp->private = disk;
2375			ddf_meta_create(disk, &mdi->mdio_meta);
2376			if (mdi->mdio_meta.hdr == NULL)
2377				ddf_meta_copy(&mdi->mdio_meta, &pd->pd_meta);
2378			else
2379				ddf_meta_update(&mdi->mdio_meta, &pd->pd_meta);
2380			g_topology_unlock();
2381
2382			g_raid_get_disk_info(disk);
2383
2384			/* Reserve some space for metadata. */
2385			size = MIN(size, GET64(&pd->pd_meta,
2386			    pdr->entry[0].Configured_Size) * pp->sectorsize);
2387			sectorsize = MAX(sectorsize, pp->sectorsize);
2388		}
2389		if (error != 0) {
2390			for (i = 0; i < numdisks; i++) {
2391				if (disks[i] != NULL &&
2392				    disks[i]->d_state == G_RAID_DISK_S_NONE)
2393					g_raid_destroy_disk(disks[i]);
2394			}
2395			return (error);
2396		}
2397
2398		if (sectorsize <= 0) {
2399			gctl_error(req, "Can't get sector size.");
2400			return (-8);
2401		}
2402
2403		/* Handle size argument. */
2404		len = sizeof(*sizearg);
2405		sizearg = gctl_get_param(req, "size", &len);
2406		if (sizearg != NULL && len == sizeof(*sizearg) &&
2407		    *sizearg > 0) {
2408			if (*sizearg > size) {
2409				gctl_error(req, "Size too big %lld > %lld.",
2410				    (long long)*sizearg, (long long)size);
2411				return (-9);
2412			}
2413			size = *sizearg;
2414		}
2415
2416		/* Handle strip argument. */
2417		strip = 131072;
2418		len = sizeof(*striparg);
2419		striparg = gctl_get_param(req, "strip", &len);
2420		if (striparg != NULL && len == sizeof(*striparg) &&
2421		    *striparg > 0) {
2422			if (*striparg < sectorsize) {
2423				gctl_error(req, "Strip size too small.");
2424				return (-10);
2425			}
2426			if (*striparg % sectorsize != 0) {
2427				gctl_error(req, "Incorrect strip size.");
2428				return (-11);
2429			}
2430			strip = *striparg;
2431		}
2432
2433		/* Round size down to strip or sector. */
2434		if (level == G_RAID_VOLUME_RL_RAID1 ||
2435		    level == G_RAID_VOLUME_RL_RAID3 ||
2436		    level == G_RAID_VOLUME_RL_SINGLE ||
2437		    level == G_RAID_VOLUME_RL_CONCAT)
2438			size -= (size % sectorsize);
2439		else if (level == G_RAID_VOLUME_RL_RAID1E &&
2440		    (numdisks & 1) != 0)
2441			size -= (size % (2 * strip));
2442		else
2443			size -= (size % strip);
2444		if (size <= 0) {
2445			gctl_error(req, "Size too small.");
2446			return (-13);
2447		}
2448
2449		/* We have all we need, create things: volume, ... */
2450		pv = malloc(sizeof(*pv), M_MD_DDF, M_WAITOK | M_ZERO);
2451		ddf_vol_meta_create(&pv->pv_meta, &mdi->mdio_meta);
2452		pv->pv_started = 1;
2453		vol = g_raid_create_volume(sc, volname, -1);
2454		vol->v_md_data = pv;
2455		vol->v_raid_level = level;
2456		vol->v_raid_level_qualifier = qual;
2457		vol->v_strip_size = strip;
2458		vol->v_disks_count = numdisks;
2459		if (level == G_RAID_VOLUME_RL_RAID0 ||
2460		    level == G_RAID_VOLUME_RL_CONCAT ||
2461		    level == G_RAID_VOLUME_RL_SINGLE)
2462			vol->v_mediasize = size * numdisks;
2463		else if (level == G_RAID_VOLUME_RL_RAID1)
2464			vol->v_mediasize = size;
2465		else if (level == G_RAID_VOLUME_RL_RAID3 ||
2466		    level == G_RAID_VOLUME_RL_RAID4 ||
2467		    level == G_RAID_VOLUME_RL_RAID5)
2468			vol->v_mediasize = size * (numdisks - 1);
2469		else if (level == G_RAID_VOLUME_RL_RAID5R) {
2470			vol->v_mediasize = size * (numdisks - 1);
2471			vol->v_rotate_parity = 1024;
2472		} else if (level == G_RAID_VOLUME_RL_RAID6 ||
2473		    level == G_RAID_VOLUME_RL_RAID5E ||
2474		    level == G_RAID_VOLUME_RL_RAID5EE)
2475			vol->v_mediasize = size * (numdisks - 2);
2476		else if (level == G_RAID_VOLUME_RL_RAIDMDF) {
2477			if (numdisks < 5)
2478				vol->v_mdf_pdisks = 2;
2479			else
2480				vol->v_mdf_pdisks = 3;
2481			vol->v_mdf_polynomial = 0x11d;
2482			vol->v_mdf_method = 0x00;
2483			vol->v_mediasize = size * (numdisks - vol->v_mdf_pdisks);
2484		} else { /* RAID1E */
2485			vol->v_mediasize = ((size * numdisks) / strip / 2) *
2486			    strip;
2487		}
2488		vol->v_sectorsize = sectorsize;
2489		g_raid_start_volume(vol);
2490
2491		/* , and subdisks. */
2492		for (i = 0; i < numdisks; i++) {
2493			disk = disks[i];
2494			sd = &vol->v_subdisks[i];
2495			sd->sd_disk = disk;
2496			sd->sd_offset = offs[i];
2497			sd->sd_size = size;
2498			if (disk == NULL)
2499				continue;
2500			TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next);
2501			g_raid_change_disk_state(disk,
2502			    G_RAID_DISK_S_ACTIVE);
2503			g_raid_change_subdisk_state(sd,
2504			    G_RAID_SUBDISK_S_ACTIVE);
2505			g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW,
2506			    G_RAID_EVENT_SUBDISK);
2507		}
2508
2509		/* Write metadata based on created entities. */
2510		G_RAID_DEBUG1(0, sc, "Array started.");
2511		g_raid_md_write_ddf(md, vol, NULL, NULL);
2512
2513		/* Pickup any STALE/SPARE disks to refill array if needed. */
2514		g_raid_md_ddf_refill(sc);
2515
2516		g_raid_event_send(vol, G_RAID_VOLUME_E_START,
2517		    G_RAID_EVENT_VOLUME);
2518		return (0);
2519	}
2520	if (strcmp(verb, "add") == 0) {
2521
2522		gctl_error(req, "`add` command is not applicable, "
2523		    "use `label` instead.");
2524		return (-99);
2525	}
2526	if (strcmp(verb, "delete") == 0) {
2527
2528		nodename = gctl_get_asciiparam(req, "arg0");
2529		if (nodename != NULL && strcasecmp(sc->sc_name, nodename) != 0)
2530			nodename = NULL;
2531
2532		/* Full node destruction. */
2533		if (*nargs == 1 && nodename != NULL) {
2534			/* Check if some volume is still open. */
2535			force = gctl_get_paraml(req, "force", sizeof(*force));
2536			if (force != NULL && *force == 0 &&
2537			    g_raid_nopens(sc) != 0) {
2538				gctl_error(req, "Some volume is still open.");
2539				return (-4);
2540			}
2541
2542			TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
2543				if (disk->d_consumer)
2544					ddf_meta_erase(disk->d_consumer);
2545			}
2546			g_raid_destroy_node(sc, 0);
2547			return (0);
2548		}
2549
2550		/* Destroy specified volume. If it was last - all node. */
2551		if (*nargs > 2) {
2552			gctl_error(req, "Invalid number of arguments.");
2553			return (-1);
2554		}
2555		volname = gctl_get_asciiparam(req,
2556		    nodename != NULL ? "arg1" : "arg0");
2557		if (volname == NULL) {
2558			gctl_error(req, "No volume name.");
2559			return (-2);
2560		}
2561
2562		/* Search for volume. */
2563		TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
2564			if (strcmp(vol->v_name, volname) == 0)
2565				break;
2566			pp = vol->v_provider;
2567			if (pp == NULL)
2568				continue;
2569			if (strcmp(pp->name, volname) == 0)
2570				break;
2571			if (strncmp(pp->name, "raid/", 5) == 0 &&
2572			    strcmp(pp->name + 5, volname) == 0)
2573				break;
2574		}
2575		if (vol == NULL) {
2576			i = strtol(volname, &tmp, 10);
2577			if (verb != volname && tmp[0] == 0) {
2578				TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
2579					if (vol->v_global_id == i)
2580						break;
2581				}
2582			}
2583		}
2584		if (vol == NULL) {
2585			gctl_error(req, "Volume '%s' not found.", volname);
2586			return (-3);
2587		}
2588
2589		/* Check if volume is still open. */
2590		force = gctl_get_paraml(req, "force", sizeof(*force));
2591		if (force != NULL && *force == 0 &&
2592		    vol->v_provider_open != 0) {
2593			gctl_error(req, "Volume is still open.");
2594			return (-4);
2595		}
2596
2597		/* Destroy volume and potentially node. */
2598		i = 0;
2599		TAILQ_FOREACH(vol1, &sc->sc_volumes, v_next)
2600			i++;
2601		if (i >= 2) {
2602			g_raid_destroy_volume(vol);
2603			g_raid_md_ddf_purge_disks(sc);
2604			g_raid_md_write_ddf(md, NULL, NULL, NULL);
2605		} else {
2606			TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
2607				if (disk->d_consumer)
2608					ddf_meta_erase(disk->d_consumer);
2609			}
2610			g_raid_destroy_node(sc, 0);
2611		}
2612		return (0);
2613	}
2614	if (strcmp(verb, "remove") == 0 ||
2615	    strcmp(verb, "fail") == 0) {
2616		if (*nargs < 2) {
2617			gctl_error(req, "Invalid number of arguments.");
2618			return (-1);
2619		}
2620		for (i = 1; i < *nargs; i++) {
2621			snprintf(arg, sizeof(arg), "arg%d", i);
2622			diskname = gctl_get_asciiparam(req, arg);
2623			if (diskname == NULL) {
2624				gctl_error(req, "No disk name (%s).", arg);
2625				error = -2;
2626				break;
2627			}
2628			if (strncmp(diskname, "/dev/", 5) == 0)
2629				diskname += 5;
2630
2631			TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
2632				if (disk->d_consumer != NULL &&
2633				    disk->d_consumer->provider != NULL &&
2634				    strcmp(disk->d_consumer->provider->name,
2635				     diskname) == 0)
2636					break;
2637			}
2638			if (disk == NULL) {
2639				gctl_error(req, "Disk '%s' not found.",
2640				    diskname);
2641				error = -3;
2642				break;
2643			}
2644
2645			if (strcmp(verb, "fail") == 0) {
2646				g_raid_md_fail_disk_ddf(md, NULL, disk);
2647				continue;
2648			}
2649
2650			/* Erase metadata on deleting disk and destroy it. */
2651			ddf_meta_erase(disk->d_consumer);
2652			g_raid_destroy_disk(disk);
2653		}
2654		g_raid_md_ddf_purge_volumes(sc);
2655
2656		/* Write updated metadata to remaining disks. */
2657		g_raid_md_write_ddf(md, NULL, NULL, NULL);
2658
2659		/* Check if anything left. */
2660		if (g_raid_ndisks(sc, -1) == 0)
2661			g_raid_destroy_node(sc, 0);
2662		else
2663			g_raid_md_ddf_refill(sc);
2664		return (error);
2665	}
2666	if (strcmp(verb, "insert") == 0) {
2667		if (*nargs < 2) {
2668			gctl_error(req, "Invalid number of arguments.");
2669			return (-1);
2670		}
2671		for (i = 1; i < *nargs; i++) {
2672			/* Get disk name. */
2673			snprintf(arg, sizeof(arg), "arg%d", i);
2674			diskname = gctl_get_asciiparam(req, arg);
2675			if (diskname == NULL) {
2676				gctl_error(req, "No disk name (%s).", arg);
2677				error = -3;
2678				break;
2679			}
2680
2681			/* Try to find provider with specified name. */
2682			g_topology_lock();
2683			cp = g_raid_open_consumer(sc, diskname);
2684			if (cp == NULL) {
2685				gctl_error(req, "Can't open disk '%s'.",
2686				    diskname);
2687				g_topology_unlock();
2688				error = -4;
2689				break;
2690			}
2691			pp = cp->provider;
2692			g_topology_unlock();
2693
2694			pd = malloc(sizeof(*pd), M_MD_DDF, M_WAITOK | M_ZERO);
2695
2696			disk = g_raid_create_disk(sc);
2697			disk->d_consumer = cp;
2698			disk->d_md_data = (void *)pd;
2699			cp->private = disk;
2700
2701			g_raid_get_disk_info(disk);
2702
2703			/* Welcome the "new" disk. */
2704			g_raid_change_disk_state(disk, G_RAID_DISK_S_SPARE);
2705			ddf_meta_create(disk, &mdi->mdio_meta);
2706			sa = ddf_meta_find_sa(&pd->pd_meta, 1);
2707			if (sa != NULL) {
2708				SET32D(&pd->pd_meta, sa->Signature,
2709				    DDF_SA_SIGNATURE);
2710				SET8D(&pd->pd_meta, sa->Spare_Type, 0);
2711				SET16D(&pd->pd_meta, sa->Populated_SAEs, 0);
2712				SET16D(&pd->pd_meta, sa->MAX_SAE_Supported,
2713				    (GET16(&pd->pd_meta, hdr->Configuration_Record_Length) *
2714				     pd->pd_meta.sectorsize -
2715				     sizeof(struct ddf_sa_record)) /
2716				    sizeof(struct ddf_sa_entry));
2717			}
2718			if (mdi->mdio_meta.hdr == NULL)
2719				ddf_meta_copy(&mdi->mdio_meta, &pd->pd_meta);
2720			else
2721				ddf_meta_update(&mdi->mdio_meta, &pd->pd_meta);
2722			g_raid_md_write_ddf(md, NULL, NULL, NULL);
2723			g_raid_md_ddf_refill(sc);
2724		}
2725		return (error);
2726	}
2727	return (-100);
2728}
2729
2730static int
2731g_raid_md_write_ddf(struct g_raid_md_object *md, struct g_raid_volume *tvol,
2732    struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk)
2733{
2734	struct g_raid_softc *sc;
2735	struct g_raid_volume *vol;
2736	struct g_raid_subdisk *sd;
2737	struct g_raid_disk *disk;
2738	struct g_raid_md_ddf_perdisk *pd;
2739	struct g_raid_md_ddf_pervolume *pv;
2740	struct g_raid_md_ddf_object *mdi;
2741	struct ddf_meta *gmeta;
2742	struct ddf_vol_meta *vmeta;
2743	struct ddf_vdc_record *vdc;
2744	struct ddf_sa_record *sa;
2745	uint64_t *val2;
2746	int i, j, pos, bvd, size;
2747
2748	sc = md->mdo_softc;
2749	mdi = (struct g_raid_md_ddf_object *)md;
2750	gmeta = &mdi->mdio_meta;
2751
2752	if (sc->sc_stopping == G_RAID_DESTROY_HARD)
2753		return (0);
2754
2755	/*
2756	 * Clear disk flags to let only really needed ones to be reset.
2757	 * Do it only if there are no volumes in starting state now,
2758	 * as they can update disk statuses yet and we may kill innocent.
2759	 */
2760	if (mdi->mdio_starting == 0) {
2761		for (i = 0; i < GET16(gmeta, pdr->Populated_PDEs); i++) {
2762			if (isff(gmeta->pdr->entry[i].PD_GUID, 24))
2763				continue;
2764			SET16(gmeta, pdr->entry[i].PD_Type,
2765			    GET16(gmeta, pdr->entry[i].PD_Type) &
2766			    ~(DDF_PDE_PARTICIPATING |
2767			      DDF_PDE_GLOBAL_SPARE | DDF_PDE_CONFIG_SPARE));
2768			if ((GET16(gmeta, pdr->entry[i].PD_State) &
2769			    DDF_PDE_PFA) == 0)
2770				SET16(gmeta, pdr->entry[i].PD_State, 0);
2771		}
2772	}
2773
2774	/* Generate/update new per-volume metadata. */
2775	TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
2776		pv = (struct g_raid_md_ddf_pervolume *)vol->v_md_data;
2777		if (vol->v_stopping || !pv->pv_started)
2778			continue;
2779		vmeta = &pv->pv_meta;
2780
2781		SET32(vmeta, vdc->Sequence_Number,
2782		    GET32(vmeta, vdc->Sequence_Number) + 1);
2783		if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E &&
2784		    vol->v_disks_count % 2 == 0)
2785			SET16(vmeta, vdc->Primary_Element_Count, 2);
2786		else
2787			SET16(vmeta, vdc->Primary_Element_Count,
2788			    vol->v_disks_count);
2789		SET8(vmeta, vdc->Stripe_Size,
2790		    ffs(vol->v_strip_size / vol->v_sectorsize) - 1);
2791		if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E &&
2792		    vol->v_disks_count % 2 == 0) {
2793			SET8(vmeta, vdc->Primary_RAID_Level,
2794			    DDF_VDCR_RAID1);
2795			SET8(vmeta, vdc->RLQ, 0);
2796			SET8(vmeta, vdc->Secondary_Element_Count,
2797			    vol->v_disks_count / 2);
2798			SET8(vmeta, vdc->Secondary_RAID_Level, 0);
2799		} else {
2800			SET8(vmeta, vdc->Primary_RAID_Level,
2801			    vol->v_raid_level);
2802			SET8(vmeta, vdc->RLQ,
2803			    vol->v_raid_level_qualifier);
2804			SET8(vmeta, vdc->Secondary_Element_Count, 1);
2805			SET8(vmeta, vdc->Secondary_RAID_Level, 0);
2806		}
2807		SET8(vmeta, vdc->Secondary_Element_Seq, 0);
2808		SET64(vmeta, vdc->Block_Count, 0);
2809		SET64(vmeta, vdc->VD_Size, vol->v_mediasize / vol->v_sectorsize);
2810		SET16(vmeta, vdc->Block_Size, vol->v_sectorsize);
2811		SET8(vmeta, vdc->Rotate_Parity_count,
2812		    fls(vol->v_rotate_parity) - 1);
2813		SET8(vmeta, vdc->MDF_Parity_Disks, vol->v_mdf_pdisks);
2814		SET16(vmeta, vdc->MDF_Parity_Generator_Polynomial,
2815		    vol->v_mdf_polynomial);
2816		SET8(vmeta, vdc->MDF_Constant_Generation_Method,
2817		    vol->v_mdf_method);
2818
2819		SET16(vmeta, vde->VD_Number, vol->v_global_id);
2820		if (vol->v_state <= G_RAID_VOLUME_S_BROKEN)
2821			SET8(vmeta, vde->VD_State, DDF_VDE_FAILED);
2822		else if (vol->v_state <= G_RAID_VOLUME_S_DEGRADED)
2823			SET8(vmeta, vde->VD_State, DDF_VDE_DEGRADED);
2824		else if (vol->v_state <= G_RAID_VOLUME_S_SUBOPTIMAL)
2825			SET8(vmeta, vde->VD_State, DDF_VDE_PARTIAL);
2826		else
2827			SET8(vmeta, vde->VD_State, DDF_VDE_OPTIMAL);
2828		if (vol->v_dirty ||
2829		    g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_STALE) > 0 ||
2830		    g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_RESYNC) > 0)
2831			SET8(vmeta, vde->VD_State,
2832			    GET8(vmeta, vde->VD_State) | DDF_VDE_DIRTY);
2833		SET8(vmeta, vde->Init_State, DDF_VDE_INIT_FULL); // XXX
2834		ddf_meta_put_name(vmeta, vol->v_name);
2835
2836		for (i = 0; i < vol->v_disks_count; i++) {
2837			sd = &vol->v_subdisks[i];
2838			bvd = i / GET16(vmeta, vdc->Primary_Element_Count);
2839			pos = i % GET16(vmeta, vdc->Primary_Element_Count);
2840			disk = sd->sd_disk;
2841			if (disk != NULL) {
2842				pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data;
2843				if (vmeta->bvdc[bvd] == NULL) {
2844					size = GET16(vmeta,
2845					    hdr->Configuration_Record_Length) *
2846					    vmeta->sectorsize;
2847					vmeta->bvdc[bvd] = malloc(size,
2848					    M_MD_DDF, M_WAITOK);
2849					memset(vmeta->bvdc[bvd], 0xff, size);
2850				}
2851				memcpy(vmeta->bvdc[bvd], vmeta->vdc,
2852				    sizeof(struct ddf_vdc_record));
2853				SET8(vmeta, bvdc[bvd]->Secondary_Element_Seq, bvd);
2854				SET64(vmeta, bvdc[bvd]->Block_Count,
2855				    sd->sd_size / vol->v_sectorsize);
2856				SET32(vmeta, bvdc[bvd]->Physical_Disk_Sequence[pos],
2857				    GET32(&pd->pd_meta, pdd->PD_Reference));
2858				val2 = (uint64_t *)&(vmeta->bvdc[bvd]->Physical_Disk_Sequence[
2859				    GET16(vmeta, hdr->Max_Primary_Element_Entries)]);
2860				SET64P(vmeta, val2 + pos,
2861				    sd->sd_offset / vol->v_sectorsize);
2862			}
2863			if (vmeta->bvdc[bvd] == NULL)
2864				continue;
2865
2866			j = ddf_meta_find_pd(gmeta, NULL,
2867			    GET32(vmeta, bvdc[bvd]->Physical_Disk_Sequence[pos]));
2868			if (j < 0)
2869				continue;
2870			SET16(gmeta, pdr->entry[j].PD_Type,
2871			    GET16(gmeta, pdr->entry[j].PD_Type) |
2872			    DDF_PDE_PARTICIPATING);
2873			if (sd->sd_state == G_RAID_SUBDISK_S_NONE)
2874				SET16(gmeta, pdr->entry[j].PD_State,
2875				    GET16(gmeta, pdr->entry[j].PD_State) |
2876				    (DDF_PDE_FAILED | DDF_PDE_MISSING));
2877			else if (sd->sd_state == G_RAID_SUBDISK_S_FAILED)
2878				SET16(gmeta, pdr->entry[j].PD_State,
2879				    GET16(gmeta, pdr->entry[j].PD_State) |
2880				    (DDF_PDE_FAILED | DDF_PDE_PFA));
2881			else if (sd->sd_state <= G_RAID_SUBDISK_S_REBUILD)
2882				SET16(gmeta, pdr->entry[j].PD_State,
2883				    GET16(gmeta, pdr->entry[j].PD_State) |
2884				    DDF_PDE_REBUILD);
2885			else
2886				SET16(gmeta, pdr->entry[j].PD_State,
2887				    GET16(gmeta, pdr->entry[j].PD_State) |
2888				    DDF_PDE_ONLINE);
2889		}
2890	}
2891
2892	/* Mark spare and failed disks as such. */
2893	TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
2894		pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data;
2895		i = ddf_meta_find_pd(gmeta, NULL,
2896		    GET32(&pd->pd_meta, pdd->PD_Reference));
2897		if (i < 0)
2898			continue;
2899		if (disk->d_state == G_RAID_DISK_S_FAILED) {
2900			SET16(gmeta, pdr->entry[i].PD_State,
2901			    GET16(gmeta, pdr->entry[i].PD_State) |
2902			    (DDF_PDE_FAILED | DDF_PDE_PFA));
2903		}
2904		if (disk->d_state != G_RAID_DISK_S_SPARE)
2905			continue;
2906		sa = ddf_meta_find_sa(&pd->pd_meta, 0);
2907		if (sa == NULL ||
2908		    (GET8D(&pd->pd_meta, sa->Spare_Type) &
2909		     DDF_SAR_TYPE_DEDICATED) == 0) {
2910			SET16(gmeta, pdr->entry[i].PD_Type,
2911			    GET16(gmeta, pdr->entry[i].PD_Type) |
2912			    DDF_PDE_GLOBAL_SPARE);
2913		} else {
2914			SET16(gmeta, pdr->entry[i].PD_Type,
2915			    GET16(gmeta, pdr->entry[i].PD_Type) |
2916			    DDF_PDE_CONFIG_SPARE);
2917		}
2918		SET16(gmeta, pdr->entry[i].PD_State,
2919		    GET16(gmeta, pdr->entry[i].PD_State) |
2920		    DDF_PDE_ONLINE);
2921	}
2922
2923	/* Remove disks without "participating" flag (unused). */
2924	for (i = 0, j = -1; i < GET16(gmeta, pdr->Populated_PDEs); i++) {
2925		if (isff(gmeta->pdr->entry[i].PD_GUID, 24))
2926			continue;
2927		if ((GET16(gmeta, pdr->entry[i].PD_Type) &
2928		    (DDF_PDE_PARTICIPATING |
2929		     DDF_PDE_GLOBAL_SPARE | DDF_PDE_CONFIG_SPARE)) != 0 ||
2930		    g_raid_md_ddf_get_disk(sc,
2931		     NULL, GET32(gmeta, pdr->entry[i].PD_Reference)) != NULL)
2932			j = i;
2933		else
2934			memset(&gmeta->pdr->entry[i], 0xff,
2935			    sizeof(struct ddf_pd_entry));
2936	}
2937	SET16(gmeta, pdr->Populated_PDEs, j + 1);
2938
2939	/* Update per-disk metadata and write them. */
2940	TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
2941		pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data;
2942		if (disk->d_state != G_RAID_DISK_S_ACTIVE &&
2943		    disk->d_state != G_RAID_DISK_S_SPARE)
2944			continue;
2945		/* Update PDR. */
2946		memcpy(pd->pd_meta.pdr, gmeta->pdr,
2947		    GET32(&pd->pd_meta, hdr->pdr_length) *
2948		    pd->pd_meta.sectorsize);
2949		/* Update VDR. */
2950		SET16(&pd->pd_meta, vdr->Populated_VDEs, 0);
2951		TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
2952			if (vol->v_stopping)
2953				continue;
2954			pv = (struct g_raid_md_ddf_pervolume *)vol->v_md_data;
2955			i = ddf_meta_find_vd(&pd->pd_meta,
2956			    pv->pv_meta.vde->VD_GUID);
2957			if (i < 0)
2958				i = ddf_meta_find_vd(&pd->pd_meta, NULL);
2959			if (i >= 0)
2960				memcpy(&pd->pd_meta.vdr->entry[i],
2961				    pv->pv_meta.vde,
2962				    sizeof(struct ddf_vd_entry));
2963		}
2964		/* Update VDC. */
2965		if (mdi->mdio_starting == 0) {
2966			/* Remove all VDCs to restore needed later. */
2967			j = GETCRNUM(&pd->pd_meta);
2968			for (i = 0; i < j; i++) {
2969				vdc = GETVDCPTR(&pd->pd_meta, i);
2970				if (GET32D(&pd->pd_meta, vdc->Signature) !=
2971				    DDF_VDCR_SIGNATURE)
2972					continue;
2973				SET32D(&pd->pd_meta, vdc->Signature, 0xffffffff);
2974			}
2975		}
2976		TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) {
2977			vol = sd->sd_volume;
2978			if (vol->v_stopping)
2979				continue;
2980			pv = (struct g_raid_md_ddf_pervolume *)vol->v_md_data;
2981			vmeta = &pv->pv_meta;
2982			vdc = ddf_meta_find_vdc(&pd->pd_meta,
2983			    vmeta->vde->VD_GUID);
2984			if (vdc == NULL)
2985				vdc = ddf_meta_find_vdc(&pd->pd_meta, NULL);
2986			if (vdc != NULL) {
2987				bvd = sd->sd_pos / GET16(vmeta,
2988				    vdc->Primary_Element_Count);
2989				memcpy(vdc, vmeta->bvdc[bvd],
2990				    GET16(&pd->pd_meta,
2991				    hdr->Configuration_Record_Length) *
2992				    pd->pd_meta.sectorsize);
2993			}
2994		}
2995		G_RAID_DEBUG(1, "Writing DDF metadata to %s",
2996		    g_raid_get_diskname(disk));
2997		g_raid_md_ddf_print(&pd->pd_meta);
2998		ddf_meta_write(disk->d_consumer, &pd->pd_meta);
2999	}
3000	return (0);
3001}
3002
3003static int
3004g_raid_md_fail_disk_ddf(struct g_raid_md_object *md,
3005    struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk)
3006{
3007	struct g_raid_softc *sc;
3008	struct g_raid_md_ddf_perdisk *pd;
3009	struct g_raid_subdisk *sd;
3010	int i;
3011
3012	sc = md->mdo_softc;
3013	pd = (struct g_raid_md_ddf_perdisk *)tdisk->d_md_data;
3014
3015	/* We can't fail disk that is not a part of array now. */
3016	if (tdisk->d_state != G_RAID_DISK_S_ACTIVE)
3017		return (-1);
3018
3019	/*
3020	 * Mark disk as failed in metadata and try to write that metadata
3021	 * to the disk itself to prevent it's later resurrection as STALE.
3022	 */
3023	G_RAID_DEBUG(1, "Writing DDF metadata to %s",
3024	    g_raid_get_diskname(tdisk));
3025	i = ddf_meta_find_pd(&pd->pd_meta, NULL, GET32(&pd->pd_meta, pdd->PD_Reference));
3026	SET16(&pd->pd_meta, pdr->entry[i].PD_State, DDF_PDE_FAILED | DDF_PDE_PFA);
3027	if (tdisk->d_consumer != NULL)
3028		ddf_meta_write(tdisk->d_consumer, &pd->pd_meta);
3029
3030	/* Change states. */
3031	g_raid_change_disk_state(tdisk, G_RAID_DISK_S_FAILED);
3032	TAILQ_FOREACH(sd, &tdisk->d_subdisks, sd_next) {
3033		g_raid_change_subdisk_state(sd,
3034		    G_RAID_SUBDISK_S_FAILED);
3035		g_raid_event_send(sd, G_RAID_SUBDISK_E_FAILED,
3036		    G_RAID_EVENT_SUBDISK);
3037	}
3038
3039	/* Write updated metadata to remaining disks. */
3040	g_raid_md_write_ddf(md, NULL, NULL, tdisk);
3041
3042	g_raid_md_ddf_refill(sc);
3043	return (0);
3044}
3045
3046static int
3047g_raid_md_free_disk_ddf(struct g_raid_md_object *md,
3048    struct g_raid_disk *disk)
3049{
3050	struct g_raid_md_ddf_perdisk *pd;
3051
3052	pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data;
3053	ddf_meta_free(&pd->pd_meta);
3054	free(pd, M_MD_DDF);
3055	disk->d_md_data = NULL;
3056	return (0);
3057}
3058
3059static int
3060g_raid_md_free_volume_ddf(struct g_raid_md_object *md,
3061    struct g_raid_volume *vol)
3062{
3063	struct g_raid_md_ddf_object *mdi;
3064	struct g_raid_md_ddf_pervolume *pv;
3065
3066	mdi = (struct g_raid_md_ddf_object *)md;
3067	pv = (struct g_raid_md_ddf_pervolume *)vol->v_md_data;
3068	ddf_vol_meta_free(&pv->pv_meta);
3069	if (!pv->pv_started) {
3070		pv->pv_started = 1;
3071		mdi->mdio_starting--;
3072		callout_stop(&pv->pv_start_co);
3073	}
3074	free(pv, M_MD_DDF);
3075	vol->v_md_data = NULL;
3076	return (0);
3077}
3078
3079static int
3080g_raid_md_free_ddf(struct g_raid_md_object *md)
3081{
3082	struct g_raid_md_ddf_object *mdi;
3083
3084	mdi = (struct g_raid_md_ddf_object *)md;
3085	if (!mdi->mdio_started) {
3086		mdi->mdio_started = 0;
3087		callout_stop(&mdi->mdio_start_co);
3088		G_RAID_DEBUG1(1, md->mdo_softc,
3089		    "root_mount_rel %p", mdi->mdio_rootmount);
3090		root_mount_rel(mdi->mdio_rootmount);
3091		mdi->mdio_rootmount = NULL;
3092	}
3093	ddf_meta_free(&mdi->mdio_meta);
3094	return (0);
3095}
3096
/*
 * Declare the "ddf" metadata class under the name "DDF".
 * NOTE(review): G_RAID_MD_DECLARE is defined outside this file; presumably
 * it registers the class with the GEOM RAID framework -- confirm in g_raid.h.
 */
G_RAID_MD_DECLARE(ddf, "DDF");
3098