xref: /illumos-gate/usr/src/cmd/format/analyze.c (revision b12aaafb)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * This file contains routines to analyze the surface of a disk.
28  */
29 #include "global.h"
30 #include "analyze.h"
31 #include <stdlib.h>
32 #include <errno.h>
33 #include "misc.h"
34 #include "defect.h"
35 #include "label.h"
36 #include "param.h"
37 #include "checkdev.h"
38 
39 
40 /*
41  * These global variables control the surface analysis process.  They
42  * are set from a command in the defect menu.
43  */
44 int	scan_entire = 1;		/* scan whole disk flag */
45 diskaddr_t	scan_lower = 0;			/* lower bound */
46 diskaddr_t	scan_upper = 0;			/* upper bound */
47 int	scan_correct = 1;		/* correct errors flag */
48 int	scan_stop = 0;			/* stop after error flag */
49 int	scan_loop = 0;			/* loop forever flag */
50 int	scan_passes = 2;		/* number of passes */
51 int	scan_random = 0;		/* random patterns flag */
52 uint_t	scan_size = 0;			/* sectors/scan operation */
53 int	scan_auto = 1;			/* scan after format flag */
54 int	scan_restore_defects = 1;	/* restore defect list after writing */
55 int	scan_restore_label = 1;		/* restore label after writing */
56 
57 /*
58  * These are summary variables to print out info after analysis.
59  * Values less than 0 imply they are invalid.
60  */
61 offset_t	scan_cur_block = -1;		/* current block */
62 int64_t		scan_blocks_fixed = -1;		/* # blocks repaired */
63 
64 /*
65  * This variable is used to tell whether the most recent surface
66  * analysis error was caused by a media defect or some other problem.
67  */
68 int	media_error;			/* error was caused by defect */
69 
70 int	disk_error;			/* disk errors during analysis */
71 
/*
 * These are the data patterns used if random patterns are not chosen.
 * They are designed to show pattern dependent errors.  The table is
 * only ever read, so it is declared const.
 */
static const unsigned int	scan_patterns[] = {
	0xc6dec6de,
	0x6db6db6d,
	0x00000000,
	0xffffffff,
	0xaaaaaaaa,
};

/*
 * Number of predefined patterns.  It is computed from the table so the
 * count cannot drift when a pattern is added or removed; the cast keeps
 * the macro an int, as it was when it was a literal.
 */
#define	NPATTERNS	\
	((int)(sizeof (scan_patterns) / sizeof (scan_patterns[0])))
84 
/*
 * These are the data patterns from the SunFed requirements document.
 * They are only ever read, so they are declared const.
 */
static const unsigned int purge_patterns[] = {	/* patterns to be written */
	0xaaaaaaaa,		/* 10101010... */
	0x55555555,		/* 01010101...  == UUUU... */
	0xaaaaaaaa,		/* 10101010... */
	0xaaaaaaaa,		/* 10101010... */
};

/*
 * Pattern for the alpha pass of a purge.  Each byte is 0x40, i.e. the
 * bits 01000000, which is the ASCII character '@'.
 */
static const unsigned int alpha_pattern = 0x40404040;	/* == @@@@... */
96 
97 static int	scan_repair(diskaddr_t bn, int mode);
98 static int	analyze_blocks(int flags, diskaddr_t blkno, uint_t blkcnt,
99 		unsigned data, int init, int driver_flags, int *xfercntp);
100 static int	handle_error_conditions(void);
101 static int	verify_blocks(int flags, diskaddr_t blkno, uint_t blkcnt,
102 		unsigned data, int driver_flags, int *xfercntp);
103 
104 /*
105  * This routine performs a surface analysis based upon the global
106  * parameters.  It is called from several commands in the defect menu,
107  * and from the format command in the command menu (if post-format
108  * analysis is enable).
109  */
110 int
do_scan(int flags,int mode)111 do_scan(int flags, int mode)
112 {
113 	diskaddr_t	start, end, curnt;
114 	int	pass, needinit, data;
115 	uint_t	size;
116 	int	status, founderr, i, j;
117 	int	error = 0;
118 	int	pattern = 0;
119 	int	xfercnt;
120 
121 	/*
122 	 * Check to be sure we aren't correcting without a defect list
123 	 * if the controller can correct the defect.
124 	 */
125 	if (scan_correct && !EMBEDDED_SCSI && (cur_ops->op_repair != NULL) &&
126 	    (cur_list.list == NULL)) {
127 		err_print("Current Defect List must be initialized ");
128 		err_print("to do automatic repair.\n");
129 		return (-1);
130 	}
131 	/*
132 	 * Define the bounds of the scan.
133 	 */
134 	if (scan_entire) {
135 		start = 0;
136 		if (cur_label == L_TYPE_SOLARIS) {
137 			if (cur_ctype->ctype_flags & CF_SCSI)
138 				end = datasects() - 1;
139 			else
140 				end = physsects() - 1;
141 		} else if (cur_label == L_TYPE_EFI) {
142 			end = cur_parts->etoc->efi_last_lba;
143 		}
144 	} else {
145 		start = scan_lower;
146 		end = scan_upper;
147 	}
148 	/*
149 	 * Make sure the user knows if we are scanning over a mounted
150 	 * partition.
151 	 */
152 	if ((flags & (SCAN_PATTERN | SCAN_WRITE)) &&
153 	    (checkmount(start, end))) {
154 		err_print("Cannot do analysis on a mounted partition.\n");
155 		return (-1);
156 	}
157 
158 	/*
159 	 * Make sure the user knows if we are scanning over a
160 	 * partition being used for swapping.
161 	 */
162 	if ((flags & (SCAN_PATTERN | SCAN_WRITE)) &&
163 	    (checkswap(start, end))) {
164 		err_print("Cannot do analysis on a partition \
165 		    which is currently being used for swapping.\n");
166 		return (-1);
167 	}
168 
169 	/*
170 	 * Check to see if any partitions used for svm, vxvm, ZFS zpool
171 	 * or live upgrade are on the disk.
172 	 */
173 	if ((flags & (SCAN_PATTERN | SCAN_WRITE)) &&
174 	    (checkdevinuse(cur_disk->disk_name, (diskaddr_t)-1,
175 	    (diskaddr_t)-1, 0, 0))) {
176 		err_print("Cannot do analysis on a partition "
177 		    "while it in use as described above.\n");
178 		return (-1);
179 	}
180 
181 	/*
182 	 * If we are scanning destructively over certain sectors,
183 	 * we mark the defect list and/or label dirty so it will get rewritten.
184 	 */
185 	if (flags & (SCAN_PATTERN | SCAN_WRITE)) {
186 		if (cur_label == L_TYPE_SOLARIS) {
187 			if (start < (diskaddr_t)totalsects() &&
188 			    end >= (diskaddr_t)datasects()) {
189 				if (!EMBEDDED_SCSI) {
190 					cur_list.flags |= LIST_DIRTY;
191 				}
192 				if (cur_disk->disk_flags & DSK_LABEL)
193 					cur_flags |= LABEL_DIRTY;
194 			}
195 		}
196 		if (start == 0) {
197 			if (cur_disk->disk_flags & DSK_LABEL)
198 				cur_flags |= LABEL_DIRTY;
199 		}
200 	}
201 	/*
202 	 * Initialize the summary info on sectors repaired.
203 	 */
204 	scan_blocks_fixed = 0;
205 	/*
206 	 * Loop through the passes of the scan. If required, loop forever.
207 	 */
208 	for (pass = 0; pass < scan_passes || scan_loop; pass++) {
209 		/*
210 		 * Determine the data pattern to use if pattern testing
211 		 * is to be done.
212 		 */
213 		if (flags & SCAN_PATTERN) {
214 			if (scan_random)
215 				data = (int)mrand48();
216 			else
217 				data = scan_patterns[pass % NPPATTERNS];
218 
219 			if (flags & SCAN_PURGE) {
220 				flags &= ~(SCAN_PURGE_READ_PASS
221 				    | SCAN_PURGE_ALPHA_PASS);
222 				switch (pattern % (NPPATTERNS + 1)) {
223 				case NPPATTERNS:
224 					pattern = 0;
225 					if (!error) {
226 						fmt_print(
227 "\nThe last %d passes were successful, running alpha pattern pass", NPPATTERNS);
228 						flags |= SCAN_PURGE_ALPHA_PASS;
229 						data = alpha_pattern;
230 					} else {
231 						data = purge_patterns[pattern];
232 						pattern++;
233 					};
234 					break;
235 				case READPATTERN:
236 					flags |=  SCAN_PURGE_READ_PASS;
237 					/* FALLTHROUGH */
238 				default:
239 					data = purge_patterns[pattern];
240 					pattern++;
241 					break;
242 				}
243 			}
244 			fmt_print("\n        pass %d", pass);
245 			fmt_print(" - pattern = 0x%x", data);
246 		} else
247 			fmt_print("\n        pass %d", pass);
248 
249 		fmt_print("\n");
250 		/*
251 		 * Mark the pattern buffer as corrupt, since it
252 		 * hasn't been initialized.
253 		 */
254 		needinit = 1;
255 		/*
256 		 * Print the first block number to the log file if
257 		 * logging is on so there is some record of what
258 		 * analysis was performed.
259 		 */
260 		if (log_file) {
261 			pr_dblock(log_print, start);
262 			log_print("\n");
263 		}
264 		/*
265 		 * Loop through this pass, each time analyzing an amount
266 		 * specified by the global parameters.
267 		 */
268 		xfercnt = 0;
269 		for (curnt = start; curnt <= end; curnt += size) {
270 			if ((end - curnt) < scan_size)
271 				size = end - curnt + 1;
272 			else
273 				size = scan_size;
274 			/*
275 			 * Print out where we are, so we don't look dead.
276 			 * Also store it in summary info for logging.
277 			 */
278 			scan_cur_block = curnt;
279 			nolog_print("   ");
280 			pr_dblock(nolog_print, curnt);
281 			nolog_print("  \015");
282 			(void) fflush(stdout);
283 			disk_error = 0;
284 			/*
285 			 * Do the actual analysis.
286 			 */
287 			status = analyze_blocks(flags, curnt, size,
288 			    (unsigned)data, needinit, (F_ALLERRS | F_SILENT),
289 			    &xfercnt);
290 			/*
291 			 * If there were no errors, the pattern buffer is
292 			 * still initialized, and we just loop to next chunk.
293 			 */
294 			needinit = 0;
295 			if (!status)
296 				continue;
297 			/*
298 			 * There was an error. Check if surface analysis
299 			 * can be continued.
300 			 */
301 			if (handle_error_conditions()) {
302 				scan_blocks_fixed = scan_cur_block = -1;
303 				return (-1);
304 			}
305 			/*
306 			 * There was an error. Mark the pattern buffer
307 			 * corrupt so it will get reinitialized.
308 			 */
309 			needinit = 1;
310 			/*
311 			 * If it was not a media error, ignore it.
312 			 */
313 			if (!media_error)
314 				continue;
315 			/*
316 			 * Loop 5 times through each sector of the chunk,
317 			 * analyzing them individually.
318 			 */
319 			nolog_print("   ");
320 			pr_dblock(nolog_print, curnt);
321 			nolog_print("  \015");
322 			(void) fflush(stdout);
323 			founderr = 0;
324 			for (j = 0; j < size * 5; j++) {
325 				i = j % size;
326 				disk_error = 0;
327 				status = analyze_blocks(flags, (curnt + i), 1,
328 				    (unsigned)data, needinit, F_ALLERRS, NULL);
329 				needinit = 0;
330 				if (!status)
331 					continue;
332 				/*
333 				 * There was an error. Check if surface analysis
334 				 * can be continued.
335 				 */
336 				if (handle_error_conditions()) {
337 					scan_blocks_fixed = scan_cur_block = -1;
338 					return (-1);
339 				}
340 				/*
341 				 * An error occurred.  Mark the buffer
342 				 * corrupt and see if it was media
343 				 * related.
344 				 */
345 				needinit = 1;
346 				if (!media_error)
347 					continue;
348 				/*
349 				 * We found a bad sector. Print out a message
350 				 * and fix it if required.
351 				 */
352 				founderr = 1;
353 				if (scan_correct && (flags != SCAN_VALID)) {
354 					if (scan_repair(curnt+i, mode)) {
355 						error = -1;
356 					}
357 				} else
358 					err_print("\n");
359 				/*
360 				 * Stop after the error if required.
361 				 */
362 				if (scan_stop)
363 					goto out;
364 			}
365 			/*
366 			 * Mark the pattern buffer corrupt to be safe.
367 			 */
368 			needinit = 1;
369 			/*
370 			 * We didn't find an individual sector that was bad.
371 			 * Print out a warning.
372 			 */
373 			if (!founderr) {
374 				err_print("Warning: unable to pinpoint ");
375 				err_print("defective block.\n");
376 			}
377 		}
378 		/*
379 		 * Print the end of each pass to the log file.
380 		 */
381 		enter_critical();
382 		if (log_file) {
383 			pr_dblock(log_print, scan_cur_block);
384 			log_print("\n");
385 		}
386 		scan_cur_block = -1;
387 		exit_critical();
388 		fmt_print("\n");
389 
390 		/*
391 		 * alternate the read and write for SCAN_VERIFY test
392 		 */
393 		if (flags & SCAN_VERIFY) {
394 			flags ^= SCAN_VERIFY_READ_PASS;
395 		}
396 	}
397 out:
398 	/*
399 	 * We got here either by giving up after an error or falling
400 	 * through after all passes were completed.
401 	 */
402 	fmt_print("\n");
403 	enter_critical();
404 	/*
405 	 * If the defect list is dirty, write it to disk,
406 	 * if scan_restore_defects (the default) is true.
407 	 */
408 	if (!EMBEDDED_SCSI && (cur_list.flags & LIST_DIRTY) &&
409 	    (scan_restore_defects)) {
410 		cur_list.flags = 0;
411 		write_deflist(&cur_list);
412 		}
413 	/*
414 	 * If the label is dirty, write it to disk.
415 	 * if scan_restore_label (the default) is true.
416 	 */
417 	if ((cur_flags & LABEL_DIRTY) && (scan_restore_label)) {
418 		cur_flags &= ~LABEL_DIRTY;
419 		(void) write_label();
420 	}
421 	/*
422 	 * If we dropped down to here after an error, we need to write
423 	 * the final block number to the log file for record keeping.
424 	 */
425 	if (log_file && scan_cur_block >= 0) {
426 		pr_dblock(log_print, scan_cur_block);
427 		log_print("\n");
428 	}
429 	fmt_print("Total of %lld defective blocks repaired.\n",
430 	    scan_blocks_fixed);
431 	/*
432 	 * Reinitialize the logging variables so they don't get used
433 	 * when they are not really valid.
434 	 */
435 	scan_blocks_fixed = scan_cur_block = -1;
436 	exit_critical();
437 	return (error);
438 }
439 
440 
441 /*
442  * This routine is called to repair a bad block discovered
443  * during a scan operation.  Return 0 for success, 1 for failure.
444  * (This has been extracted out of do_scan(), to simplify it.)
445  */
446 static int
scan_repair(diskaddr_t bn,int mode)447 scan_repair(diskaddr_t bn, int mode)
448 {
449 	int	status;
450 	int	result = 1;
451 	char	*buf;
452 	int	buf_is_good;
453 	int	i;
454 
455 	if (cur_ops->op_repair == NULL) {
456 		err_print("Warning: Controller does ");
457 		err_print("not support repairing.\n\n");
458 		return (result);
459 	}
460 
461 	buf = malloc(cur_blksz);
462 	if (buf == NULL) {
463 		err_print("Warning: no memory.\n\n");
464 		return (result);
465 	}
466 	enter_critical();
467 
468 	/*
469 	 * Determine if the error appears to be hard or soft.  We
470 	 * already assume there's an error.  If we can get any
471 	 * good data out of the sector, write that data back
472 	 * after the repair.
473 	 */
474 	buf_is_good = 0;
475 	for (i = 0; i < 5; i++) {
476 		status = (*cur_ops->op_rdwr)(DIR_READ, cur_file, bn, 1,
477 		    buf, F_SILENT, NULL);
478 		if (status == 0) {
479 			buf_is_good = 1;
480 			break;
481 		}
482 	}
483 
484 	fmt_print("Repairing %s error on %llu (",
485 	    buf_is_good ? "soft" : "hard", bn);
486 	pr_dblock(fmt_print, bn);
487 	fmt_print(")...");
488 
489 	status = (*cur_ops->op_repair)(bn, mode);
490 	if (status) {
491 		/*
492 		 * If the repair failed, we note it and will return the
493 		 * failure. However, the analysis goes on.
494 		 */
495 		fmt_print("failed.\n\n");
496 	} else {
497 		/*
498 		 * The repair worked.  Write the good data we could
499 		 * recover from the failed block, if possible.
500 		 * If not, zero the block.  In doing so, try to
501 		 * determine if the new block appears ok.
502 		 */
503 		if (!buf_is_good) {
504 			bzero(buf, cur_blksz);
505 			fmt_print("Warning: Block %llu zero-filled.\n", bn);
506 		} else {
507 			fmt_print("ok.\n");
508 		}
509 		status = (*cur_ops->op_rdwr)(DIR_WRITE, cur_file, bn,
510 		    1, buf, (F_SILENT | F_ALLERRS), NULL);
511 		if (status == 0) {
512 			status = (*cur_ops->op_rdwr)(DIR_READ, cur_file, bn,
513 			    1, buf, (F_SILENT | F_ALLERRS), NULL);
514 		}
515 		if (status) {
516 			fmt_print("The new block also appears defective.\n");
517 		}
518 		fmt_print("\n");
519 		/*
520 		 * add the defect to the list and write the list out.
521 		 * Also, kill the working list so it will get resynced
522 		 * with the current list.
523 		 *
524 		 * For embedded scsi, we don't require a defect list.
525 		 * However, if we have one, add the defect if the
526 		 * list includes the grown list.  If not, kill it
527 		 * to force a resync if we need the list later.
528 		 */
529 		if (EMBEDDED_SCSI) {
530 			if (cur_list.list != NULL) {
531 				if (cur_list.flags & LIST_PGLIST) {
532 					add_ldef(bn, &cur_list);
533 				} else {
534 					kill_deflist(&cur_list);
535 				}
536 			}
537 		/*
538 		 * The next "if" statement reflects the fix for
539 		 * bug id 1026096 where format keeps adding the
540 		 * same defect to the defect list.
541 		 */
542 		} else if (cur_ctype->ctype_flags & CF_WLIST) {
543 			kill_deflist(&cur_list);
544 			(*cur_ops->op_ex_cur)(&cur_list);
545 			fmt_print("Current list updated\n");
546 		} else {
547 			add_ldef(bn, &cur_list);
548 			write_deflist(&cur_list);
549 		}
550 		kill_deflist(&work_list);
551 
552 		/* Log the repair.  */
553 		scan_blocks_fixed++;
554 
555 		/* return ok */
556 		result = 0;
557 	}
558 
559 	exit_critical();
560 	free(buf);
561 	return (result);
562 }
563 
564 
565 /*
566  * This routine analyzes a set of sectors on the disk.  It simply returns
567  * an error if a defect is found.  It is called by do_scan().
568  */
569 static int
analyze_blocks(int flags,diskaddr_t blkno,uint_t blkcnt,unsigned data,int init,int driver_flags,int * xfercntp)570 analyze_blocks(int flags, diskaddr_t blkno, uint_t blkcnt, unsigned data,
571     int init, int driver_flags, int *xfercntp)
572 {
573 	int		corrupt = 0;
574 	int		status;
575 	diskaddr_t	i, nints;
576 	unsigned *ptr = (uint_t *)pattern_buf;
577 
578 	media_error = 0;
579 	if (flags & SCAN_VERIFY) {
580 		return (verify_blocks(flags, blkno, blkcnt, data,
581 		    driver_flags, xfercntp));
582 	}
583 
584 	/*
585 	 * Initialize the pattern buffer if necessary.
586 	 */
587 	nints = (diskaddr_t)blkcnt * cur_blksz / sizeof (int);
588 	if ((flags & SCAN_PATTERN) && init) {
589 		for (i = 0; i < nints; i++)
590 			*((int *)((int *)pattern_buf + i)) = data;
591 	}
592 	/*
593 	 * Lock out interrupts so we can insure valid data will get
594 	 * restored. This is necessary because there are modes
595 	 * of scanning that corrupt the disk data then restore it at
596 	 * the end of the analysis.
597 	 */
598 	enter_critical();
599 	/*
600 	 * If the disk data is valid, read it into the data buffer.
601 	 */
602 	if (flags & SCAN_VALID) {
603 		status = (*cur_ops->op_rdwr)(DIR_READ, cur_file, blkno,
604 		    blkcnt, (caddr_t)cur_buf, driver_flags, xfercntp);
605 		if (status)
606 			goto bad;
607 	}
608 	/*
609 	 * If we are doing pattern testing, write and read the pattern
610 	 * from the pattern buffer.
611 	 */
612 	if (flags & SCAN_PATTERN) {
613 		/*
614 		 * If the disk data was valid, mark it corrupt so we know
615 		 * to restore it later.
616 		 */
617 		if (flags & SCAN_VALID)
618 			corrupt++;
619 		/*
620 		 * Only write if we're not on the read pass of SCAN_PURGE.
621 		 */
622 		if (!(flags & SCAN_PURGE_READ_PASS)) {
623 			status = (*cur_ops->op_rdwr)(DIR_WRITE, cur_file, blkno,
624 			    blkcnt, (caddr_t)pattern_buf, driver_flags,
625 			    xfercntp);
626 			if (status)
627 				goto bad;
628 		}
629 		/*
630 		 * Only read if we are on the read pass of SCAN_PURGE, if we
631 		 * are purging.
632 		 */
633 		if ((!(flags & SCAN_PURGE)) || (flags & SCAN_PURGE_READ_PASS)) {
634 			status = (*cur_ops->op_rdwr)(DIR_READ, cur_file, blkno,
635 			    blkcnt, (caddr_t)pattern_buf, driver_flags,
636 			    xfercntp);
637 			if (status)
638 				goto bad;
639 		}
640 	}
641 	/*
642 	 * If we are doing a data compare, make sure the pattern
643 	 * came back intact.
644 	 * Only compare if we are on the read pass of SCAN_PURGE, or
645 	 * we wrote random data instead of the expected data pattern.
646 	 */
647 	if ((flags & SCAN_COMPARE) || (flags & SCAN_PURGE_READ_PASS)) {
648 		for (i = nints, ptr = (uint_t *)pattern_buf; i; i--)
649 			if (*ptr++ != data) {
650 				err_print("Data miscompare error (expecting ");
651 				err_print("0x%x, got 0x%x) at ", data,
652 				    *((int *)((int *)pattern_buf +
653 				    (nints - i))));
654 				pr_dblock(err_print, blkno);
655 				err_print(", offset = 0x%llx.\n",
656 				    (nints - i) * sizeof (int));
657 				goto bad;
658 			}
659 	}
660 	/*
661 	 * If we are supposed to write data out, do so.
662 	 */
663 	if (flags & SCAN_WRITE) {
664 		status = (*cur_ops->op_rdwr)(DIR_WRITE, cur_file, blkno,
665 		    blkcnt, (caddr_t)cur_buf, driver_flags, xfercntp);
666 		if (status)
667 			goto bad;
668 	}
669 	exit_critical();
670 	/*
671 	 * No errors occurred, return ok.
672 	 */
673 	return (0);
674 bad:
675 	/*
676 	 * There was an error.  If the data was corrupted, we write it
677 	 * out from the data buffer to restore it.
678 	 */
679 	if (corrupt) {
680 		if ((*cur_ops->op_rdwr)(DIR_WRITE, cur_file, blkno,
681 		    blkcnt, (caddr_t)cur_buf, F_NORMAL, xfercntp))
682 		err_print("Warning: unable to restore original data.\n");
683 	}
684 	exit_critical();
685 	/*
686 	 * Return the error.
687 	 */
688 	return (-1);
689 }
690 
691 
692 /*
693  * This routine analyzes a set of sectors on the disk. It simply returns
694  * an error if a defect is found.  It is called by analyze_blocks().
695  * For simplicity, this is done as a separate function instead of
696  * making the analyze_block routine complex.
697  *
698  * This routine implements the 'verify' command.  It writes the disk
699  * by writing unique data for each block; after the write pass, it
700  * reads the data and verifies for correctness. Note that the entire
701  * disk (or the range of disk) is fully written first and then read.
702  * This should eliminate any caching effect on the drives.
703  */
704 static int
verify_blocks(int flags,diskaddr_t blkno,uint_t blkcnt,unsigned data,int driver_flags,int * xfercntp)705 verify_blocks(int flags, diskaddr_t blkno, uint_t blkcnt, unsigned data,
706     int driver_flags, int *xfercntp)
707 {
708 	int		status, i, nints;
709 	unsigned	*ptr = (uint_t *)pattern_buf;
710 
711 	nints = cur_blksz / sizeof (int);
712 
713 	/*
714 	 * Initialize the pattern buffer if we are in write pass.
715 	 * Use the block number itself as data, each block has unique
716 	 * buffer data that way.
717 	 */
718 	if (!(flags & SCAN_VERIFY_READ_PASS)) {
719 		for (data = blkno; data < blkno + blkcnt; data++) {
720 			for (i = 0; i < nints; i++) {
721 				*ptr++ = data;
722 			}
723 		}
724 		ptr = (uint_t *)pattern_buf;
725 	}
726 
727 	/*
728 	 * Only write if we're not on the read pass of SCAN_VERIFY.
729 	 */
730 	if (!(flags & SCAN_VERIFY_READ_PASS)) {
731 		status = (*cur_ops->op_rdwr)(DIR_WRITE, cur_file, blkno,
732 		    blkcnt, (caddr_t)pattern_buf, driver_flags, xfercntp);
733 		if (status)
734 			goto bad;
735 	} else {
736 		/*
737 		 * Only read if we are on the read pass of SCAN_VERIFY
738 		 */
739 		status = (*cur_ops->op_rdwr)(DIR_READ, cur_file, blkno,
740 		    blkcnt, (caddr_t)pattern_buf, driver_flags, xfercntp);
741 		if (status)
742 			goto bad;
743 		/*
744 		 * compare and make sure the pattern came back intact.
745 		 */
746 		for (data = blkno; data < blkno + blkcnt; data++) {
747 			for (i = 0; i < nints; i++) {
748 				if (*ptr++ != data) {
749 					ptr--;
750 					err_print("Data miscompare error "
751 					    "(expecting 0x%x, got 0x%x) at ",
752 					    data, *ptr);
753 					pr_dblock(err_print, blkno);
754 					err_print(", offset = 0x%x.\n",
755 					    (ptr - (uint_t *)pattern_buf) *
756 					    sizeof (int));
757 					goto bad;
758 				}
759 			}
760 		}
761 	}
762 	/*
763 	 * No errors occurred, return ok.
764 	 */
765 	return (0);
766 bad:
767 	return (-1);
768 }
769 
770 
771 static int
handle_error_conditions(void)772 handle_error_conditions(void)
773 {
774 
775 	/*
776 	 * Check if the errno is ENXIO.
777 	 */
778 	if (errno == ENXIO) {
779 		fmt_print("\n\nWarning:Cannot access drive, ");
780 		fmt_print("aborting surface analysis.\n");
781 		return (-1);
782 	}
783 	/*
784 	 * check for disk errors
785 	 */
786 	switch (disk_error) {
787 	case DISK_STAT_RESERVED:
788 	case DISK_STAT_UNAVAILABLE:
789 		fmt_print("\n\nWarning:Drive may be reserved ");
790 		fmt_print("or has been removed, ");
791 		fmt_print("aborting surface analysis.\n");
792 		return (-1);
793 	case DISK_STAT_NOTREADY:
794 		fmt_print("\n\nWarning: Drive not ready, ");
795 		fmt_print("aborting surface analysis.\n");
796 		return (-1);
797 	case DISK_STAT_DATA_PROTECT:
798 		fmt_print("\n\nWarning: Drive is write protected, ");
799 		fmt_print("aborting surface analysis.\n");
800 		return (-1);
801 	default:
802 		break;
803 	}
804 	return (0);
805 }
806