1/* fsys_reiserfs.c - an implementation for the ReiserFS filesystem */
2/*
3 *  GRUB  --  GRand Unified Bootloader
4 *  Copyright (C) 2000, 2001  Free Software Foundation, Inc.
5 *
6 *  This program is free software; you can redistribute it and/or modify
7 *  it under the terms of the GNU General Public License as published by
8 *  the Free Software Foundation; either version 2 of the License, or
9 *  (at your option) any later version.
10 *
11 *  This program is distributed in the hope that it will be useful,
12 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 *  GNU General Public License for more details.
15 *
16 *  You should have received a copy of the GNU General Public License
17 *  along with this program; if not, write to the Free Software
18 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 */
20
21#ifdef FSYS_REISERFS
22#include "shared.h"
23#include "filesys.h"
24
25#undef REISERDEBUG
26
27/* Some parts of this code (mainly the structures and defines) are
28 * from the original reiser fs code, as found in the linux kernel.
29 */
30
31/* include/asm-i386/types.h */
32typedef __signed__ char __s8;
33typedef unsigned char __u8;
34typedef __signed__ short __s16;
35typedef unsigned short __u16;
36typedef __signed__ int __s32;
37typedef unsigned int __u32;
38typedef unsigned long long __u64;
39
40/* linux/posix_type.h */
41typedef long linux_off_t;
42
43/* linux/little_endian.h */
44#define __cpu_to_le64(x) ((__u64) (x))
45#define __le64_to_cpu(x) ((__u64) (x))
46#define __cpu_to_le32(x) ((__u32) (x))
47#define __le32_to_cpu(x) ((__u32) (x))
48#define __cpu_to_le16(x) ((__u16) (x))
49#define __le16_to_cpu(x) ((__u16) (x))
50
51/* include/linux/reiser_fs.h */
52/* This is the new super block of a journaling reiserfs system */
53struct reiserfs_super_block
54{
55  __u32 s_block_count;			/* blocks count         */
56  __u32 s_free_blocks;                  /* free blocks count    */
57  __u32 s_root_block;           	/* root block number    */
58  __u32 s_journal_block;           	/* journal block number    */
59  __u32 s_journal_dev;           	/* journal device number  */
60  __u32 s_journal_size; 		/* size of the journal on FS creation.  used to make sure they don't overflow it */
61  __u32 s_journal_trans_max;            /* max number of blocks in a transaction.  */
62  __u32 s_journal_magic;                /* random value made on fs creation */
63  __u32 s_journal_max_batch;            /* max number of blocks to batch into a trans */
64  __u32 s_journal_max_commit_age;       /* in seconds, how old can an async commit be */
65  __u32 s_journal_max_trans_age;        /* in seconds, how old can a transaction be */
66  __u16 s_blocksize;                   	/* block size           */
67  __u16 s_oid_maxsize;			/* max size of object id array  */
68  __u16 s_oid_cursize;			/* current size of object id array */
69  __u16 s_state;                       	/* valid or error       */
70  char s_magic[16];                     /* reiserfs magic string indicates that file system is reiserfs */
71  __u16 s_tree_height;                  /* height of disk tree */
72  __u16 s_bmap_nr;                      /* amount of bitmap blocks needed to address each block of file system */
73  __u16 s_version;
74  char s_unused[128];			/* zero filled by mkreiserfs */
75};
76
77#define REISERFS_MAX_SUPPORTED_VERSION 2
78#define REISERFS_SUPER_MAGIC_STRING "ReIsErFs"
79#define REISER2FS_SUPER_MAGIC_STRING "ReIsEr2Fs"
80#define REISER3FS_SUPER_MAGIC_STRING "ReIsEr3Fs"
81
82#define MAX_HEIGHT 7
83
84/* must be correct to keep the desc and commit structs at 4k */
85#define JOURNAL_TRANS_HALF 1018
86
87/* first block written in a commit.  */
88struct reiserfs_journal_desc {
89  __u32 j_trans_id;			/* id of commit */
90  __u32 j_len;				/* length of commit. len +1 is the commit block */
91  __u32 j_mount_id;			/* mount id of this trans*/
92  __u32 j_realblock[JOURNAL_TRANS_HALF]; /* real locations for the first blocks */
93  char j_magic[12];
94};
95
96/* last block written in a commit */
97struct reiserfs_journal_commit {
98  __u32 j_trans_id;			/* must match j_trans_id from the desc block */
99  __u32 j_len;			/* ditto */
100  __u32 j_realblock[JOURNAL_TRANS_HALF]; /* real locations for the last blocks */
101  char j_digest[16];			/* md5 sum of all the blocks involved, including desc and commit. not used, kill it */
102};
103
104/* this header block gets written whenever a transaction is considered
105   fully flushed, and is more recent than the last fully flushed
106   transaction.
107   fully flushed means all the log blocks and all the real blocks are
108   on disk, and this transaction does not need to be replayed.
109*/
110struct reiserfs_journal_header {
111  /* id of last fully flushed transaction */
112  __u32 j_last_flush_trans_id;
113  /* offset in the log of where to start replay after a crash */
114  __u32 j_first_unflushed_offset;
115  /* mount id to detect very old transactions */
116  __u32 j_mount_id;
117};
118
119/* magic string to find desc blocks in the journal */
120#define JOURNAL_DESC_MAGIC "ReIsErLB"
121
122
123/*
124 * directories use this key as well as old files
125 */
126struct offset_v1
127{
128  /*
129   * for regular files this is the offset to the first byte of the
130   * body, contained in the object-item, as measured from the start of
131   * the entire body of the object.
132   *
133   * for directory entries, k_offset consists of hash derived from
134   * hashing the name and using few bits (23 or more) of the resulting
135   * hash, and generation number that allows distinguishing names with
136   * hash collisions. If number of collisions overflows generation
137   * number, we return EEXIST.  High order bit is 0 always
138   */
139  __u32 k_offset;
140  __u32 k_uniqueness;
141};
142
143struct offset_v2
144{
145  /*
146   * for regular files this is the offset to the first byte of the
147   * body, contained in the object-item, as measured from the start of
148   * the entire body of the object.
149   *
150   * for directory entries, k_offset consists of hash derived from
151   * hashing the name and using few bits (23 or more) of the resulting
152   * hash, and generation number that allows distinguishing names with
153   * hash collisions. If number of collisions overflows generation
154   * number, we return EEXIST.  High order bit is 0 always
155   */
156  __u64 k_offset:60;
157  __u64 k_type: 4;
158};
159
160
161struct key
162{
163  /* packing locality: by default parent directory object id */
164  __u32 k_dir_id;
165  /* object identifier */
166  __u32 k_objectid;
167  /* the offset and node type (old and new form) */
168  union
169  {
170    struct offset_v1 v1;
171    struct offset_v2 v2;
172  }
173  u;
174};
175
176#define KEY_SIZE (sizeof (struct key))
177
178/* Header of a disk block.  More precisely, header of a formatted leaf
179   or internal node, and not the header of an unformatted node. */
180struct block_head
181{
182  __u16 blk_level;        /* Level of a block in the tree. */
183  __u16 blk_nr_item;      /* Number of keys/items in a block. */
184  __u16 blk_free_space;   /* Block free space in bytes. */
185  struct key  blk_right_delim_key; /* Right delimiting key for this block (supported for leaf level nodes
186				      only) */
187};
188#define BLKH_SIZE (sizeof (struct block_head))
189#define DISK_LEAF_NODE_LEVEL  1 /* Leaf node level.                       */
190
191struct item_head
192{
193  struct key ih_key; 	/* Everything in the tree is found by searching for it based on its key.*/
194
195  union
196  {
197    __u16 ih_free_space; /* The free space in the last unformatted node of an indirect item if this
198			    is an indirect item.  This equals 0xFFFF iff this is a direct item or
199			    stat data item. Note that the key, not this field, is used to determine
200			    the item type, and thus which field this union contains. */
201    __u16 ih_entry_count; /* Iff this is a directory item, this field equals the number of directory
202			     entries in the directory item. */
203  }
204  u;
205  __u16 ih_item_len;           /* total size of the item body                  */
206  __u16 ih_item_location;      /* an offset to the item body within the block  */
207  __u16 ih_version;	       /* ITEM_VERSION_1 for all old items,
208				  ITEM_VERSION_2 for new ones.
209				  Highest bit is set by fsck
210                                  temporary, cleaned after all done */
211};
212/* size of item header     */
213#define IH_SIZE (sizeof (struct item_head))
214
215#define ITEM_VERSION_1 0
216#define ITEM_VERSION_2 1
217#define IH_KEY_OFFSET(ih) ((ih)->ih_version == ITEM_VERSION_1 \
218			   ? (ih)->ih_key.u.v1.k_offset \
219			   : (ih)->ih_key.u.v2.k_offset)
220
221#define IH_KEY_ISTYPE(ih, type) ((ih)->ih_version == ITEM_VERSION_1 \
222				 ? (ih)->ih_key.u.v1.k_uniqueness == V1_##type \
223				 : (ih)->ih_key.u.v2.k_type == V2_##type)
224
225struct disk_child
226{
227  unsigned long       dc_block_number;              /* Disk child's block number. */
228  unsigned short      dc_size;		            /* Disk child's used space.   */
229};
230
231#define DC_SIZE (sizeof (struct disk_child))
232
233/* Stat Data on disk.
234 *
235 * Note that reiserfs has two different forms of stat data.  Luckily
236 * the fields needed by grub are at the same position.
237 */
238struct stat_data
239{
240  __u16 sd_mode;	/* file type, permissions */
241  __u16 sd_notused1[3]; /* fields not needed by reiserfs */
242  __u32 sd_size;	/* file size */
243  __u32 sd_size_hi;	/* file size high 32 bits (since version 2) */
244};
245
246struct reiserfs_de_head
247{
248  __u32 deh_offset;  /* third component of the directory entry key */
249  __u32 deh_dir_id;  /* objectid of the parent directory of the
250			object, that is referenced by directory entry */
251  __u32 deh_objectid;/* objectid of the object, that is referenced by
252                        directory entry */
253  __u16 deh_location;/* offset of name in the whole item */
254  __u16 deh_state;   /* whether 1) entry contains stat data (for
255			future), and 2) whether entry is hidden
256			(unlinked) */
257};
258
259#define DEH_SIZE (sizeof (struct reiserfs_de_head))
260
261#define DEH_Statdata (1 << 0)			/* not used now */
262#define DEH_Visible  (1 << 2)
263
264#define SD_OFFSET  0
265#define SD_UNIQUENESS 0
266#define DOT_OFFSET 1
267#define DOT_DOT_OFFSET 2
268#define DIRENTRY_UNIQUENESS 500
269
270#define V1_TYPE_STAT_DATA 0x0
271#define V1_TYPE_DIRECT 0xffffffff
272#define V1_TYPE_INDIRECT 0xfffffffe
273#define V1_TYPE_DIRECTORY_MAX 0xfffffffd
274#define V2_TYPE_STAT_DATA 0
275#define V2_TYPE_INDIRECT 1
276#define V2_TYPE_DIRECT 2
277#define V2_TYPE_DIRENTRY 3
278
279#define REISERFS_ROOT_OBJECTID 2
280#define REISERFS_ROOT_PARENT_OBJECTID 1
281#define REISERFS_DISK_OFFSET_IN_BYTES (64 * 1024)
282/* the spot for the super in versions 3.5 - 3.5.11 (inclusive) */
283#define REISERFS_OLD_DISK_OFFSET_IN_BYTES (8 * 1024)
284#define REISERFS_OLD_BLOCKSIZE 4096
285
286#define S_ISREG(mode) (((mode) & 0170000) == 0100000)
287#define S_ISDIR(mode) (((mode) & 0170000) == 0040000)
288#define S_ISLNK(mode) (((mode) & 0170000) == 0120000)
289
290#define PATH_MAX       1024	/* include/linux/limits.h */
291#define MAX_LINK_COUNT    5	/* number of symbolic links to follow */
292
293/* The size of the node cache */
294#define FSYSREISER_CACHE_SIZE 24*1024
295#define FSYSREISER_MIN_BLOCKSIZE SECTOR_SIZE
296#define FSYSREISER_MAX_BLOCKSIZE FSYSREISER_CACHE_SIZE / 3
297
298/* Info about currently opened file */
299struct fsys_reiser_fileinfo
300{
301  __u32 k_dir_id;
302  __u32 k_objectid;
303};
304
305/* In memory info about the currently mounted filesystem */
306struct fsys_reiser_info
307{
308  /* The last read item head */
309  struct item_head *current_ih;
310  /* The last read item */
311  char *current_item;
312  /* The information for the currently opened file */
313  struct fsys_reiser_fileinfo fileinfo;
314  /* The start of the journal */
315  __u32 journal_block;
316  /* The size of the journal */
317  __u32 journal_block_count;
318  /* The first valid descriptor block in journal
319     (relative to journal_block) */
320  __u32 journal_first_desc;
321
322  /* The ReiserFS version. */
323  __u16 version;
324  /* The current depth of the reiser tree. */
325  __u16 tree_depth;
326  /* SECTOR_SIZE << blocksize_shift == blocksize. */
327  __u8  blocksize_shift;
328  /* 1 << full_blocksize_shift == blocksize. */
329  __u8  fullblocksize_shift;
330  /* The reiserfs block size  (must be a power of 2) */
331  __u16 blocksize;
332  /* The number of cached tree nodes */
333  __u16 cached_slots;
334  /* The number of valid transactions in journal */
335  __u16 journal_transactions;
336
337  unsigned int blocks[MAX_HEIGHT];
338  unsigned int next_key_nr[MAX_HEIGHT];
339};
340
341/* The cached s+tree blocks in FSYS_BUF,  see below
342 * for a more detailed description.
343 */
344#define ROOT     ((char *) ((int) FSYS_BUF))
345#define CACHE(i) (ROOT + ((i) << INFO->fullblocksize_shift))
346#define LEAF     CACHE (DISK_LEAF_NODE_LEVEL)
347
348#define BLOCKHEAD(cache) ((struct block_head *) cache)
349#define ITEMHEAD         ((struct item_head  *) ((int) LEAF + BLKH_SIZE))
350#define KEY(cache)       ((struct key        *) ((int) cache + BLKH_SIZE))
351#define DC(cache)        ((struct disk_child *) \
352			  ((int) cache + BLKH_SIZE + KEY_SIZE * nr_item))
353/* The fsys_reiser_info block.
354 */
355#define INFO \
356    ((struct fsys_reiser_info *) ((int) FSYS_BUF + FSYSREISER_CACHE_SIZE))
357/*
358 * The journal cache.  For each transaction it contains the number of
359 * blocks followed by the real block numbers of this transaction.
360 *
361 * If the block numbers of some transaction won't fit in this space,
362 * this list is stopped with a 0xffffffff marker and the remaining
363 * uncommitted transactions aren't cached.
364 */
365#define JOURNAL_START    ((__u32 *) (INFO + 1))
366#define JOURNAL_END      ((__u32 *) (FSYS_BUF + FSYS_BUFLEN))
367
368
369static __inline__ unsigned long
370grub_log2 (unsigned long word)
371{
372  __asm__ ("bsfl %1,%0"
373	   : "=r" (word)
374	   : "r" (word));
375  return word;
376}
377#define log2 grub_log2
378
379static __inline__ int
380is_power_of_two (unsigned long word)
381{
382  return (word & -word) == word;
383}
384
385static int
386journal_read (int block, int len, char *buffer)
387{
388  return devread ((INFO->journal_block + block) << INFO->blocksize_shift,
389		  0, len, buffer);
390}
391
392/* Read a block from ReiserFS file system, taking the journal into
393 * account.  If the block nr is in the journal, the block from the
394 * journal taken.
395 */
396static int
397block_read (int blockNr, int start, int len, char *buffer)
398{
399  int transactions = INFO->journal_transactions;
400  int desc_block = INFO->journal_first_desc;
401  int journal_mask = INFO->journal_block_count - 1;
402  int translatedNr = blockNr;
403  __u32 *journal_table = JOURNAL_START;
404  while (transactions-- > 0)
405    {
406      int i = 0;
407      int j_len;
408      if (*journal_table != 0xffffffff)
409	{
410	  /* Search for the blockNr in cached journal */
411	  j_len = *journal_table++;
412	  while (i++ < j_len)
413	    {
414	      if (*journal_table++ == blockNr)
415		{
416		  journal_table += j_len - i;
417		  goto found;
418		}
419	    }
420	}
421      else
422	{
423	  /* This is the end of cached journal marker.  The remaining
424	   * transactions are still on disk.
425	   */
426	  struct reiserfs_journal_desc   desc;
427	  struct reiserfs_journal_commit commit;
428
429	  if (! journal_read (desc_block, sizeof (desc), (char *) &desc))
430	    return 0;
431
432	  j_len = desc.j_len;
433	  while (i < j_len && i < JOURNAL_TRANS_HALF)
434	    if (desc.j_realblock[i++] == blockNr)
435	      goto found;
436
437	  if (j_len >= JOURNAL_TRANS_HALF)
438	    {
439	      int commit_block = (desc_block + 1 + j_len) & journal_mask;
440	      if (! journal_read (commit_block,
441				  sizeof (commit), (char *) &commit))
442		return 0;
443	      while (i < j_len)
444		if (commit.j_realblock[i++ - JOURNAL_TRANS_HALF] == blockNr)
445		  goto found;
446	    }
447	}
448      goto not_found;
449
450    found:
451      translatedNr = INFO->journal_block + ((desc_block + i) & journal_mask);
452#ifdef REISERDEBUG
453      printf ("block_read: block %d is mapped to journal block %d.\n",
454	      blockNr, translatedNr - INFO->journal_block);
455#endif
456      /* We must continue the search, as this block may be overwritten
457       * in later transactions.
458       */
459    not_found:
460      desc_block = (desc_block + 2 + j_len) & journal_mask;
461    }
462  return devread (translatedNr << INFO->blocksize_shift, start, len, buffer);
463}
464
465/* Init the journal data structure.  We try to cache as much as
466 * possible in the JOURNAL_START-JOURNAL_END space, but if it is full
467 * we can still read the rest from the disk on demand.
468 *
469 * The first number of valid transactions and the descriptor block of the
470 * first valid transaction are held in INFO.  The transactions are all
471 * adjacent, but we must take care of the journal wrap around.
472 */
473static int
474journal_init (void)
475{
476  unsigned int block_count = INFO->journal_block_count;
477  unsigned int desc_block;
478  unsigned int commit_block;
479  unsigned int next_trans_id;
480  struct reiserfs_journal_header header;
481  struct reiserfs_journal_desc   desc;
482  struct reiserfs_journal_commit commit;
483  __u32 *journal_table = JOURNAL_START;
484
485  journal_read (block_count, sizeof (header), (char *) &header);
486  desc_block = header.j_first_unflushed_offset;
487  if (desc_block >= block_count)
488    return 0;
489
490  INFO->journal_first_desc = desc_block;
491  next_trans_id = header.j_last_flush_trans_id + 1;
492
493#ifdef REISERDEBUG
494  printf ("journal_init: last flushed %d\n",
495	  header.j_last_flush_trans_id);
496#endif
497
498  while (1)
499    {
500      journal_read (desc_block, sizeof (desc), (char *) &desc);
501      if (substring (JOURNAL_DESC_MAGIC, desc.j_magic) > 0
502	  || desc.j_trans_id != next_trans_id
503	  || desc.j_mount_id != header.j_mount_id)
504	/* no more valid transactions */
505	break;
506
507      commit_block = (desc_block + desc.j_len + 1) & (block_count - 1);
508      journal_read (commit_block, sizeof (commit), (char *) &commit);
509      if (desc.j_trans_id != commit.j_trans_id
510	  || desc.j_len != commit.j_len)
511	/* no more valid transactions */
512	break;
513
514#ifdef REISERDEBUG
515      printf ("Found valid transaction %d/%d at %d.\n",
516	      desc.j_trans_id, desc.j_mount_id, desc_block);
517#endif
518
519      next_trans_id++;
520      if (journal_table < JOURNAL_END)
521	{
522	  if ((journal_table + 1 + desc.j_len) >= JOURNAL_END)
523	    {
524	      /* The table is almost full; mark the end of the cached
525	       * journal.*/
526	      *journal_table = 0xffffffff;
527	      journal_table = JOURNAL_END;
528	    }
529	  else
530	    {
531	      int i;
532	      /* Cache the length and the realblock numbers in the table.
533	       * The block number of descriptor can easily be computed.
534	       * and need not to be stored here.
535	       */
536	      *journal_table++ = desc.j_len;
537	      for (i = 0; i < desc.j_len && i < JOURNAL_TRANS_HALF; i++)
538		{
539		  *journal_table++ = desc.j_realblock[i];
540#ifdef REISERDEBUG
541		  printf ("block %d is in journal %d.\n",
542			  desc.j_realblock[i], desc_block);
543#endif
544		}
545	      for (     ; i < desc.j_len; i++)
546		{
547		  *journal_table++ = commit.j_realblock[i-JOURNAL_TRANS_HALF];
548#ifdef REISERDEBUG
549		  printf ("block %d is in journal %d.\n",
550			  commit.j_realblock[i-JOURNAL_TRANS_HALF],
551			  desc_block);
552#endif
553		}
554	    }
555	}
556      desc_block = (commit_block + 1) & (block_count - 1);
557    }
558#ifdef REISERDEBUG
559  printf ("Transaction %d/%d at %d isn't valid.\n",
560	  desc.j_trans_id, desc.j_mount_id, desc_block);
561#endif
562
563  INFO->journal_transactions
564    = next_trans_id - header.j_last_flush_trans_id - 1;
565  return errnum == 0;
566}
567
568/* check filesystem types and read superblock into memory buffer */
569int
570reiserfs_mount (void)
571{
572  struct reiserfs_super_block super;
573  int superblock = REISERFS_DISK_OFFSET_IN_BYTES >> SECTOR_BITS;
574
575  if (part_length < superblock + (sizeof (super) >> SECTOR_BITS)
576      || ! devread (superblock, 0, sizeof (struct reiserfs_super_block),
577		(char *) &super)
578      || (substring (REISER3FS_SUPER_MAGIC_STRING, super.s_magic) > 0
579	  && substring (REISER2FS_SUPER_MAGIC_STRING, super.s_magic) > 0
580	  && substring (REISERFS_SUPER_MAGIC_STRING, super.s_magic) > 0)
581      || (/* check that this is not a copy inside the journal log */
582	  super.s_journal_block * super.s_blocksize
583	  <= REISERFS_DISK_OFFSET_IN_BYTES))
584    {
585      /* Try old super block position */
586      superblock = REISERFS_OLD_DISK_OFFSET_IN_BYTES >> SECTOR_BITS;
587      if (part_length < superblock + (sizeof (super) >> SECTOR_BITS)
588	  || ! devread (superblock, 0, sizeof (struct reiserfs_super_block),
589			(char *) &super))
590	return 0;
591
592      if (substring (REISER3FS_SUPER_MAGIC_STRING, super.s_magic) > 0
593	  && substring (REISER2FS_SUPER_MAGIC_STRING, super.s_magic) > 0
594	  && substring (REISERFS_SUPER_MAGIC_STRING, super.s_magic) > 0)
595	{
596	  /* pre journaling super block ? */
597	  if (substring (REISERFS_SUPER_MAGIC_STRING,
598			 (char*) ((int) &super + 20)) > 0)
599	    return 0;
600
601	  super.s_blocksize = REISERFS_OLD_BLOCKSIZE;
602	  super.s_journal_block = 0;
603	  super.s_version = 0;
604	}
605    }
606
607  /* check the version number.  */
608  if (super.s_version > REISERFS_MAX_SUPPORTED_VERSION)
609    return 0;
610
611  INFO->version = super.s_version;
612  INFO->blocksize = super.s_blocksize;
613  INFO->fullblocksize_shift = log2 (super.s_blocksize);
614  INFO->blocksize_shift = INFO->fullblocksize_shift - SECTOR_BITS;
615  INFO->cached_slots =
616    (FSYSREISER_CACHE_SIZE >> INFO->fullblocksize_shift) - 1;
617
618#ifdef REISERDEBUG
619  printf ("reiserfs_mount: version=%d, blocksize=%d\n",
620	  INFO->version, INFO->blocksize);
621#endif /* REISERDEBUG */
622
623  /* Clear node cache. */
624  memset (INFO->blocks, 0, sizeof (INFO->blocks));
625
626  if (super.s_blocksize < FSYSREISER_MIN_BLOCKSIZE
627      || super.s_blocksize > FSYSREISER_MAX_BLOCKSIZE
628      || (SECTOR_SIZE << INFO->blocksize_shift) != super.s_blocksize)
629    return 0;
630
631  /* Initialize journal code.  If something fails we end with zero
632   * journal_transactions, so we don't access the journal at all.
633   */
634  INFO->journal_transactions = 0;
635  if (super.s_journal_block != 0 && super.s_journal_dev == 0)
636    {
637      INFO->journal_block = super.s_journal_block;
638      INFO->journal_block_count = super.s_journal_size;
639      if (is_power_of_two (INFO->journal_block_count))
640	journal_init ();
641
642      /* Read in super block again, maybe it is in the journal */
643      block_read (superblock >> INFO->blocksize_shift,
644		  0, sizeof (struct reiserfs_super_block), (char *) &super);
645    }
646
647  if (! block_read (super.s_root_block, 0, INFO->blocksize, (char*) ROOT))
648    return 0;
649
650  INFO->tree_depth = BLOCKHEAD (ROOT)->blk_level;
651
652#ifdef REISERDEBUG
653  printf ("root read_in: block=%d, depth=%d\n",
654	  super.s_root_block, INFO->tree_depth);
655#endif /* REISERDEBUG */
656
657  if (INFO->tree_depth >= MAX_HEIGHT)
658    return 0;
659  if (INFO->tree_depth == DISK_LEAF_NODE_LEVEL)
660    {
661      /* There is only one node in the whole filesystem,
662       * which is simultanously leaf and root */
663      memcpy (LEAF, ROOT, INFO->blocksize);
664    }
665  return 1;
666}
667
668/***************** TREE ACCESSING METHODS *****************************/
669
670/* I assume you are familiar with the ReiserFS tree, if not go to
671 * http://www.namesys.com/content_table.html
672 *
673 * My tree node cache is organized as following
674 *   0   ROOT node
675 *   1   LEAF node  (if the ROOT is also a LEAF it is copied here
676 *   2-n other nodes on current path from bottom to top.
677 *       if there is not enough space in the cache, the top most are
678 *       omitted.
679 *
680 * I have only two methods to find a key in the tree:
681 *   search_stat(dir_id, objectid) searches for the stat entry (always
682 *       the first entry) of an object.
683 *   next_key() gets the next key in tree order.
684 *
685 * This means, that I can only sequential reads of files are
686 * efficient, but this really doesn't hurt for grub.
687 */
688
689/* Read in the node at the current path and depth into the node cache.
690 * You must set INFO->blocks[depth] before.
691 */
692static char *
693read_tree_node (unsigned int blockNr, int depth)
694{
695  char* cache = CACHE(depth);
696  int num_cached = INFO->cached_slots;
697  if (depth < num_cached)
698    {
699      /* This is the cached part of the path.  Check if same block is
700       * needed.
701       */
702      if (blockNr == INFO->blocks[depth])
703	return cache;
704    }
705  else
706    cache = CACHE(num_cached);
707
708#ifdef REISERDEBUG
709  printf ("  next read_in: block=%d (depth=%d)\n",
710	  blockNr, depth);
711#endif /* REISERDEBUG */
712  if (! block_read (blockNr, 0, INFO->blocksize, cache))
713    return 0;
714  /* Make sure it has the right node level */
715  if (BLOCKHEAD (cache)->blk_level != depth)
716    {
717      errnum = ERR_FSYS_CORRUPT;
718      return 0;
719    }
720
721  INFO->blocks[depth] = blockNr;
722  return cache;
723}
724
725/* Get the next key, i.e. the key following the last retrieved key in
726 * tree order.  INFO->current_ih and
727 * INFO->current_info are adapted accordingly.  */
728static int
729next_key (void)
730{
731  int depth;
732  struct item_head *ih = INFO->current_ih + 1;
733  char *cache;
734
735#ifdef REISERDEBUG
736  printf ("next_key:\n  old ih: key %d:%d:%d:%d version:%d\n",
737	  INFO->current_ih->ih_key.k_dir_id,
738	  INFO->current_ih->ih_key.k_objectid,
739	  INFO->current_ih->ih_key.u.v1.k_offset,
740	  INFO->current_ih->ih_key.u.v1.k_uniqueness,
741	  INFO->current_ih->ih_version);
742#endif /* REISERDEBUG */
743
744  if (ih == &ITEMHEAD[BLOCKHEAD (LEAF)->blk_nr_item])
745    {
746      depth = DISK_LEAF_NODE_LEVEL;
747      /* The last item, was the last in the leaf node.
748       * Read in the next block
749       */
750      do
751	{
752	  if (depth == INFO->tree_depth)
753	    {
754	      /* There are no more keys at all.
755	       * Return a dummy item with MAX_KEY */
756	      ih = (struct item_head *) &BLOCKHEAD (LEAF)->blk_right_delim_key;
757	      goto found;
758	    }
759	  depth++;
760#ifdef REISERDEBUG
761	  printf ("  depth=%d, i=%d\n", depth, INFO->next_key_nr[depth]);
762#endif /* REISERDEBUG */
763	}
764      while (INFO->next_key_nr[depth] == 0);
765
766      if (depth == INFO->tree_depth)
767	cache = ROOT;
768      else if (depth <= INFO->cached_slots)
769	cache = CACHE (depth);
770      else
771	{
772	  cache = read_tree_node (INFO->blocks[depth], depth);
773	  if (! cache)
774	    return 0;
775	}
776
777      do
778	{
779	  int nr_item = BLOCKHEAD (cache)->blk_nr_item;
780	  int key_nr = INFO->next_key_nr[depth]++;
781#ifdef REISERDEBUG
782	  printf ("  depth=%d, i=%d/%d\n", depth, key_nr, nr_item);
783#endif /* REISERDEBUG */
784	  if (key_nr == nr_item)
785	    /* This is the last item in this block, set the next_key_nr to 0 */
786	    INFO->next_key_nr[depth] = 0;
787
788	  cache = read_tree_node (DC (cache)[key_nr].dc_block_number, --depth);
789	  if (! cache)
790	    return 0;
791	}
792      while (depth > DISK_LEAF_NODE_LEVEL);
793
794      ih = ITEMHEAD;
795    }
796 found:
797  INFO->current_ih   = ih;
798  INFO->current_item = &LEAF[ih->ih_item_location];
799#ifdef REISERDEBUG
800  printf ("  new ih: key %d:%d:%d:%d version:%d\n",
801	  INFO->current_ih->ih_key.k_dir_id,
802	  INFO->current_ih->ih_key.k_objectid,
803	  INFO->current_ih->ih_key.u.v1.k_offset,
804	  INFO->current_ih->ih_key.u.v1.k_uniqueness,
805	  INFO->current_ih->ih_version);
806#endif /* REISERDEBUG */
807  return 1;
808}
809
810/* preconditions: reiserfs_mount already executed, therefore
811 *   INFO block is valid
812 * returns: 0 if error (errnum is set),
813 *   nonzero iff we were able to find the key successfully.
814 * postconditions: on a nonzero return, the current_ih and
815 *   current_item fields describe the key that equals the
816 *   searched key.  INFO->next_key contains the next key after
817 *   the searched key.
818 * side effects: messes around with the cache.
819 */
820static int
821search_stat (__u32 dir_id, __u32 objectid)
822{
823  char *cache;
824  int depth;
825  int nr_item;
826  int i;
827  struct item_head *ih;
828#ifdef REISERDEBUG
829  printf ("search_stat:\n  key %d:%d:0:0\n", dir_id, objectid);
830#endif /* REISERDEBUG */
831
832  depth = INFO->tree_depth;
833  cache = ROOT;
834
835  while (depth > DISK_LEAF_NODE_LEVEL)
836    {
837      struct key *key;
838      nr_item = BLOCKHEAD (cache)->blk_nr_item;
839
840      key = KEY (cache);
841
842      for (i = 0; i < nr_item; i++)
843	{
844	  if (key->k_dir_id > dir_id
845	      || (key->k_dir_id == dir_id
846		  && (key->k_objectid > objectid
847		      || (key->k_objectid == objectid
848			  && (key->u.v1.k_offset
849			      | key->u.v1.k_uniqueness) > 0))))
850	    break;
851	  key++;
852	}
853
854#ifdef REISERDEBUG
855      printf ("  depth=%d, i=%d/%d\n", depth, i, nr_item);
856#endif /* REISERDEBUG */
857      INFO->next_key_nr[depth] = (i == nr_item) ? 0 : i+1;
858      cache = read_tree_node (DC (cache)[i].dc_block_number, --depth);
859      if (! cache)
860	return 0;
861    }
862
863  /* cache == LEAF */
864  nr_item = BLOCKHEAD (LEAF)->blk_nr_item;
865  ih = ITEMHEAD;
866  for (i = 0; i < nr_item; i++)
867    {
868      if (ih->ih_key.k_dir_id == dir_id
869	  && ih->ih_key.k_objectid == objectid
870	  && ih->ih_key.u.v1.k_offset == 0
871	  && ih->ih_key.u.v1.k_uniqueness == 0)
872	{
873#ifdef REISERDEBUG
874	  printf ("  depth=%d, i=%d/%d\n", depth, i, nr_item);
875#endif /* REISERDEBUG */
876	  INFO->current_ih   = ih;
877	  INFO->current_item = &LEAF[ih->ih_item_location];
878	  return 1;
879	}
880      ih++;
881    }
882  errnum = ERR_FSYS_CORRUPT;
883  return 0;
884}
885
886int
887reiserfs_read (char *buf, int len)
888{
889  unsigned int blocksize;
890  unsigned int offset;
891  unsigned int to_read;
892  char *prev_buf = buf;
893
894#ifdef REISERDEBUG
895  printf ("reiserfs_read: filepos=%d len=%d, offset=%x:%x\n",
896	  filepos, len, (__u64) IH_KEY_OFFSET (INFO->current_ih) - 1);
897#endif /* REISERDEBUG */
898
899  if (INFO->current_ih->ih_key.k_objectid != INFO->fileinfo.k_objectid
900      || IH_KEY_OFFSET (INFO->current_ih) > filepos + 1)
901    {
902      search_stat (INFO->fileinfo.k_dir_id, INFO->fileinfo.k_objectid);
903      goto get_next_key;
904    }
905
906  while (! errnum)
907    {
908      if (INFO->current_ih->ih_key.k_objectid != INFO->fileinfo.k_objectid)
909	break;
910
911      offset = filepos - IH_KEY_OFFSET (INFO->current_ih) + 1;
912      blocksize = INFO->current_ih->ih_item_len;
913
914#ifdef REISERDEBUG
915      printf ("  loop: filepos=%d len=%d, offset=%d blocksize=%d\n",
916	      filepos, len, offset, blocksize);
917#endif /* REISERDEBUG */
918
919      if (IH_KEY_ISTYPE(INFO->current_ih, TYPE_DIRECT)
920	  && offset < blocksize)
921	{
922#ifdef REISERDEBUG
923	  printf ("direct_read: offset=%d, blocksize=%d\n",
924		  offset, blocksize);
925#endif /* REISERDEBUG */
926	  to_read = blocksize - offset;
927	  if (to_read > len)
928	    to_read = len;
929
930	  if (disk_read_hook != NULL)
931	    {
932	      disk_read_func = disk_read_hook;
933
934	      block_read (INFO->blocks[DISK_LEAF_NODE_LEVEL],
935			  (INFO->current_item - LEAF + offset), to_read, buf);
936
937	      disk_read_func = NULL;
938	    }
939	  else
940	    memcpy (buf, INFO->current_item + offset, to_read);
941	  goto update_buf_len;
942	}
943      else if (IH_KEY_ISTYPE(INFO->current_ih, TYPE_INDIRECT))
944	{
945	  blocksize = (blocksize >> 2) << INFO->fullblocksize_shift;
946#ifdef REISERDEBUG
947	  printf ("indirect_read: offset=%d, blocksize=%d\n",
948		  offset, blocksize);
949#endif /* REISERDEBUG */
950
951	  while (offset < blocksize)
952	    {
953	      __u32 blocknr = ((__u32 *) INFO->current_item)
954		[offset >> INFO->fullblocksize_shift];
955	      int blk_offset = offset & (INFO->blocksize-1);
956
957	      to_read = INFO->blocksize - blk_offset;
958	      if (to_read > len)
959		to_read = len;
960
961	      disk_read_func = disk_read_hook;
962
963	      /* Journal is only for meta data.  Data blocks can be read
964	       * directly without using block_read
965	       */
966	      devread (blocknr << INFO->blocksize_shift,
967		       blk_offset, to_read, buf);
968
969	      disk_read_func = NULL;
970	    update_buf_len:
971	      len -= to_read;
972	      buf += to_read;
973	      offset += to_read;
974	      filepos += to_read;
975	      if (len == 0)
976		goto done;
977	    }
978	}
979    get_next_key:
980      next_key ();
981    }
982 done:
983  return errnum ? 0 : buf - prev_buf;
984}
985
986
987/* preconditions: reiserfs_mount already executed, therefore
988 *   INFO block is valid
989 * returns: 0 if error, nonzero iff we were able to find the file successfully
990 * postconditions: on a nonzero return, INFO->fileinfo contains the info
991 *   of the file we were trying to look up, filepos is 0 and filemax is
992 *   the size of the file.
993 */
994int
995reiserfs_dir (char *dirname)
996{
997  struct reiserfs_de_head *de_head;
998  char *rest, ch;
999  __u32 dir_id, objectid, parent_dir_id = 0, parent_objectid = 0;
1000#ifndef STAGE1_5
1001  int do_possibilities = 0;
1002#endif /* ! STAGE1_5 */
1003  char linkbuf[PATH_MAX];	/* buffer for following symbolic links */
1004  int link_count = 0;
1005  int mode;
1006
1007  dir_id = REISERFS_ROOT_PARENT_OBJECTID;
1008  objectid = REISERFS_ROOT_OBJECTID;
1009
1010  while (1)
1011    {
1012#ifdef REISERDEBUG
1013      printf ("dirname=%s\n", dirname);
1014#endif /* REISERDEBUG */
1015
1016      /* Search for the stat info first. */
1017      if (! search_stat (dir_id, objectid))
1018	return 0;
1019
1020#ifdef REISERDEBUG
1021      printf ("sd_mode=%x sd_size=%d\n",
1022	      ((struct stat_data *) INFO->current_item)->sd_mode,
1023	      ((struct stat_data *) INFO->current_item)->sd_size);
1024#endif /* REISERDEBUG */
1025
1026      mode = ((struct stat_data *) INFO->current_item)->sd_mode;
1027
1028      /* If we've got a symbolic link, then chase it. */
1029      if (S_ISLNK (mode))
1030	{
1031	  int len;
1032	  if (++link_count > MAX_LINK_COUNT)
1033	    {
1034	      errnum = ERR_SYMLINK_LOOP;
1035	      return 0;
1036	    }
1037
1038	  /* Get the symlink size. */
1039	  filemax = ((struct stat_data *) INFO->current_item)->sd_size;
1040
1041	  /* Find out how long our remaining name is. */
1042	  len = 0;
1043	  while (dirname[len] && !isspace (dirname[len]))
1044	    len++;
1045
1046	  if (filemax + len > sizeof (linkbuf) - 1)
1047	    {
1048	      errnum = ERR_FILELENGTH;
1049	      return 0;
1050	    }
1051
1052	  /* Copy the remaining name to the end of the symlink data.
1053	     Note that DIRNAME and LINKBUF may overlap! */
1054	  grub_memmove (linkbuf + filemax, dirname, len+1);
1055
1056	  INFO->fileinfo.k_dir_id = dir_id;
1057	  INFO->fileinfo.k_objectid = objectid;
1058  	  filepos = 0;
1059	  if (! next_key ()
1060	      || reiserfs_read (linkbuf, filemax) != filemax)
1061	    {
1062	      if (! errnum)
1063		errnum = ERR_FSYS_CORRUPT;
1064	      return 0;
1065	    }
1066
1067#ifdef REISERDEBUG
1068	  printf ("symlink=%s\n", linkbuf);
1069#endif /* REISERDEBUG */
1070
1071	  dirname = linkbuf;
1072	  if (*dirname == '/')
1073	    {
1074	      /* It's an absolute link, so look it up in root. */
1075	      dir_id = REISERFS_ROOT_PARENT_OBJECTID;
1076	      objectid = REISERFS_ROOT_OBJECTID;
1077	    }
1078	  else
1079	    {
1080	      /* Relative, so look it up in our parent directory. */
1081	      dir_id   = parent_dir_id;
1082	      objectid = parent_objectid;
1083	    }
1084
1085	  /* Now lookup the new name. */
1086	  continue;
1087	}
1088
1089      /* if we have a real file (and we're not just printing possibilities),
1090	 then this is where we want to exit */
1091
1092      if (! *dirname || isspace (*dirname))
1093	{
1094	  if (! S_ISREG (mode))
1095	    {
1096	      errnum = ERR_BAD_FILETYPE;
1097	      return 0;
1098	    }
1099
1100	  filepos = 0;
1101	  filemax = ((struct stat_data *) INFO->current_item)->sd_size;
1102
1103	  /* If this is a new stat data and size is > 4GB set filemax to
1104	   * maximum
1105	   */
1106	  if (INFO->current_ih->ih_version == ITEM_VERSION_2
1107	      && ((struct stat_data *) INFO->current_item)->sd_size_hi > 0)
1108	    filemax = 0xffffffff;
1109
1110	  INFO->fileinfo.k_dir_id = dir_id;
1111	  INFO->fileinfo.k_objectid = objectid;
1112	  return next_key ();
1113	}
1114
1115      /* continue with the file/directory name interpretation */
1116      while (*dirname == '/')
1117	dirname++;
1118      if (! S_ISDIR (mode))
1119	{
1120	  errnum = ERR_BAD_FILETYPE;
1121	  return 0;
1122	}
1123      for (rest = dirname; (ch = *rest) && ! isspace (ch) && ch != '/'; rest++);
1124      *rest = 0;
1125
1126# ifndef STAGE1_5
1127      if (print_possibilities && ch != '/')
1128	do_possibilities = 1;
1129# endif /* ! STAGE1_5 */
1130
1131      while (1)
1132	{
1133	  char *name_end;
1134	  int num_entries;
1135
1136	  if (! next_key ())
1137	    return 0;
1138#ifdef REISERDEBUG
1139	  printf ("ih: key %d:%d:%d:%d version:%d\n",
1140		  INFO->current_ih->ih_key.k_dir_id,
1141		  INFO->current_ih->ih_key.k_objectid,
1142		  INFO->current_ih->ih_key.u.v1.k_offset,
1143		  INFO->current_ih->ih_key.u.v1.k_uniqueness,
1144		  INFO->current_ih->ih_version);
1145#endif /* REISERDEBUG */
1146
1147	  if (INFO->current_ih->ih_key.k_objectid != objectid)
1148	    break;
1149
1150	  name_end = INFO->current_item + INFO->current_ih->ih_item_len;
1151	  de_head = (struct reiserfs_de_head *) INFO->current_item;
1152	  num_entries = INFO->current_ih->u.ih_entry_count;
1153	  while (num_entries > 0)
1154	    {
1155	      char *filename = INFO->current_item + de_head->deh_location;
1156	      char  tmp = *name_end;
1157	      if ((de_head->deh_state & DEH_Visible))
1158		{
1159		  int cmp;
1160		  /* Directory names in ReiserFS are not null
1161		   * terminated.  We write a temporary 0 behind it.
1162		   * NOTE: that this may overwrite the first block in
1163		   * the tree cache.  That doesn't hurt as long as we
1164		   * don't call next_key () in between.
1165		   */
1166		  *name_end = 0;
1167		  cmp = substring (dirname, filename);
1168		  *name_end = tmp;
1169# ifndef STAGE1_5
1170		  if (do_possibilities)
1171		    {
1172		      if (cmp <= 0)
1173			{
1174			  if (print_possibilities > 0)
1175			    print_possibilities = -print_possibilities;
1176			  *name_end = 0;
1177			  print_a_completion (filename);
1178			  *name_end = tmp;
1179			}
1180		    }
1181		  else
1182# endif /* ! STAGE1_5 */
1183		    if (cmp == 0)
1184		      goto found;
1185		}
1186	      /* The beginning of this name marks the end of the next name.
1187	       */
1188	      name_end = filename;
1189	      de_head++;
1190	      num_entries--;
1191	    }
1192	}
1193
1194# ifndef STAGE1_5
1195      if (print_possibilities < 0)
1196	return 1;
1197# endif /* ! STAGE1_5 */
1198
1199      errnum = ERR_FILE_NOT_FOUND;
1200      *rest = ch;
1201      return 0;
1202
1203    found:
1204
1205      *rest = ch;
1206      dirname = rest;
1207
1208      parent_dir_id = dir_id;
1209      parent_objectid = objectid;
1210      dir_id = de_head->deh_dir_id;
1211      objectid = de_head->deh_objectid;
1212    }
1213}
1214
1215int
1216reiserfs_embed (unsigned long long *start_sector, int needed_sectors)
1217{
1218  struct reiserfs_super_block super;
1219  int num_sectors;
1220
1221  if (! devread (REISERFS_DISK_OFFSET_IN_BYTES >> SECTOR_BITS, 0,
1222		 sizeof (struct reiserfs_super_block), (char *) &super))
1223    return 0;
1224
1225  *start_sector = 1; /* reserve first sector for stage1 */
1226  if ((substring (REISERFS_SUPER_MAGIC_STRING, super.s_magic) <= 0
1227       || substring (REISER2FS_SUPER_MAGIC_STRING, super.s_magic) <= 0
1228       || substring (REISER3FS_SUPER_MAGIC_STRING, super.s_magic) <= 0)
1229      && (/* check that this is not a super block copy inside
1230	   * the journal log */
1231	  super.s_journal_block * super.s_blocksize
1232	  > REISERFS_DISK_OFFSET_IN_BYTES))
1233    num_sectors = (REISERFS_DISK_OFFSET_IN_BYTES >> SECTOR_BITS) - 1;
1234  else
1235    num_sectors = (REISERFS_OLD_DISK_OFFSET_IN_BYTES >> SECTOR_BITS) - 1;
1236
1237  return (needed_sectors <= num_sectors);
1238}
1239#endif /* FSYS_REISERFS */
1240