diff options
author | Theodore Ts'o <tytso@mit.edu> | 2000-08-14 14:25:19 +0000 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2000-08-14 14:25:19 +0000 |
commit | 3b5386dca8e0008e13351be5de6323576329aa58 (patch) | |
tree | ad19dfd7728af19f4f76480737095c4c459ef964 /e2fsck | |
parent | b8d164cd2412edc65ba47c573f2abcc254dbbbc9 (diff) | |
download | e2fsprogs-3b5386dca8e0008e13351be5de6323576329aa58.tar.gz |
Many files:
journal.c: implement loading of ext3 journal for recovery code
problem.c (fix_problem): return answer from PR_AFTER_CODE to caller.
Add journal problems.
recovery.c (journal_recover): user-space ext3 journal recovery code
unix.c (main) : check journal and do recovery in separate steps
jfs.h, recovery.c: Files from the ext3 kernel code.
jfs_compat.h: Compatibility header file to allow kernel code to be
linked to e2fsck.
Diffstat (limited to 'e2fsck')
-rw-r--r-- | e2fsck/ChangeLog | 11 | ||||
-rw-r--r-- | e2fsck/Makefile.in | 17 | ||||
-rw-r--r-- | e2fsck/e2fsck.8.in | 4 | ||||
-rw-r--r-- | e2fsck/e2fsck.h | 3 | ||||
-rw-r--r-- | e2fsck/jfs.h | 531 | ||||
-rw-r--r-- | e2fsck/jfs_compat.h | 54 | ||||
-rw-r--r-- | e2fsck/journal.c | 660 | ||||
-rw-r--r-- | e2fsck/message.c | 8 | ||||
-rw-r--r-- | e2fsck/problem.c | 63 | ||||
-rw-r--r-- | e2fsck/problem.h | 35 | ||||
-rw-r--r-- | e2fsck/recovery.c | 428 | ||||
-rw-r--r-- | e2fsck/unix.c | 53 |
12 files changed, 1792 insertions, 75 deletions
diff --git a/e2fsck/ChangeLog b/e2fsck/ChangeLog index 2c4efaab..86bdc2d9 100644 --- a/e2fsck/ChangeLog +++ b/e2fsck/ChangeLog @@ -1,3 +1,14 @@ +2000-07-12 Andreas Dilger <adilger@turbolinux.com> + + * journal.c: implement loading of ext3 journal for recovery code + + * problem.c (fix_problem): return answer from PR_AFTER_CODE to caller. + Add journal problems. + + * recovery.c (journal_recover): user-space ext3 journal recovery code + + * unix.c (main) : check journal and do recovery in separate steps + 2000-08-07 <tytso@snap.thunk.org> * unix.c (calc_percent): Make sure that we don't take a floating diff --git a/e2fsck/Makefile.in b/e2fsck/Makefile.in index 9cbb3a0f..f5abb2f2 100644 --- a/e2fsck/Makefile.in +++ b/e2fsck/Makefile.in @@ -56,14 +56,14 @@ PROFILED_DEPLIBS= $(PROFILED_LIBEXT2FS) $(PROFILED_LIBCOM_ERR) \ OBJS= unix.o e2fsck.o super.o pass1.o pass1b.o pass2.o pass3.o pass4.o \ pass5.o journal.o swapfs.o badblocks.o util.o dirinfo.o ehandler.o \ - problem.o message.o $(MTRACE_OBJ) + problem.o message.o recovery.o $(MTRACE_OBJ) PROFILED_OBJS= profiled/unix.o profiled/e2fsck.o profiled/super.o \ profiled/pass1.o profiled/pass1b.o \ profiled/pass2.o profiled/pass3.o profiled/pass4.o profiled/pass5.o \ profiled/journal.o profiled/badblocks.o profiled/util.o \ profiled/dirinfo.o profiled/ehandler.o profiled/message.o \ - profiled/problem.o profiled/swapfs.o + profiled/problem.o profiled/swapfs.o profiled/recovery.o SRCS= $(srcdir)/e2fsck.c \ $(srcdir)/super.c \ @@ -74,6 +74,7 @@ SRCS= $(srcdir)/e2fsck.c \ $(srcdir)/pass4.c \ $(srcdir)/pass5.c \ $(srcdir)/journal.c \ + $(srcdir)/recovery.c \ $(srcdir)/badblocks.c \ $(srcdir)/util.c \ $(srcdir)/unix.c \ @@ -157,15 +158,15 @@ distclean: clean # Makefile dependencies follow. 
This must be the last section in # the Makefile.in file # -e2fsck.o: $(srcdir)/e2fsck.c $(srcdir)/e2fsck.h \ +e2fsck.o: $(srcdir)/e2fsck.c $(srcdir)/e2fsck.h $(srcdir)/jfs_compat.h \ $(top_srcdir)/lib/ext2fs/ext2fs.h $(top_srcdir)/lib/et/com_err.h \ $(top_srcdir)/lib/ext2fs/ext2_io.h $(top_builddir)/lib/ext2fs/ext2_err.h \ - $(top_srcdir)/lib/ext2fs/bitops.h $(srcdir)/problem.h + $(top_srcdir)/lib/ext2fs/bitops.h $(srcdir)/problem.h $(srcdir)/jfs.h super.o: $(srcdir)/super.c $(top_srcdir)/lib/uuid/uuid.h $(srcdir)/e2fsck.h \ $(top_srcdir)/lib/ext2fs/ext2fs.h $(top_srcdir)/lib/et/com_err.h \ $(top_srcdir)/lib/ext2fs/ext2_io.h $(top_builddir)/lib/ext2fs/ext2_err.h \ $(top_srcdir)/lib/ext2fs/bitops.h $(srcdir)/problem.h -pass1.o: $(srcdir)/pass1.c $(srcdir)/e2fsck.h \ +pass1.o: $(srcdir)/pass1.c $(srcdir)/e2fsck.h $(srcdir)/jfs_compat.h \ $(top_srcdir)/lib/ext2fs/ext2fs.h $(top_srcdir)/lib/et/com_err.h \ $(top_srcdir)/lib/ext2fs/ext2_io.h $(top_builddir)/lib/ext2fs/ext2_err.h \ $(top_srcdir)/lib/ext2fs/bitops.h $(srcdir)/problem.h @@ -189,6 +190,12 @@ pass5.o: $(srcdir)/pass5.c $(srcdir)/e2fsck.h \ $(top_srcdir)/lib/ext2fs/ext2fs.h $(top_srcdir)/lib/et/com_err.h \ $(top_srcdir)/lib/ext2fs/ext2_io.h $(top_builddir)/lib/ext2fs/ext2_err.h \ $(top_srcdir)/lib/ext2fs/bitops.h $(srcdir)/problem.h +journal.o: $(srcdir)/journal.c $(srcdir)/jfs_compat.h $(srcdir)/e2fsck.h \ + $(top_srcdir)/lib/ext2fs/ext2fs.h $(top_srcdir)/lib/et/com_err.h \ + $(top_srcdir)/lib/ext2fs/ext2_io.h $(top_builddir)/lib/ext2fs/ext2_err.h \ + $(srcdir)/jfs.h +recovery.o: $(srcdir)/recovery.c $(srcdir)/jfs_compat.h $(srcdir)/e2fsck.h \ + $(srcdir)/jfs.h badblocks.o: $(srcdir)/badblocks.c $(top_srcdir)/lib/et/com_err.h \ $(srcdir)/e2fsck.h $(top_srcdir)/lib/ext2fs/ext2fs.h \ $(top_srcdir)/lib/ext2fs/ext2_io.h $(top_builddir)/lib/ext2fs/ext2_err.h \ diff --git a/e2fsck/e2fsck.8.in b/e2fsck/e2fsck.8.in index a72194ac..c76ff036 100644 --- a/e2fsck/e2fsck.8.in +++ b/e2fsck/e2fsck.8.in @@ -29,7 +29,9 @@ 
e2fsck \- check a Linux second extended file system .I device .SH DESCRIPTION .B e2fsck -is used to check a Linux second extended file system. +is used to check a Linux second extended file system (e2fs). E2fsck also +supports ext2 filesystems containing a journal, which are +also sometimes known as ext3 filesystems. .TP .I device is the special file corresponding to the device (e.g diff --git a/e2fsck/e2fsck.h b/e2fsck/e2fsck.h index 6b69ed78..31c0a03d 100644 --- a/e2fsck/e2fsck.h +++ b/e2fsck/e2fsck.h @@ -283,7 +283,8 @@ extern const char *ehandler_operation(const char *op); extern void ehandler_init(io_channel channel); /* journal.c */ -extern int e2fsck_run_ext3_journal(const char *device); +extern int e2fsck_check_ext3_journal(e2fsck_t ctx); +extern int e2fsck_run_ext3_journal(e2fsck_t ctx); /* pass1.c */ extern void e2fsck_use_inode_shortcuts(e2fsck_t ctx, int bool); diff --git a/e2fsck/jfs.h b/e2fsck/jfs.h new file mode 100644 index 00000000..2d089773 --- /dev/null +++ b/e2fsck/jfs.h @@ -0,0 +1,531 @@ +/* + * linux/include/linux/jfs.h + * + * Written by Stephen C. Tweedie <sct@redhat.com>, 1998 + * + * Copyright 1998 Red Hat corp --- All Rights Reserved + * + * This file is part of the Linux kernel and is made available under + * the terms of the GNU General Public License, version 2, or at your + * option, any later version, incorporated herein by reference. + * + * Definitions for transaction data structures for the buffer cache + * filesystem journaling support. + */ + +#ifndef _LINUX_JFS_H +#define _LINUX_JFS_H + +#ifndef __KERNEL__ +#include "jfs_compat.h" +#endif + +/* + * Debug code + */ + +/* #define JFS_DEBUG */ + +#ifdef JFS_DEBUG +extern int jfs_enable_debug; + +#define jfs_debug(n, f, a...) \ + do { \ + if ((n) <= jfs_enable_debug) { \ + printk (KERN_DEBUG "JFS DEBUG: (%s, %d): %s: ", \ + __FILE__, __LINE__, __FUNCTION__); \ + printk (f, ## a); \ + } \ + } while (0) +#else +#define jfs_debug(f, a...) 
/**/ +#endif + +#define JFS_MIN_JOURNAL_BLOCKS 1024 + +/* + * Internal structures used by the logging mechanism: + */ + +#define JFS_MAGIC_NUMBER 0xc03b3998U /* The first 4 bytes of /dev/random! */ + + +/* + * On-disk structures + */ + +/* + * Descriptor block types: + */ + +#define JFS_DESCRIPTOR_BLOCK 1 +#define JFS_COMMIT_BLOCK 2 +#define JFS_SUPERBLOCK 3 + +/* + * Standard header for all descriptor blocks: + */ +typedef struct journal_header_s +{ + __u32 h_magic; + __u32 h_blocktype; + __u32 h_sequence; +} journal_header_t; + + +/* + * The block tag: used to describe a single buffer in the journal + */ +typedef struct journal_block_tag_s +{ + __u32 t_blocknr; /* The on-disk block number */ + __u32 t_flags; /* See below */ +} journal_block_tag_t; + +/* Definitions for the journal tag flags word: */ +#define JFS_FLAG_ESCAPE 1 /* on-disk block is escaped */ +#define JFS_FLAG_SAME_UUID 2 /* block has same uuid as previous */ +#define JFS_FLAG_DELETED 4 /* block deleted by this transaction */ +#define JFS_FLAG_LAST_TAG 8 /* last tag in this descriptor block */ + + +/* + * The journal superblock + */ +typedef struct journal_superblock_s +{ + journal_header_t s_header; + + /* Static information describing the journal */ + __u32 s_blocksize; /* journal device blocksize */ + __u32 s_maxlen; /* total blocks in journal file */ + __u32 s_first; /* first block of log information */ + + /* Dynamic information describing the current state of the log */ + __u32 s_sequence; /* first commit ID expected in log */ + __u32 s_start; /* blocknr of start of log */ + +} journal_superblock_t; + +#ifdef __KERNEL__ + +#include <asm/semaphone.h> +#include <linux/fs.h> + + +#define J_ASSERT(assert) \ + do { if (!(assert)) { \ + printk (KERN_CRIT \ + "Assertion failure in %s() at %s line %d: " \ + "\"%s\"\n", \ + __FUNCTION__, __FILE__, __LINE__, # assert); \ + * ((char *) 0) = 0; \ + } } while (0) + + + +/* The handle_t type represents a single atomic update being performed + * by some 
process. All filesystem modifications made by the process go + * through this handle. Recursive operations (such as quota operations) + * are gathered into a single update. + * + * The buffer credits field is used to account for journaled buffers + * being modified by the running process. To ensure that there is + * enough log space for all outstanding operations, we need to limit the + * number of outstanding buffers possible at any time. When the + * operation completes, any buffer credits not used are credited back to + * the transaction, so that at all times we know how many buffers the + * outstanding updates on a transaction might possibly touch. */ + +struct handle_s +{ + /* Which compound transaction is this update a part of? */ + transaction_t * h_transaction; + + /* Number of remaining buffers we are allowed to dirty: */ + int h_buffer_credits; + + /* Reference count on this handle */ + int h_ref; + + /* Flags */ + unsigned int h_sync : 1; /* sync-on-close */ +}; + + +/* The transaction_t type is the guts of the journaling mechanism. It + * tracks a compound transaction through its various states: + * + * RUNNING: accepting new updates + * LOCKED: Updates still running but we don't accept new ones + * RUNDOWN: Updates are tidying up but have finished requesting + * new buffers to modify (state not used for now) + * FLUSH: All updates complete, but we are still writing to disk + * COMMIT: All data on disk, writing commit record + * FINISHED: We still have to keep the transaction for checkpointing. + * + * The transaction keeps track of all of the buffers modified by a + * running transaction, and all of the buffers committed but not yet + * flushed to home for finished transactions. + */ + +struct transaction_s +{ + /* Pointer to the journal for this transaction. 
*/ + journal_t * t_journal; + + /* Sequence number for this transaction */ + tid_t t_tid; + + /* Transaction's current state */ + enum { + T_RUNNING, + T_LOCKED, + T_RUNDOWN, + T_FLUSH, + T_COMMIT, + T_FINISHED + } t_state; + + /* Where in the log does this transaction's commit start? */ + unsigned long t_log_start; + + /* Doubly-linked circular list of all inodes owned by this + transaction */ + struct inode * t_ilist; + + /* Number of buffers on the t_buffers list */ + int t_nr_buffers; + + /* Doubly-linked circular list of all buffers reserved but not + yet modified by this transaction */ + struct buffer_head * t_reserved_list; + + /* Doubly-linked circular list of all metadata buffers owned by this + transaction */ + struct buffer_head * t_buffers; + + /* Doubly-linked circular list of all data buffers still to be + flushed before this transaction can be committed */ + struct buffer_head * t_datalist; + + /* Doubly-linked circular list of all forget buffers (superceded + buffers which we can un-checkpoint once this transaction + commits) */ + struct buffer_head * t_forget; + + /* Doubly-linked circular list of all buffers still to be + flushed before this transaction can be checkpointed */ + struct buffer_head * t_checkpoint_list; + + /* Doubly-linked circular list of temporary buffers currently + undergoing IO in the log */ + struct buffer_head * t_iobuf_list; + + /* Doubly-linked circular list of metadata buffers being + shadowed by log IO. The IO buffers on the iobuf list and the + shadow buffers on this list match each other one for one at + all times. */ + struct buffer_head * t_shadow_list; + + /* Doubly-linked circular list of control buffers being written + to the log. */ + struct buffer_head * t_log_list; + + /* Number of outstanding updates running on this transaction */ + int t_updates; + + /* Number of buffers reserved for use by all handles in this + * transaction handle but not yet modified. 
*/ + int t_outstanding_credits; + + /* Wait queue to wait for updates to complete */ + struct wait_queue * t_wait; + + /* Forward and backward links for the circular list of all + * transactions awaiting checkpoint */ + transaction_t *t_cpnext, *t_cpprev; + + /* When will the transaction expire (become due for commit), in + * jiffies ? */ + unsigned long t_expires; +}; +#endif /* __KERNEL__ */ + + +/* The journal_t maintains all of the journaling state information for a + * single filesystem. It is linked to from the fs superblock structure. + * + * We use the journal_t to keep track of all outstanding transaction + * activity on the filesystem, and to manage the state of the log + * writing process. */ + +struct journal_s +{ + /* General journaling state flags */ + unsigned long j_flags; + + /* The superblock buffer */ + struct buffer_head * j_sb_buffer; + journal_superblock_t * j_superblock; + +#ifdef __KERNEL__ + /* Transactions: The current running transaction... */ + transaction_t * j_running_transaction; + + /* ... the transaction we are pushing to disk ... */ + transaction_t * j_committing_transaction; + + /* ... and a linked circular list of all transactions waiting + * for checkpointing. */ + transaction_t * j_checkpoint_transactions; + + /* Wait queue for locking of the journal structure. 
*/ + struct wait_queue * j_wait_lock; + + /* Wait queue for waiting for a locked transaction to start + committing */ + struct wait_queue * j_wait_transaction_locked; + + /* Wait queue for waiting for checkpointing to complete */ + struct wait_queue * j_wait_logspace; + + /* Wait queue for waiting for commit to complete */ + struct wait_queue * j_wait_done_commit; + + /* Wait queue to trigger checkpointing */ + struct wait_queue * j_wait_checkpoint; + + /* Wait queue to trigger commit */ + struct wait_queue * j_wait_commit; + + /* Semaphore for locking against concurrent checkpoints */ + struct semaphore j_checkpoint_sem; + + /* Journal running state: */ + /* The lock flag is *NEVER* touched from interrupts. */ + unsigned int j_locked : 1; + + /* Pointer to the current commit thread for this journal */ + struct task_struct * j_task; + + /* The timer used to wakeup the commit thread: */ + struct timer_list * j_commit_timer; + int j_commit_timer_active; +#endif + + /* Journal head: identifies the first unused block in the journal. */ + unsigned long j_head; + + /* Journal tail: identifies the oldest still-used block in the + * journal. */ + unsigned long j_tail; + + /* Journal free: how many free blocks are there in the journal? */ + unsigned long j_free; + + /* Journal start and end: the block numbers of the first usable + * block and one beyond the last usable block in the journal. */ + unsigned long j_first, j_last; + + /* Device, blocksize and starting block offset for the location + * where we store the journal. */ + kdev_t j_dev; + int j_blocksize; + unsigned int j_blk_offset; + + /* Total maximum capacity of the journal region on disk. */ + unsigned int j_maxlen; + + /* Optional inode where we store the journal. If present, all + * journal block numbers are mapped into this inode via + * bmap(). 
*/ + struct inode * j_inode; + + /* Sequence number of the oldest transaction in the log */ + tid_t j_tail_sequence; + /* Sequence number of the next transaction to grant */ + tid_t j_transaction_sequence; + /* Sequence number of the most recently committed transaction */ + tid_t j_commit_sequence; + /* Sequence number of the most recent transaction wanting commit */ + tid_t j_commit_request; + + /* Journal uuid: identifies the object (filesystem, LVM volume + * etc) backed by this journal. This will eventually be + * replaced by an array of uuids, allowing us to index multiple + * devices within a single journal and to perform atomic updates + * across them. */ + + __u8 j_uuid[16]; + + /* Maximum number of metadata buffers to allow in a single + * compound commit transaction */ + int j_max_transaction_buffers; + + /* What is the maximum transaction lifetime before we begin a + * commit? */ + unsigned long j_commit_interval; + +}; + +#ifdef __KERNEL__ + +/* + * Journal flag definitions + */ +#define JFS_UNMOUNT 1 /* Journal thread is being destroyed */ +#define JFS_SYNC 2 /* Perform synchronous transaction commits */ + +/* + * Journaling internal variables/parameters + */ + +extern int journal_flush_nr_buffers; + + +/* + * Function declarations for the journaling transaction and buffer + * management + */ + +/* Filing buffers */ +extern void journal_unfile_buffer(struct buffer_head *); +extern void journal_refile_buffer(struct buffer_head *); +extern void journal_file_buffer(struct buffer_head *, transaction_t *, int); +extern void journal_clean_data_list(transaction_t *transaction); + +/* Log buffer allocation */ +extern struct buffer_head * journal_get_descriptor_buffer(journal_t *); +extern unsigned long journal_next_log_block(journal_t *); + +/* Commit management */ +extern void journal_commit_transaction(journal_t *); + +/* Checkpoint list management */ +extern void journal_remove_checkpoint(struct buffer_head *); +extern void journal_insert_checkpoint(struct 
buffer_head *, transaction_t *); + +/* Buffer IO */ +extern int +journal_write_metadata_buffer(transaction_t *transaction, + struct buffer_head *bh_in, + struct buffer_head **bh_out, + int blocknr); + +/* Create and destroy transactions */ +extern transaction_t * get_transaction (journal_t *); +extern void put_transaction (transaction_t *); + +/* Notify state transitions (called by the log writer thread): */ +extern int set_transaction_state (transaction_t *, int); + + +/* + * Transaction locking + * + * We need to lock the journal during transaction state changes so that + * nobody ever tries to take a handle on the running transaction while + * we are in the middle of moving it to the commit phase. + * + * Note that the locking is completely interrupt unsafe. We never touch + * journal structures from interrupts. + */ + +static inline void __wait_on_journal (journal_t * journal) +{ + while (journal->j_locked) + sleep_on (&journal->j_wait_lock); +} + + +/* Journal locking. In 2.2, we assume that the kernel lock is already + * held. */ +static inline void lock_journal (journal_t * journal) +{ + if (journal->j_locked) + __wait_on_journal(journal); + journal->j_locked = 1; +} + +static inline int try_lock_journal (journal_t * journal) +{ + if (journal->j_locked) + return 1; + journal->j_locked = 1; + return 0; +} + +static inline void unlock_journal (journal_t * journal) +{ + J_ASSERT (journal->j_locked); + journal->j_locked = 0; + wake_up(&journal->j_wait_lock); +} + +/* This function is gross, but unfortunately we need it as long as + * existing filesystems want to guard against races by testing + * bh->b_count. @@@ Remove this? We no longer abuse b_count so badly! 
+ */ + +static inline int journal_is_buffer_shared(struct buffer_head *bh) +{ + int count = bh->b_count; + J_ASSERT (count >= 1); + return (count > 1); +} + +/* Debugging code only: */ + +#define jfs_ENOSYS() \ +do { \ + printk (KERN_ERR "JFS unimplemented function " __FUNCTION__); \ + current->state = TASK_UNINTERRUPTIBLE; \ + schedule(); \ +} while (1) + +/* The log thread user interface: + * + * Request space in the current transaction, and force transaction commit + * transitions on demand. + */ + +extern int log_space_left (journal_t *); /* Called with journal locked */ +extern void log_start_commit (journal_t *, transaction_t *); +extern void log_wait_commit (journal_t *, tid_t); +extern int log_do_checkpoint (journal_t *, int); + +extern void log_wait_for_space(journal_t *, int nblocks); +extern void journal_drop_transaction(journal_t *, transaction_t *); + + +/* The journaling code user interface: + * + * Create and destroy handles + * Register buffer modifications against the current transaction. 
+ */ + +extern handle_t *journal_start (journal_t *, int nblocks); +extern int journal_restart (handle_t *, int nblocks); +extern int journal_extend (handle_t *, int nblocks); +extern int journal_get_write_access (handle_t *, struct buffer_head *); +extern int journal_get_create_access (handle_t *, struct buffer_head *); +extern int journal_get_undo_access (handle_t *, struct buffer_head *); +extern int journal_dirty_data (handle_t *, struct buffer_head *); +extern int journal_dirty_metadata (handle_t *, struct buffer_head *); +extern void journal_release_buffer (handle_t *, struct buffer_head *); +extern void journal_forget (handle_t *, struct buffer_head *); +extern void journal_sync_buffer (struct buffer_head *); +extern int journal_stop (handle_t *); +extern int journal_flush (journal_t *); + +extern journal_t * journal_init_dev (kdev_t, int start, int len, int bsize); +extern journal_t * journal_init_inode (struct inode *); +extern int journal_create (journal_t *); +extern int journal_load (journal_t *); +extern void journal_release (journal_t *); +extern void journal_update_superblock (journal_t *, int); +#endif /* __KERNEL__ */ +extern int journal_recover (journal_t *); + +#endif /* _LINUX_JFS_H */ diff --git a/e2fsck/jfs_compat.h b/e2fsck/jfs_compat.h new file mode 100644 index 00000000..1fa49ca8 --- /dev/null +++ b/e2fsck/jfs_compat.h @@ -0,0 +1,54 @@ + +#ifndef _JFS_COMPAT_H +#define _JFS_COMPAT_H + +#include "e2fsck.h" +#include <errno.h> + +#define printk printf +#define KERN_ERR "" +#define KERN_DEBUG "" + +#define READ 0 +#define WRITE 1 + +typedef int tid_t; +typedef e2fsck_t kdev_t; +typedef struct journal_s journal_t; + +struct buffer_head { + char b_data[8192]; + e2fsck_t b_ctx; + io_channel b_io; + int b_size; + blk_t b_blocknr; + int b_dirty; + int b_uptodate; + int b_err; +}; + +struct inode { + e2fsck_t i_ctx; + ino_t i_ino; + struct ext2_inode i_ext2; +}; + +int bmap(struct inode *inode, int block); +struct buffer_head *getblk(e2fsck_t ctx, 
blk_t blocknr, int blocksize); +void ll_rw_block(int rw, int dummy, struct buffer_head *bh); +void mark_buffer_dirty(struct buffer_head *bh, int dummy); +void brelse(struct buffer_head *bh); +int buffer_uptodate(struct buffer_head *bh); +void wait_on_buffer(struct buffer_head *bh); +#define fsync_dev(dev) do {} while(0) +#define buffer_req(bh) 1 +#define do_readahead(journal, start) do {} while(0) +#define J_ASSERT(assert) \ + do { if (!(assert)) { \ + printf ("Assertion failure in %s() at %s line %d: " \ + "\"%s\"\n", \ + __FUNCTION__, __FILE__, __LINE__, # assert); \ + exit(FSCK_ERROR); \ + } } while (0) + +#endif /* _JFS_COMPAT_H */ diff --git a/e2fsck/journal.c b/e2fsck/journal.c index 15b86cce..dc5660c7 100644 --- a/e2fsck/journal.c +++ b/e2fsck/journal.c @@ -1,72 +1,646 @@ /* * journal.c --- code for handling the "ext3" journal + * + * Copyright (C) 2000 Andreas Dilger + * Copyright (C) 2000 Theodore Ts'o + * + * Parts of the code are based on fs/jfs/journal.c by Stephen C. Tweedie + * Copyright (C) 1999 Red Hat Software + * + * This file may be redistributed under the terms of the + * GNU General Public License version 2 or at your discretion + * any later version. */ -#include <errno.h> +#ifdef HAVE_SYS_MOUNT_H +#include <sys/mount.h> +#define MNT_FL (MS_MGC_VAL | MS_RDONLY) +#endif +#ifdef HAVE_SYS_STAT_H +#include <sys/stat.h> +#endif -#include "e2fsck.h" +#include "jfs.h" +#include "problem.h" +#include "uuid/uuid.h" -/* - * This is a list of directories to try. The first element may get - * replaced by a mktemp'ed generated temp directory if possible. 
- */ -static char *dirlist[] = { "/mnt", "/tmp", "/root", "/boot", 0 }; +#ifdef JFS_DEBUG +static int bh_count = 0; +int jfs_enable_debug = 2; +#endif + +int bmap(struct inode *inode, int block) +{ + int retval; + blk_t phys; + + retval = ext2fs_bmap(inode->i_ctx->fs, inode->i_ino, &inode->i_ext2, + NULL, 0, block, &phys); + + if (retval) + com_err(inode->i_ctx->device_name, retval, + _("bmap journal inode %ld, block %d\n"), + inode->i_ino, block); + + return phys; +} + +struct buffer_head *getblk(e2fsck_t ctx, blk_t blocknr, int blocksize) +{ + struct buffer_head *bh; + + bh = e2fsck_allocate_memory(ctx, sizeof(*bh), "block buffer"); + if (!bh) + return NULL; + + jfs_debug(4, "getblk for block %lu (%d bytes)(total %d)\n", + blocknr, blocksize, ++bh_count); + + bh->b_ctx = ctx; + bh->b_size = blocksize; + bh->b_blocknr = blocknr; + + return bh; +} + +void ll_rw_block(int rw, int dummy, struct buffer_head *bh) +{ + int retval; + + if (rw == READ && !bh->b_uptodate) { + jfs_debug(3, "reading block %lu/%p\n", bh->b_blocknr, bh); + retval = io_channel_read_blk(bh->b_ctx->fs->io, bh->b_blocknr, + 1, bh->b_data); + if (retval) { + com_err(bh->b_ctx->device_name, retval, + "while reading block %ld\n", bh->b_blocknr); + bh->b_err = retval; + return; + } + bh->b_uptodate = 1; + } else if (rw == WRITE && bh->b_dirty) { + jfs_debug(3, "writing block %lu/%p\n", bh->b_blocknr, bh); + retval = io_channel_write_blk(bh->b_ctx->fs->io, bh->b_blocknr, + 1, bh->b_data); + if (retval) { + com_err(bh->b_ctx->device_name, retval, + "while writing block %ld\n", bh->b_blocknr); + bh->b_err = retval; + return; + } + bh->b_dirty = 0; + bh->b_uptodate = 1; + } else + jfs_debug(3, "no-op %s for block %lu\n", + rw == READ ? 
"read" : "write", bh->b_blocknr); +} + +void mark_buffer_dirty(struct buffer_head *bh, int dummy) +{ + bh->b_dirty = dummy | 1; /* use dummy to avoid unused variable */ +} + +void brelse(struct buffer_head *bh) +{ + if (bh->b_dirty) + ll_rw_block(WRITE, 1, bh); + jfs_debug(3, "freeing block %lu/%p (total %d)\n", + bh->b_blocknr, bh, --bh_count); + ext2fs_free_mem((void **) &bh); +} + +int buffer_uptodate(struct buffer_head *bh) +{ + return bh->b_uptodate; +} + +void wait_on_buffer(struct buffer_head *bh) +{ + if (!bh->b_uptodate) + ll_rw_block(READ, 1, bh); +} + +static void e2fsck_clear_recover(e2fsck_t ctx, int error) +{ + struct ext2fs_sb *s = (struct ext2fs_sb *)ctx->fs->super; + + s->s_feature_incompat &= ~EXT3_FEATURE_INCOMPAT_RECOVER; + + /* if we had an error doing journal recovery, we need a full fsck */ + if (error) + s->s_state &= ~EXT2_VALID_FS; + ext2fs_mark_super_dirty(ctx->fs); +} + +static int e2fsck_journal_init_inode(e2fsck_t ctx, struct ext2fs_sb *s, + ino_t journal_inum, journal_t **journal) +{ + struct inode *inode; + const char *cmdname = ctx->program_name; + struct buffer_head *bh; + blk_t start; + int retval; + + jfs_debug(1, "Using journal inode %lu\n", journal_inum); + *journal = e2fsck_allocate_memory(ctx, sizeof(journal_t), "journal"); + if (!*journal) { + return EXT2_ET_NO_MEMORY; + } + + inode = e2fsck_allocate_memory(ctx, sizeof(*inode), "journal inode"); + if (!inode) { + retval = EXT2_ET_NO_MEMORY; + goto exit_journal; + } + + inode->i_ctx = ctx; + inode->i_ino = journal_inum; + retval = ext2fs_read_inode(ctx->fs, journal_inum, &inode->i_ext2); + if (retval) + goto exit_inode; + + (*journal)->j_dev = ctx; + (*journal)->j_inode = inode; + (*journal)->j_blocksize = ctx->fs->blocksize; + (*journal)->j_maxlen = inode->i_ext2.i_size / (*journal)->j_blocksize; + + if (!inode->i_ext2.i_links_count || + !LINUX_S_ISREG(inode->i_ext2.i_mode) || + (*journal)->j_maxlen < JFS_MIN_JOURNAL_BLOCKS || + (start = bmap(inode, 0)) == 0) { + retval = 
EXT2_ET_BAD_INODE_NUM; + goto exit_inode; + } + + bh = getblk(ctx, start, (*journal)->j_blocksize); + if (!bh) { + retval = EXT2_ET_NO_MEMORY; + goto exit_inode; + } + (*journal)->j_sb_buffer = bh; + (*journal)->j_superblock = (journal_superblock_t *)bh->b_data; + + return 0; + +exit_inode: + ext2fs_free_mem((void **)&inode); +exit_journal: + ext2fs_free_mem((void **)journal); + + return retval; +} + +static int e2fsck_get_journal(e2fsck_t ctx, journal_t **journal) +{ + char uuid_str[40]; + struct problem_context pctx; + struct ext2fs_sb *s = (struct ext2fs_sb *)ctx->fs->super; + int recover = s->s_feature_incompat & EXT3_FEATURE_INCOMPAT_RECOVER; + + clear_problem_context(&pctx); + + if (s->s_feature_compat & EXT3_FEATURE_COMPAT_HAS_JOURNAL) { + if (s->s_journal_dev) { + pctx.num = s->s_journal_dev; + /* this problem aborts on -y, -p, unsupported on -n */ + if (!fix_problem(ctx, PR_0_JOURNAL_UNSUPP_DEV, &pctx)) + return EXT2_ET_UNSUPP_FEATURE; + s->s_journal_dev = 0; + s->s_state &= ~EXT2_VALID_FS; + ext2fs_mark_super_dirty(ctx->fs); + } + if (!uuid_is_null(s->s_journal_uuid)) { + uuid_unparse(s->s_journal_uuid, uuid_str); + pctx.str = uuid_str; + /* this problem aborts on -y, -p, unsupported on -n */ + if (!fix_problem(ctx, PR_0_JOURNAL_UNSUPP_UUID, &pctx)) + return EXT2_ET_UNSUPP_FEATURE; + uuid_clear(s->s_journal_uuid); + s->s_state &= ~EXT2_VALID_FS; + ext2fs_mark_super_dirty(ctx->fs); + } + if (!s->s_journal_inum) + return EXT2_ET_BAD_INODE_NUM; + } + + if (s->s_journal_dev) { + pctx.num = s->s_journal_dev; + if (!fix_problem(ctx, PR_0_JOURNAL_BAD_DEV, &pctx)) + return EXT2_ET_UNSUPP_FEATURE; + s->s_journal_dev = 0; + s->s_state &= ~EXT2_VALID_FS; + ext2fs_mark_super_dirty(ctx->fs); + } + if (!uuid_is_null(s->s_journal_uuid)) { + uuid_unparse(s->s_journal_uuid, uuid_str); + pctx.str = uuid_str; + if (!fix_problem(ctx, PR_0_JOURNAL_BAD_UUID, &pctx)) + return EXT2_ET_UNSUPP_FEATURE; + uuid_clear(s->s_journal_uuid); + s->s_state &= ~EXT2_VALID_FS; + 
ext2fs_mark_super_dirty(ctx->fs); + } + + return e2fsck_journal_init_inode(ctx, s, s->s_journal_inum, journal); +} + +static int e2fsck_journal_fix_bad_inode(e2fsck_t ctx, + struct problem_context *pctx) +{ + struct ext2fs_sb *s = (struct ext2fs_sb *)ctx->fs->super; + int recover = s->s_feature_incompat & EXT3_FEATURE_INCOMPAT_RECOVER; + int has_journal = s->s_feature_compat & EXT3_FEATURE_COMPAT_HAS_JOURNAL; + + if (has_journal || s->s_journal_inum) { + /* The journal inode is bogus, remove and force full fsck */ + if (fix_problem(ctx, PR_0_JOURNAL_BAD_INODE, pctx)) { + struct ext2fs_sb *s =(struct ext2fs_sb *)ctx->fs->super; + + if (has_journal && s->s_journal_inum) + printf("*** ext3 journal has been deleted - " + "filesystem is now ext2 only ***\n\n"); + s->s_feature_compat &= ~EXT3_FEATURE_COMPAT_HAS_JOURNAL; + s->s_journal_inum = 0; + e2fsck_clear_recover(ctx, 1); + return 0; + } + return EXT2_ET_BAD_INODE_NUM; + } else if (recover) { + if (fix_problem(ctx, PR_0_JOURNAL_RECOVER_SET, pctx)) { + e2fsck_clear_recover(ctx, 1); + return 0; + } + return EXT2_ET_UNSUPP_FEATURE; + } + return 0; +} + +static int e2fsck_journal_fix_unsupported_super(e2fsck_t ctx, + struct problem_context *pctx) +{ + struct ext2fs_sb *s = (struct ext2fs_sb *)ctx->fs->super; + + /* Unsupported journal superblock - first choice is abort. + * Declining that gives the option to reset the superblock. + * + * Otherwise we get the chance to delete the journal, and + * failing that we abort because we can't handle this. 
+ */ + if (s->s_feature_compat & EXT3_FEATURE_COMPAT_HAS_JOURNAL && + fix_problem(ctx, PR_0_JOURNAL_UNSUPP_SUPER, pctx)) + return EXT2_ET_CORRUPT_SUPERBLOCK; + + if (e2fsck_journal_fix_bad_inode(ctx, pctx)) + return EXT2_ET_UNSUPP_FEATURE; + + return 0; +} + +static int e2fsck_journal_load(journal_t *journal) +{ + e2fsck_t ctx = journal->j_dev; + journal_superblock_t *jsb; + struct buffer_head *jbh = journal->j_sb_buffer; + struct problem_context pctx; + + clear_problem_context(&pctx); + + ll_rw_block(READ, 1, jbh); + if (jbh->b_err) { + com_err(ctx->device_name, jbh->b_err, + _("reading journal superblock\n")); + return jbh->b_err; + } + + jsb = journal->j_superblock; + /* If we don't even have JFS_MAGIC, we probably have a wrong inode */ + if (jsb->s_header.h_magic != htonl(JFS_MAGIC_NUMBER)) + return e2fsck_journal_fix_bad_inode(ctx, &pctx); + + if (jsb->s_header.h_blocktype != htonl(JFS_SUPERBLOCK) || + jsb->s_blocksize != htonl(journal->j_blocksize)) { + com_err(ctx->device_name, EXT2_ET_CORRUPT_SUPERBLOCK, + _("%s: no valid journal superblock found\n")); + return EXT2_ET_CORRUPT_SUPERBLOCK; + } + + if (jsb->s_header.h_blocktype != htonl(JFS_SUPERBLOCK)) { + pctx.num = ntohl(jsb->s_header.h_blocktype); + return e2fsck_journal_fix_unsupported_super(ctx, &pctx); + } + + if (ntohl(jsb->s_maxlen) < journal->j_maxlen) + journal->j_maxlen = ntohl(jsb->s_maxlen); + else if (ntohl(jsb->s_maxlen) > journal->j_maxlen) { + com_err(ctx->device_name, EXT2_ET_CORRUPT_SUPERBLOCK, + _("%s: journal too short\n")); + return EXT2_ET_CORRUPT_SUPERBLOCK; + } + + journal->j_tail_sequence = ntohl(jsb->s_sequence); + journal->j_tail = ntohl(jsb->s_start); + journal->j_first = ntohl(jsb->s_first); + journal->j_last = ntohl(jsb->s_maxlen); + + return 0; +} + +void e2fsck_journal_reset_super(e2fsck_t ctx, journal_superblock_t *jsb, + blk_t size) +{ + jsb->s_header.h_magic = htonl(JFS_MAGIC_NUMBER); + jsb->s_header.h_blocktype = htonl(JFS_SUPERBLOCK); + jsb->s_blocksize = 
htonl(ctx->fs->blocksize); + jsb->s_maxlen = htonl(size); + jsb->s_first = 1; + jsb->s_sequence = htonl(1); +} + +static int e2fsck_journal_fix_corrupt_super(e2fsck_t ctx, journal_t *journal, + struct problem_context *pctx) +{ + struct ext2fs_sb *s = (struct ext2fs_sb *)ctx->fs->super; + int recover = s->s_feature_incompat & EXT3_FEATURE_INCOMPAT_RECOVER; + + pctx->num = journal->j_inode->i_ino; + + if (s->s_feature_compat & EXT3_FEATURE_COMPAT_HAS_JOURNAL) { + if (fix_problem(ctx, PR_0_JOURNAL_BAD_SUPER, pctx)) { + journal_superblock_t *jsb = journal->j_superblock; + + e2fsck_journal_reset_super(ctx, jsb, journal->j_maxlen); + + journal->j_transaction_sequence = 1; + e2fsck_clear_recover(ctx, recover); + return 0; + } + return EXT2_ET_CORRUPT_SUPERBLOCK; + } else if (e2fsck_journal_fix_bad_inode(ctx, pctx)) + return EXT2_ET_CORRUPT_SUPERBLOCK; + + return 0; +} + +static void e2fsck_journal_release(e2fsck_t ctx, journal_t *journal, int reset) +{ + journal_superblock_t *jsb; + + if (!(ctx->options & E2F_OPT_READONLY)) { + jsb = journal->j_superblock; + jsb->s_sequence = htonl(journal->j_transaction_sequence); + if (reset) + jsb->s_start = 0; /* this marks the journal as empty */ + mark_buffer_dirty(journal->j_sb_buffer, 1); + } + brelse(journal->j_sb_buffer); + + if (journal->j_inode) + free(journal->j_inode); + ext2fs_free_mem((void **)&journal); +} + +int e2fsck_check_ext3_journal(e2fsck_t ctx) +{ + struct ext2fs_sb *s = (struct ext2fs_sb *)ctx->fs->super; + journal_t *journal; + int recover = s->s_feature_incompat & EXT3_FEATURE_INCOMPAT_RECOVER; + struct problem_context pctx; + int reset = 0; + int retval; + + /* If we don't have any journal features, don't do anything more */ + if (!(s->s_feature_compat & EXT3_FEATURE_COMPAT_HAS_JOURNAL) && + !recover && s->s_journal_inum == 0 && s->s_journal_dev == 0 && + uuid_is_null(s->s_journal_uuid)) + return 0; + + clear_problem_context(&pctx); + pctx.num = s->s_journal_inum; + + retval = e2fsck_get_journal(ctx, 
&journal); + if (retval) { + if (retval == EXT2_ET_BAD_INODE_NUM) + return e2fsck_journal_fix_bad_inode(ctx, &pctx); + return retval; + } + + retval = e2fsck_journal_load(journal); + if (retval) { + if (retval == EXT2_ET_CORRUPT_SUPERBLOCK) + return e2fsck_journal_fix_corrupt_super(ctx, journal, + &pctx); + return retval; + } + + /* + * We want to make the flags consistent here. We will not leave with + * needs_recovery set but has_journal clear. We can't get in a loop + * with -y, -n, or -p, only if a user isn't making up their mind. + */ +no_has_journal: + if (!(s->s_feature_compat & EXT3_FEATURE_COMPAT_HAS_JOURNAL)) { + recover = s->s_feature_incompat & EXT3_FEATURE_INCOMPAT_RECOVER; + pctx.str = "inode"; + if (fix_problem(ctx, PR_0_JOURNAL_HAS_JOURNAL, &pctx)) { + if (recover && + !fix_problem(ctx, PR_0_JOURNAL_RECOVER_SET, &pctx)) + goto no_has_journal; + s->s_journal_inum = 0; + e2fsck_clear_recover(ctx, recover); + } else if (!(ctx->options & E2F_OPT_READONLY)) { + s->s_feature_compat |= EXT3_FEATURE_COMPAT_HAS_JOURNAL; + ext2fs_mark_super_dirty(ctx->fs); + } + } + + if (s->s_feature_compat & EXT3_FEATURE_COMPAT_HAS_JOURNAL && + !(s->s_feature_incompat & EXT3_FEATURE_INCOMPAT_RECOVER) && + journal->j_superblock->s_start != 0) { + if (fix_problem(ctx, PR_0_JOURNAL_RESET_JOURNAL, &pctx)) + reset = 1; + /* I refuse to enable recovery for journal */ + } + + e2fsck_journal_release(ctx, journal, reset); + return retval; +} + +static int e2fsck_recover_ext3_journal(e2fsck_t ctx) +{ + ext2_filsys fs = ctx->fs; + io_manager io_ptr = fs->io->manager; + int blocksize = fs->blocksize; + journal_t *journal; + int retval; + + retval = e2fsck_get_journal(ctx, &journal); + if (retval) + goto exit; + retval = e2fsck_journal_load(journal); + if (retval) + goto exit; + + retval = -journal_recover(journal); + + e2fsck_journal_release(ctx, journal, 1); + if (retval) + goto exit; + + /* Reload the filesystem context to get up-to-date data from disk + * because journal recovery 
will change the filesystem under us. + */ + ext2fs_close(fs); + retval = ext2fs_open(ctx->device_name, EXT2_FLAG_RW, + ctx->superblock, blocksize, io_ptr, &fs); + + if (retval) { + com_err(ctx->program_name, retval, + _("while trying to re-open %s"), + ctx->device_name); + exit(FSCK_ERROR); + } + ctx->fs = fs; + fs->priv_data = ctx; + + /* FIXME - In the future we will clean up the ophans here. + * For now, we need to force a full fsck to clean them up. + * We shouldn't have this problem in normal circumstances + * as the kernel recovery code should save us. + */ + if (fs->super->s_last_orphan) + fs->super->s_state &= ~EXT2_VALID_FS; + else + jfs_debug(1, "no orphan inodes to clean up\n"); + +exit: + e2fsck_clear_recover(ctx, retval); + ext2fs_close(ctx->fs); + return retval; +} + + +#define TEMPLATE "/tmp/ext3.XXXXXX" /* * This function attempts to mount and unmount an ext3 filesystem, * which is a cheap way to force the kernel to run the journal and - * handle the recovery for us. + * handle the recovery for us. If that fails, we need to recover + * the journal ourselves manually. */ -int e2fsck_run_ext3_journal(const char *device) +int e2fsck_run_ext3_journal(e2fsck_t ctx) { - int ret = 0; - char **cpp, *dir; - char template[] = "/tmp/ext3.XXXXXX"; +#ifdef __linux__ + ext2_filsys fs = ctx->fs; + char *dirlist[] = {"/mnt","/lost+found","/tmp","/root","/boot",0}; + int retval = 0; + int count = 0; + char template[] = TEMPLATE; + struct stat buf; char *tmpdir; + if (ctx->options & E2F_OPT_READONLY) { + printf("%s: won't do journal recovery while read-only\n", + ctx->device_name); + return EXT2_ET_FILE_RO; + } + + /* For now, non-root users and loop devices can't use kernel recovery */ + if (geteuid()||stat(ctx->device_name, &buf)||!S_ISBLK(buf.st_mode)) + goto manual_recover; + + printf(_("%s: trying for ext3 kernel journal recovery\n"), + ctx->device_name); /* * First try to make a temporary directory. 
This may fail if * the root partition is still mounted read-only. */ +newtemp: tmpdir = mktemp(template); if (tmpdir) { - ret = mkdir(template, 0700); - if (ret) - tmpdir = 0; - } - if (tmpdir) { - ret = mount(device, tmpdir, "ext3", 0xC0ED, NULL); - if (ret) { - ret = errno; - rmdir(tmpdir); - return (ret); + jfs_debug(2, "trying %s as ext3 temp mount point\n", tmpdir); + retval = mkdir(template, 0700); + if (retval) { + if (errno == EROFS) { + tmpdir = NULL; + template[0] = '\0'; + } else if (errno == EEXIST && count++ < 10) { + strcpy(template, TEMPLATE); + goto newtemp; + } else + goto manual_recover; } - } else { + } + + /* + * OK, creating a temporary directory didn't work. + * Let's try a list of possible temporary mountpoints. + */ + if (!tmpdir) { + dev_t rootdev; + char **cpp, *dir; + + if (stat("/", &buf)) + goto manual_recover; + + rootdev = buf.st_dev; + /* - * OK, creating a temporary directory didn't work. - * Let's try a list of possible temporary mountpoints. + * Check that dir is on the same device as root (no other + * filesystem is mounted there), and it's a directory. 
*/ - for (cpp = dirlist; dir = *cpp; cpp++) { - ret = mount(device, dir, "ext3", 0xC0ED, NULL); - if (ret == 0) + for (cpp = dirlist; (dir = *cpp); cpp++) + if (stat(dir, &buf) == 0 && buf.st_dev == rootdev && + S_ISDIR(buf.st_mode)) { + tmpdir = dir; break; + } + } + + if (tmpdir) { + io_manager io_ptr = fs->io->manager; + int blocksize = fs->blocksize; + + jfs_debug(2, "using %s for ext3 mount\n", tmpdir); + ext2fs_close(fs); + /* FIXME - need to handle loop devices here */ + retval = mount(ctx->device_name, tmpdir, "ext3", MNT_FL, NULL); + if (retval) { + com_err(ctx->program_name, errno, + "when mounting %s", ctx->device_name); + if (template[0]) + rmdir(tmpdir); + + retval = ext2fs_open(ctx->device_name, EXT2_FLAG_RW, + ctx->superblock, blocksize, io_ptr, + &fs); + + if (retval) { + com_err(ctx->program_name, retval, + _("while trying to re-open %s"), + ctx->device_name); + exit(FSCK_ERROR); + } + fs->priv_data = ctx; + ctx->fs = fs; + goto manual_recover; } - if (!dir) + /* + * Now that it mounted cleanly, the filesystem will have been + * recovered, so we can now unmount it. + */ + retval = umount(tmpdir); + if (retval) return errno; + + /* + * Remove the temporary directory, if it was created. + */ + if (template[0]) + rmdir(tmpdir); + return 0; } - /* - * Now that it mounted cleanly, the filesystem will have been - * recovered, so we can now unmount it. - */ - ret = umount(device); - if (ret) - return errno; - /* - * Remove the temporary directory, if it was created. - */ - if (tmpdir) - rmdir(tmpdir); - return 0; +manual_recover: +#endif /* __linux__ */ + return e2fsck_recover_ext3_journal(ctx); } - diff --git a/e2fsck/message.c b/e2fsck/message.c index 3018cc00..529a486a 100644 --- a/e2fsck/message.c +++ b/e2fsck/message.c @@ -44,6 +44,7 @@ * the containing directory. 
* %s <str> miscellaneous string * %S backup superblock + * %X <num> hexadecimal format * * The following '@' expansions are supported: * @@ -378,6 +379,13 @@ static _INLINE_ void expand_percent_expression(ext2_filsys fs, char ch, case 's': printf("%s", ctx->str); break; + case 'X': +#ifdef EXT2_NO_64_TYPE + printf("0x%x", ctx->num); +#else + printf("0x%llx", ctx->num); +#endif + break; default: no_context: printf("%%%c", ch); diff --git a/e2fsck/problem.c b/e2fsck/problem.c index 6ddd7aa8..5a05387c 100644 --- a/e2fsck/problem.c +++ b/e2fsck/problem.c @@ -172,7 +172,62 @@ static const struct e2fsck_problem problem_table[] = { { PR_0_HURD_CLEAR_FILETYPE, N_("The Hurd does not support the filetype feature.\n"), - PROMPT_CLEAR, 0 }, + PROMPT_CLEAR, 0 }, + + /* Journal inode is invalid */ + { PR_0_JOURNAL_BAD_INODE, + N_("@S has a bad ext3 journal (@i %N).\n"), + PROMPT_CLEAR, PR_PREEN_OK }, + + /* Superblock has a journal device (which we can't handle yet) */ + { PR_0_JOURNAL_UNSUPP_DEV, + N_("@S has external ext3 journal device (unsupported).\n"), + PROMPT_ABORT, PR_NO_OK | PR_AFTER_CODE, PR_0_JOURNAL_BAD_DEV }, + + /* Superblock has a bad journal device */ + { PR_0_JOURNAL_BAD_DEV, + N_("@S has a bad ext3 journal (device %X).\n"), + PROMPT_CLEAR, PR_PREEN_OK }, + + /* Superblock has a journal UUID (which we can't handle yet) */ + { PR_0_JOURNAL_UNSUPP_UUID, + N_("@S has an ext3 journal UUID (unsupported).\n"), + PROMPT_ABORT, PR_NO_OK | PR_AFTER_CODE, PR_0_JOURNAL_BAD_UUID }, + + /* Superblock has a bad journal UUID */ + { PR_0_JOURNAL_BAD_UUID, + N_("@S has a bad ext3 journal (UUID %s).\n"), + PROMPT_CLEAR, PR_PREEN_OK }, + + /* Journal has an unknown superblock type */ + { PR_0_JOURNAL_UNSUPP_SUPER, + N_("Ext3 journal @S is unknown type %N (unsupported).\n"), + PROMPT_ABORT, PR_NO_OK | PR_AFTER_CODE, PR_0_JOURNAL_BAD_SUPER }, + + /* Journal superblock is corrupt */ + { PR_0_JOURNAL_BAD_SUPER, + N_("Ext3 journal @S is corrupt.\n"), + PROMPT_FIX, PR_PREEN_OK }, + + 
/* Superblock flag should be cleared */ + { PR_0_JOURNAL_HAS_JOURNAL, + N_("@S doesn't have has_journal flag, but has ext3 journal %s.\n"), + PROMPT_DELETE, PR_PREEN_OK }, + + /* Superblock flag is incorrect */ + { PR_0_JOURNAL_RECOVER_SET, + N_("@S has ext3 needs_recovery flag set, but no journal.\n"), + PROMPT_CLEAR, PR_PREEN_OK }, + + /* Journal should be reset */ + { PR_0_JOURNAL_RESET_JOURNAL, + N_("*** WARNING *** leaving data in the journal may be DANGEROUS.\n"), + PROMPT_NONE, PR_PREEN_NOMSG|PR_AFTER_CODE, PR_0_JOURNAL_RESET_PROMPT}, + + /* Journal should be reset */ + { PR_0_JOURNAL_RESET_PROMPT, + N_("ext3 recovery flag clear, but journal has data.\n"), + PROMPT_CLEAR, PR_PREEN_OK|PR_PREEN_NOMSG }, /* Pass 1 errors */ @@ -1168,11 +1223,11 @@ int fix_problem(e2fsck_t ctx, problem_t code, struct problem_context *pctx) } - if (ptr->flags & PR_AFTER_CODE) - (void) fix_problem(ctx, ptr->second_code, pctx); - if ((ptr->prompt == PROMPT_ABORT) && answer) fatal_error(ctx, 0); + if (ptr->flags & PR_AFTER_CODE) + answer = fix_problem(ctx, ptr->second_code, pctx); + return answer; } diff --git a/e2fsck/problem.h b/e2fsck/problem.h index ac73db31..323b60b8 100644 --- a/e2fsck/problem.h +++ b/e2fsck/problem.h @@ -93,7 +93,40 @@ struct problem_context { /* The Hurd does not support the filetype feature */ #define PR_0_HURD_CLEAR_FILETYPE 0x00000E - + +/* Journal inode is invalid */ +#define PR_0_JOURNAL_BAD_INODE 0x00000F + +/* Superblock has a journal device (which we can't handle yet) */ +#define PR_0_JOURNAL_UNSUPP_DEV 0x000010 + +/* Superblock has a bad journal device */ +#define PR_0_JOURNAL_BAD_DEV 0x000011 + +/* Superblock has a journal UUID (which we can't handle yet) */ +#define PR_0_JOURNAL_UNSUPP_UUID 0x000012 + +/* Superblock has a bad journal UUID */ +#define PR_0_JOURNAL_BAD_UUID 0x000013 + +/* Journal has an unknown superblock type */ +#define PR_0_JOURNAL_UNSUPP_SUPER 0x000014 + +/* Journal superblock is corrupt */ +#define PR_0_JOURNAL_BAD_SUPER 
0x000015 + +/* Journal superblock is corrupt */ +#define PR_0_JOURNAL_HAS_JOURNAL 0x000016 + +/* Superblock has recovery flag set but no journal */ +#define PR_0_JOURNAL_RECOVER_SET 0x000017 + +/* Warning message about leaving data in the journal */ +#define PR_0_JOURNAL_RESET_JOURNAL 0x000018 + +/* Superblock recovery flag clear - journal needs to be reset */ +#define PR_0_JOURNAL_RESET_PROMPT 0x000019 + /* * Pass 1 errors */ diff --git a/e2fsck/recovery.c b/e2fsck/recovery.c new file mode 100644 index 00000000..06e12460 --- /dev/null +++ b/e2fsck/recovery.c @@ -0,0 +1,428 @@ +/* + * linux/fs/recovery.c + * + * Written by Stephen C. Tweedie <sct@redhat.com>, 1999 + * + * Copyright 1999 Red Hat Software --- All Rights Reserved + * + * This file is part of the Linux kernel and is made available under + * the terms of the GNU General Public License, version 2, or at your + * option, any later version, incorporated herein by reference. + * + * Journal recovery routines for the generic filesystem journaling code; + * part of the ext2fs journaling system. + */ + +#ifndef __KERNEL__ +#include "jfs.h" +#else +#include <linux/sched.h> +#include <linux/fs.h> +#include <linux/jfs.h> +#include <linux/errno.h> +#include <linux/malloc.h> +#include <linux/locks.h> +#include <linux/buffer.h> + + +/* Release readahead buffers after use */ +static void brelse_array(struct buffer_head *b[], int n) +{ + while (--n >= 0) + brelse (b[n]); +} + + +/* + * When reading from the journal, we are going through the block device + * layer directly and so there is no readahead being done for us. We + * need to implement any readahead ourselves if we want it to happen at + * all. Recovery is basically one long sequential read, so make sure we + * do the IO in reasonably large chunks. + * + * This is not so critical that we need to be enormously clever about + * the readahead size, though. 128K is a purely arbitrary, good-enough + * fixed value. 
+ */ + +static int do_readahead(journal_t *journal, unsigned int start) +{ + int err; + unsigned int max, nbufs, next, blocknr; + struct buffer_head *bh; + + #define MAXBUF 8 + struct buffer_head * bufs[MAXBUF]; + + /* Do up to 128K of readahead */ + max = start + (128 * 1024 / journal->j_blocksize); + if (max > journal->j_maxlen) + max = journal->j_maxlen; + + /* Do the readahead itself. We'll submit MAXBUF buffer_heads at + * a time to the block device IO layer. */ + + nbufs = 0; + + for (next = start; next < max; next++) { + blocknr = next; + if (journal->j_inode) + blocknr = bmap(journal->j_inode, next); + if (!blocknr) { + printk (KERN_ERR "JFS: bad block at offset %u\n", + next); + err = -EIO; + goto failed; + } + + bh = getblk(journal->j_dev, blocknr, journal->j_blocksize); + if (!bh) { + printk(KERN_ERR "JFS: readahead getblk failed\n"); + err = -ENOMEM; + goto failed; + } + + if (!buffer_uptodate(bh) && !buffer_locked(bh)) { + bufs[nbufs++] = bh; + if (nbufs == MAXBUF) { + ll_rw_block(READ, nbufs, bufs); + brelse_array(bufs, nbufs); + nbufs = 0; + } + } else + brelse(bh); + } + + if (nbufs) + ll_rw_block(READ, nbufs, bufs); + err = 0; + +failed: + if (nbufs) + brelse_array(bufs, nbufs); + return err; +} +#endif + +/* + * Read a block from the journal + */ + +static int jread(struct buffer_head **bhp, journal_t *journal, + unsigned int offset) +{ + unsigned int blocknr; + struct buffer_head *bh; + + *bhp = NULL; + + if (offset >= journal->j_maxlen) + return -EINVAL; + + blocknr = offset; + if (journal->j_inode) + blocknr = bmap(journal->j_inode, offset); + + if (!blocknr) { + printk (KERN_ERR "JFS: bad block at offset %u\n", + offset); + return -EIO; + } + + bh = getblk(journal->j_dev, blocknr, journal->j_blocksize); + if (!bh) + return -ENOMEM; + + if (!buffer_uptodate(bh)) { + /* If this is a brand new buffer, start readahead. + Otherwise, we assume we are already reading it. 
*/ + if (!buffer_req(bh)) + do_readahead(journal, offset); + wait_on_buffer(bh); + } + + if (!buffer_uptodate(bh)) { + printk (KERN_ERR "JFS: Failed to read block at offset %u\n", + offset); + brelse(bh); + return -EIO; + } + + *bhp = bh; + return 0; +} + + +/* + * Count the number of in-use tags in a journal descriptor block. + */ + +int count_tags(struct buffer_head *bh, int size) +{ + char * tagp; + journal_block_tag_t * tag; + int nr = 0; + + tagp = &bh->b_data[sizeof(journal_header_t)]; + + while ((tagp - bh->b_data + sizeof(journal_block_tag_t)) <= size) { + tag = (journal_block_tag_t *) tagp; + + nr++; + tagp += sizeof(journal_block_tag_t); + if (!(tag->t_flags & htonl(JFS_FLAG_SAME_UUID))) + tagp += 16; + + if (tag->t_flags & htonl(JFS_FLAG_LAST_TAG)) + break; + } + + return nr; +} + + +/* Make sure we wrap around the log correctly! */ +#define wrap(journal, var) \ +do { \ + if (var >= (journal)->j_last) \ + var -= ((journal)->j_last - (journal)->j_first); \ +} while (0) + +/* + * journal_recover + * + * The primary function for recovering the log contents when mounting a + * journaled device. + */ + +int journal_recover(journal_t *journal) +{ + unsigned int first_commit_ID, next_commit_ID; + unsigned long next_log_block; + unsigned long transaction_start; + int err, success = 0; + journal_superblock_t * jsb; + journal_header_t * tmp; + struct buffer_head * bh; + + /* Precompute the maximum metadata descriptors in a descriptor block */ + int MAX_BLOCKS_PER_DESC; + MAX_BLOCKS_PER_DESC = ((journal->j_blocksize-sizeof(journal_header_t)) + / sizeof(journal_block_tag_t)); + + /* + * First thing is to establish what we expect to find in the log + * (in terms of transaction IDs), and where (in terms of log + * block offsets): query the superblock. 
+ */ + + jsb = journal->j_superblock; + next_commit_ID = ntohl(jsb->s_sequence); + next_log_block = ntohl(jsb->s_start); + + first_commit_ID = next_commit_ID; + + /* + * The journal superblock's s_start field (the current log head) + * is always zero if, and only if, the journal was cleanly + * unmounted. + */ + + if (!jsb->s_start) { + jfs_debug(1, "No recovery required, last transaction %d\n", + ntohl(jsb->s_sequence)); + journal->j_transaction_sequence = ++next_commit_ID; + return 0; + } + + jfs_debug(1, "Starting recovery\n"); + + /* + * Now we walk through the log, transaction by transaction, + * making sure that each transaction has a commit block in the + * expected place. Each complete transaction gets replayed back + * into the main filesystem. + */ + + while (1) { + jfs_debug(2, "Looking for commit ID %u at %lu/%lu\n", + next_commit_ID, next_log_block, journal->j_last); + transaction_start = next_log_block; + + while (next_log_block < journal->j_last) { + /* Skip over each chunk of the transaction + * looking either the next descriptor block or + * the final commit record. */ + + jfs_debug(3, "JFS: checking block %ld\n", + next_log_block); + err = jread(&bh, journal, next_log_block); + if (err) + goto failed; + + /* What kind of buffer is it? + * + * If it is a descriptor block, work out the + * expected location of the next and skip to it. + * + * If it is the right commit block, end the + * search and start recovering the transaction. + * + * Any non-control block, or an unexpected + * control block is interpreted as old data from + * a previous wrap of the log: stop recovery at + * this point. + */ + + tmp = (journal_header_t *) bh->b_data; + + if (tmp->h_magic == htonl(JFS_MAGIC_NUMBER)) { + int blocktype = ntohl(tmp->h_blocktype); + jfs_debug(3, "Found magic %d\n", blocktype); + + if (blocktype == JFS_DESCRIPTOR_BLOCK) { + /* Work out where the next descriptor + * should be. 
*/ + next_log_block++; + next_log_block += count_tags(bh, journal->j_blocksize); + wrap(journal, next_log_block); + brelse(bh); + continue; + } else if (blocktype == JFS_COMMIT_BLOCK) { + unsigned int sequence = tmp->h_sequence; + brelse(bh); + if (sequence == htonl(next_commit_ID)) + goto commit; + jfs_debug(2, "found sequence %d, " + "expected %d.\n", + ntohl(sequence), + next_commit_ID); + goto finished; + } + } + + /* We didn't recognise it? OK, we've gone off + * the tail of the log in that case. */ + brelse(bh); + break; + } + + goto finished; + + commit: + jfs_debug(2, "Found transaction %d\n", next_commit_ID); + + /* OK, we have a transaction to commit. Rewind to the + * start of it, gather up all of the buffers in each + * transaction segment, and replay the segments one by + * one. */ + + next_log_block = transaction_start; + + while (1) { + int flags; + char * tagp; + journal_block_tag_t * tag; + struct buffer_head * obh; + struct buffer_head * nbh; + + err = jread(&bh, journal, next_log_block++); + wrap(journal, next_log_block); + if (err) + goto failed; + + tmp = (journal_header_t *) bh->b_data; + /* should never happen - we just checked above - AED */ + J_ASSERT(tmp->h_magic == htonl(JFS_MAGIC_NUMBER)); + + /* If it is the commit block, then we are all done! */ + if (tmp->h_blocktype == htonl(JFS_COMMIT_BLOCK)) { + brelse(bh); + break; + } + + /* A descriptor block: we can now write all of + * the data blocks. Yay, useful work is finally + * getting done here! */ + + tagp = &bh->b_data[sizeof(journal_header_t)]; + + while ((tagp - bh->b_data +sizeof(journal_block_tag_t)) + <= journal->j_blocksize) { + tag = (journal_block_tag_t *) tagp; + flags = ntohl(tag->t_flags); + + err = jread(&obh, journal, next_log_block++); + wrap(journal, next_log_block); + if (err) { + /* Recover what we can, but + * report failure at the end. 
*/ + success = err; + printk (KERN_ERR + "JFS: IO error recovering " + "block %ld in log\n", + next_log_block-1); + } else { + /* can never happen if jread OK - AED */ + J_ASSERT(obh != NULL); + + /* And find a buffer for the new data + * being restored */ + nbh = getblk(journal->j_dev, + ntohl(tag->t_blocknr), + journal->j_blocksize); + if (nbh == NULL) { + printk(KERN_ERR + "JFS: Out of memory " + "during recovery.\n"); + err = -ENOMEM; + brelse(bh); + brelse(obh); + goto failed; + } + + memcpy(nbh->b_data, obh->b_data, + journal->j_blocksize); + if (flags & JFS_FLAG_ESCAPE) { + * ((unsigned int *) bh->b_data) = htonl(JFS_MAGIC_NUMBER); + } + + mark_buffer_dirty(nbh, 1); + /* ll_rw_block(WRITE, 1, &nbh); */ + brelse(obh); + brelse(nbh); + } + + tagp += sizeof(journal_block_tag_t); + if (!(flags & JFS_FLAG_SAME_UUID)) + tagp += 16; + + if (flags & JFS_FLAG_LAST_TAG) + break; + + } /* end of tag loop */ + + brelse(bh); + + } /* end of descriptor block loop */ + + /* We have now replayed that entire transaction: start + * looking for the next transaction. */ + next_commit_ID++; + } + + finished: + err = success; + fsync_dev(journal->j_dev); + + failed: + + /* Restart the log at the next transaction ID, thus invalidating + * any existing commit records in the log. */ + jfs_debug(0, "JFS: recovery, exit status %d, " + "recovered transactions %u to %u\n", + err, first_commit_ID, next_commit_ID); + journal->j_transaction_sequence = ++next_commit_ID; + + return err; +} diff --git a/e2fsck/unix.c b/e2fsck/unix.c index 71a321c0..8ab4dcfe 100644 --- a/e2fsck/unix.c +++ b/e2fsck/unix.c @@ -778,23 +778,48 @@ restart: } #endif s = (struct ext2fs_sb *) fs->super; + + /* + * Set the device name, which is used whenever we print error + * or informational messages to the user. 
+ */ + if (ctx->device_name == 0 && + (s->s_volume_name[0] != 0)) { + char *cp = malloc(sizeof(s->s_volume_name)+1); + if (cp) { + strncpy(cp, s->s_volume_name, + sizeof(s->s_volume_name)); + cp[sizeof(s->s_volume_name)] = 0; + ctx->device_name = cp; + } + } + if (ctx->device_name == 0) + ctx->device_name = ctx->filesystem_name; + /* * Check to see if we need to do ext3-style recovery. If so, * do it, and then restart the fsck. */ + retval = e2fsck_check_ext3_journal(ctx); + if (retval) { + com_err(ctx->program_name, retval, + _("while checking ext3 journal for %s"), + ctx->device_name); + ext2fs_close(ctx->fs); + exit(FSCK_ERROR); + } + if (s->s_feature_incompat & EXT3_FEATURE_INCOMPAT_RECOVER) { - printf("%s: reading journal for ext3 filesystem...\n", - ctx->filesystem_name); - ext2fs_close(fs); - retval = e2fsck_run_ext3_journal(ctx->filesystem_name); + retval = e2fsck_run_ext3_journal(ctx); if (retval) { com_err(ctx->program_name, retval, - ": couldn't load ext3 journal for %s", - ctx->filesystem_name); + _("while recovering ext3 journal of %s"), + ctx->device_name); exit(FSCK_ERROR); } goto restart; } + /* * Check for compatibility with the feature sets. We need to * be more stringent than ext2fs_open(). 
@@ -802,12 +827,12 @@ restart: if ((s->s_feature_compat & ~EXT2_LIB_FEATURE_COMPAT_SUPP) || (s->s_feature_incompat & ~EXT2_LIB_FEATURE_INCOMPAT_SUPP)) { com_err(ctx->program_name, EXT2_ET_UNSUPP_FEATURE, - "(%s)", ctx->filesystem_name); + "(%s)", ctx->device_name); goto get_newer; } if (s->s_feature_ro_compat & ~EXT2_LIB_FEATURE_RO_COMPAT_SUPP) { com_err(ctx->program_name, EXT2_ET_RO_UNSUPP_FEATURE, - "(%s)", ctx->filesystem_name); + "(%s)", ctx->device_name); goto get_newer; } #ifdef ENABLE_COMPRESSION @@ -815,18 +840,6 @@ restart: com_err(ctx->program_name, 0, _("Warning: compression support is experimental.\n")); #endif - if (ctx->device_name == 0 && - (s->s_volume_name[0] != 0)) { - char *cp = malloc(sizeof(s->s_volume_name)+1); - if (cp) { - strncpy(cp, s->s_volume_name, - sizeof(s->s_volume_name)); - cp[sizeof(s->s_volume_name)] = 0; - ctx->device_name = cp; - } - } - if (ctx->device_name == 0) - ctx->device_name = ctx->filesystem_name; /* * If the user specified a specific superblock, presumably the |