log write() data

author Robert Morris <rtm@csail.mit.edu>

Fri, 12 Aug 2011 13:25:39 +0000 (09:25 -0400)

committer Robert Morris <rtm@csail.mit.edu>

Fri, 12 Aug 2011 13:25:39 +0000 (09:25 -0400)
author Robert Morris <rtm@csail.mit.edu>
Fri, 12 Aug 2011 13:25:39 +0000 (09:25 -0400)
committer Robert Morris <rtm@csail.mit.edu>
Fri, 12 Aug 2011 13:25:39 +0000 (09:25 -0400)
diff --git a/file.c b/file.c

index e10b8249211046414a20c0f8b2a55eadf53a55fb..7101a502228517aa696de1ebdb349aaa18c60a75 100644 (file)
--- a/file.c
+++ b/file.c
@@ -67,8 +67,11 @@ fileclose(struct file *f)
    
    if(ff.type == FD_PIPE)
      pipeclose(ff.pipe, ff.writable);
-  else if(ff.type == FD_INODE)
+  else if(ff.type == FD_INODE){
+    begin_trans();
      iput(ff.ip);
+    commit_trans();
+  }
  }
  
  // Get metadata about file f.
@@ -116,10 +119,30 @@ filewrite(struct file *f, char *addr, int n)
      return pipewrite(f->pipe, addr, n);
    if(f->type == FD_INODE){
      ilock(f->ip);
-    if((r = writei(f->ip, addr, f->off, n)) > 0)
+    // write a few blocks at a time to avoid exceeding
+    // the maximum log transaction size, including
+    // i-node, indirect block, allocation blocks,
+    // and 2 blocks of slop for non-aligned writes.
+    // this really belongs lower down, since writei()
+    // might be writing a device like the console.
+    int max = ((LOGSIZE-1-1-2) / 2) * 512;
+    int i = 0;
+    while(i < n){
+      int n1 = n - i;
+      if(n1 > max)
+        n1 = max;
+      begin_trans();
+      r = writei(f->ip, addr + i, f->off, n1);
+      commit_trans();
+      if(r < 0)
+        break;
+      if(r != n1)
+        panic("short filewrite");
        f->off += r;
+      i += r;
+    }
      iunlock(f->ip);
-    return r;
+    return i == n ? n : -1;
    }
    panic("filewrite");
  }
diff --git a/fs.c b/fs.c

index a414b6547ee609faa7b60ad335ea6dd329d8da83..a76788b9bd2b3dbeae301659db52441dc837d294 100644 (file)
--- a/fs.c
+++ b/fs.c
@@ -437,13 +437,13 @@ writei(struct inode *ip, char *src, uint off, uint n)
    if(off > ip->size || off + n < off)
      return -1;
    if(off + n > MAXFILE*BSIZE)
-    n = MAXFILE*BSIZE - off;
+    return -1;
  
    for(tot=0; tot<n; tot+=m, off+=m, src+=m){
      bp = bread(ip->dev, bmap(ip, off/BSIZE));
      m = min(n - tot, BSIZE - off%BSIZE);
      memmove(bp->data + off%BSIZE, src, m);
-    bwrite(bp);
+    log_write(bp);
      brelse(bp);
    }
  
diff --git a/log.c b/log.c

index 72a0367be620e11eb3af60c75731dbf61758a632..db36ba9d2b370bc3cb0e660964f9c6897cf39ef9 100644 (file)
--- a/log.c
+++ b/log.c
@@ -8,18 +8,36 @@
  #include "fs.h"
  #include "buf.h"
  
-// Dirt simple "logging" supporting only one transaction.  All file system calls
-// that potentially write a block should be wrapped in begin_trans and commit_trans,
-// so that there is never more than one transaction. This serializes all file system 
-// operations that potentially write, but simplifies recovery (only the last
-// one transaction to recover) and concurrency (don't have to worry about reading a modified
-// block from a transaction that hasn't committed yet).
-
-// The header of the log.  If head == 0, there are no log entries.  All entries till head
-// are committed. sector[] records the home sector for each block in the log 
-// (i.e., physical logging).
+// Simple logging. Each system call that might write the file system
+// should be surrounded with begin_trans() and commit_trans() calls.
+//
+// The log holds at most one transaction at a time. Commit forces
+// the log (with commit record) to disk, then installs the affected
+// blocks to disk, then erases the log. begin_trans() ensures that
+// only one system call can be in a transaction; others must wait.
+// 
+// Allowing only one transaction at a time means that the file
+// system code doesn't have to worry about the possibility of
+// one transaction reading a block that another one has modified,
+// for example an i-node block.
+//
+// Read-only system calls don't need to use transactions, though
+// this means that they may observe uncommitted data. I-node
+// and buffer locks prevent read-only calls from seeing inconsistent data.
+//
+// The log is a physical re-do log containing disk blocks.
+// The on-disk log format:
+//   header block, containing sector #s for block A, B, C, ...
+//   block A
+//   block B
+//   block C
+//   ...
+// Log appends are synchronous.
+
+// Contents of the header block, used for both the on-disk header block
+// and to keep track in memory of logged sector #s before commit.
  struct logheader {
-  int head;   
+  int n;   
    int sector[LOGSIZE];
  };
  
@@ -55,10 +73,10 @@ install_trans(void)
  {
    int tail;
  
-  if (log.lh.head > 0)
-    cprintf("install_trans %d\n", log.lh.head);
-  for (tail = 0; tail < log.lh.head; tail++) {
-    cprintf("put entry %d to disk block %d\n", tail, log.lh.sector[tail]);
+  //if (log.lh.n > 0)
+  //  cprintf("install_trans %d\n", log.lh.n);
+  for (tail = 0; tail < log.lh.n; tail++) {
+    // cprintf("put entry %d to disk block %d\n", tail, log.lh.sector[tail]);
      struct buf *lbuf = bread(log.dev, log.start+tail+1);   // read i'th block from log
      struct buf *dbuf = bread(log.dev, log.lh.sector[tail]);  // read dst block
      memmove(dbuf->data, lbuf->data, BSIZE);
@@ -75,27 +93,27 @@ read_head(void)
    struct buf *buf = bread(log.dev, log.start);
    struct logheader *lh = (struct logheader *) (buf->data);
    int i;
-  log.lh.head = lh->head;
-  for (i = 0; i < log.lh.head; i++) {
+  log.lh.n = lh->n;
+  for (i = 0; i < log.lh.n; i++) {
      log.lh.sector[i] = lh->sector[i];
    }
    brelse(buf);
-  if (log.lh.head > 0)
-    cprintf("read_head: %d\n", log.lh.head);
+  //if (log.lh.n > 0)
+  //  cprintf("read_head: %d\n", log.lh.n);
  }
  
  // Write the in-memory log header to disk, committing log entries till head
  static void
  write_head(void)
  {
-  if (log.lh.head > 0)
-    cprintf("write_head: %d\n", log.lh.head);
+  // if (log.lh.n > 0)
+  //   cprintf("write_head: %d\n", log.lh.n);
  
    struct buf *buf = bread(log.dev, log.start);
    struct logheader *hb = (struct logheader *) (buf->data);
    int i;
-  hb->head = log.lh.head;
-  for (i = 0; i < log.lh.head; i++) {
+  hb->n = log.lh.n;
+  for (i = 0; i < log.lh.n; i++) {
      hb->sector[i] = log.lh.sector[i];
    }
    bwrite(buf);
@@ -107,7 +125,7 @@ recover_from_log(void)
  {
    read_head();      
    install_trans();  // Install all transactions till head
-  log.lh.head = 0;
+  log.lh.n = 0;
    write_head();     //  Reclaim log
  }
  
@@ -127,7 +145,7 @@ commit_trans(void)
  {
    write_head();        // This causes all blocks till log.head to be committed
    install_trans();     // Install all the transactions till head
-  log.lh.head = 0; 
+  log.lh.n = 0; 
    write_head();        // Reclaim log
  
    acquire(&log.lock);
@@ -136,21 +154,27 @@ commit_trans(void)
    release(&log.lock);
  }
  
-// Write buffer into the log at log.head and record the block number log.lh.entry, but
-// don't write the log header (which would commit the write).
+// Caller has modified b->data and is done with the buffer.
+// Append the block to the log and record the block number, 
+// but don't write the log header (which would commit the write).
+// log_write() replaces bwrite(); a typical use is:
+//   bp = bread(...)
+//   modify bp->data[]
+//   log_write(bp)
+//   brelse(bp)
  void
  log_write(struct buf *b)
  {
    int i;
  
-  if (log.lh.head >= LOGSIZE)
+  if (log.lh.n >= LOGSIZE || log.lh.n >= log.size - 1)
      panic("too big a transaction");
    if (!log.intrans)
      panic("write outside of trans");
  
-  cprintf("log_write: %d %d\n", b->sector, log.lh.head);
+  // cprintf("log_write: %d %d\n", b->sector, log.lh.n);
  
-  for (i = 0; i < log.lh.head; i++) {
+  for (i = 0; i < log.lh.n; i++) {
      if (log.lh.sector[i] == b->sector)   // log absorption?
        break;
    }
@@ -159,6 +183,6 @@ log_write(struct buf *b)
    memmove(lbuf->data, b->data, BSIZE);
    bwrite(lbuf);
    brelse(lbuf);
-  if (i == log.lh.head)
-    log.lh.head++;
+  if (i == log.lh.n)
+    log.lh.n++;
  }
diff --git a/param.h b/param.h

index 03c05f92d98b21f0f10923ef9af733beb7d675a3..b6f6f469673201888fe33605e146137f040a2618 100644 (file)
--- a/param.h
+++ b/param.h
@@ -8,5 +8,5 @@
  #define NDEV         10  // maximum major device number
  #define ROOTDEV       1  // device number of file system root disk
  #define MAXARG       32  // max exec arguments
-#define LOGSIZE      10  // size of log
+#define LOGSIZE      10  // max data sectors in on-disk log
  
diff --git a/syscall.c b/syscall.c

index b848716e148a14bf79a21e04f92050b3a9032e5b..71c369c6d5ec2077a065571151f8129cf7ede45d 100644 (file)
--- a/syscall.c
+++ b/syscall.c
@@ -141,9 +141,7 @@ syscall(void)
    if(num >= 0 && num < SYS_open && syscalls[num]) {
      proc->tf->eax = syscalls[num]();
    } else if (num >= SYS_open && num < NELEM(syscalls) && syscalls[num]) {
-    begin_trans();
      proc->tf->eax = syscalls[num]();
-    commit_trans();
    } else {
      cprintf("%d %s: unknown sys call %d\n",
              proc->pid, proc->name, num);
diff --git a/sysfile.c b/sysfile.c

index 4235660fa59e08c556fef579effa0bb935e248eb..ca5401398d60b367cef872bf16590b5c0e4b53ad 100644 (file)
--- a/sysfile.c
+++ b/sysfile.c
@@ -121,6 +121,9 @@ sys_link(void)
      iunlockput(ip);
      return -1;
    }
+
+  begin_trans();
+
    ip->nlink++;
    iupdate(ip);
    iunlock(ip);
@@ -134,6 +137,9 @@ sys_link(void)
    }
    iunlockput(dp);
    iput(ip);
+
+  commit_trans();
+
    return 0;
  
  bad:
@@ -141,6 +147,7 @@ bad:
    ip->nlink--;
    iupdate(ip);
    iunlockput(ip);
+  commit_trans();
    return -1;
  }
  
@@ -195,6 +202,8 @@ sys_unlink(void)
      return -1;
    }
  
+  begin_trans();
+
    memset(&de, 0, sizeof(de));
    if(writei(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
      panic("unlink: writei");
@@ -207,6 +216,9 @@ sys_unlink(void)
    ip->nlink--;
    iupdate(ip);
    iunlockput(ip);
+
+  commit_trans();
+
    return 0;
  }
  
@@ -251,6 +263,7 @@ create(char *path, short type, short major, short minor)
      panic("create: dirlink");
  
    iunlockput(dp);
+
    return ip;
  }
  
@@ -265,7 +278,10 @@ sys_open(void)
    if(argstr(0, &path) < 0 || argint(1, &omode) < 0)
      return -1;
    if(omode & O_CREATE){
-    if((ip = create(path, T_FILE, 0, 0)) == 0)
+    begin_trans();
+    ip = create(path, T_FILE, 0, 0);
+    commit_trans();
+    if(ip == 0)
        return -1;
    } else {
      if((ip = namei(path)) == 0)
@@ -299,9 +315,13 @@ sys_mkdir(void)
    char *path;
    struct inode *ip;
  
-  if(argstr(0, &path) < 0 || (ip = create(path, T_DIR, 0, 0)) == 0)
+  begin_trans();
+  if(argstr(0, &path) < 0 || (ip = create(path, T_DIR, 0, 0)) == 0){
+    commit_trans();
      return -1;
+  }
    iunlockput(ip);
+  commit_trans();
    return 0;
  }
  
@@ -313,12 +333,16 @@ sys_mknod(void)
    int len;
    int major, minor;
    
+  begin_trans();
    if((len=argstr(0, &path)) < 0 ||
       argint(1, &major) < 0 ||
       argint(2, &minor) < 0 ||
-     (ip = create(path, T_DEV, major, minor)) == 0)
+     (ip = create(path, T_DEV, major, minor)) == 0){
+    commit_trans();
      return -1;
+  }
    iunlockput(ip);
+  commit_trans();
    return 0;
  }
  
diff --git a/usertests.c b/usertests.c

index 3bffadbae08e720051d7a7e5262fca0b30632d15..ba648a74fe1f0f51fc114b25c8f2be5d6455c7fe 100644 (file)
--- a/usertests.c
+++ b/usertests.c
@@ -7,7 +7,7 @@
  #include "traps.h"
  #include "memlayout.h"
  
-char buf[2048];
+char buf[8192];
  char name[3];
  char *echoargv[] = { "echo", "ALL", "TESTS", "PASSED", 0 };
  int stdout = 1;
@@ -968,6 +968,36 @@ subdir(void)
    printf(1, "subdir ok\n");
  }
  
+// test writes that are larger than the log.
+void
+bigwrite(void)
+{
+  int fd, sz;
+
+  printf(1, "bigwrite test\n");
+
+  unlink("bigwrite");
+  for(sz = 499; sz < 12*512; sz += 471){
+    fd = open("bigwrite", O_CREATE | O_RDWR);
+    if(fd < 0){
+      printf(1, "cannot create bigwrite\n");
+      exit();
+    }
+    int i;
+    for(i = 0; i < 2; i++){
+      int cc = write(fd, buf, sz);
+      if(cc != sz){
+        printf(1, "write(%d) ret %d\n", sz, cc);
+        exit();
+      }
+    }
+    close(fd);
+    unlink("bigwrite");
+  }
+
+  printf(1, "bigwrite ok\n");
+}
+
  void
  bigfile(void)
  {
@@ -1467,6 +1497,7 @@ main(int argc, char *argv[])
    }
    close(open("usertests.ran", O_CREATE));
  
+  bigwrite();
    bigargtest();
    bsstest();
    sbrktest();
author	Robert Morris <rtm@csail.mit.edu>
	Fri, 12 Aug 2011 13:25:39 +0000 (09:25 -0400)
committer	Robert Morris <rtm@csail.mit.edu>
	Fri, 12 Aug 2011 13:25:39 +0000 (09:25 -0400)
file.c		patch \| blob \| history
fs.c		patch \| blob \| history
log.c		patch \| blob \| history
param.h		patch \| blob \| history
syscall.c		patch \| blob \| history
sysfile.c		patch \| blob \| history
usertests.c		patch \| blob \| history