From 9a9d91e4475984cc39906144cb20c69a468c643f Mon Sep 17 00:00:00 2001
From: David Devecsery
Date: Thu, 14 May 2020 11:13:00 -0400
Subject: [PATCH] Just realized old gitignore was causing kernel not to push... that could have been bad.

---
 .gitignore                  |  14 -
 kernel/CMakeLists.txt       | 102 ++++++
 kernel/Sources.cmake        |  39 +++
 kernel/include/buf.h        |  14 +
 kernel/include/date.h       |   8 +
 kernel/include/defs.h       | 190 ++++++++++
 kernel/include/file.h       |  37 ++
 kernel/include/kbd.h        | 112 ++++++
 kernel/include/mp.h         |  56 +++
 kernel/include/proc.h       |  60 ++++
 kernel/include/sleeplock.h  |  10 +
 kernel/include/spinlock.h   |  13 +
 kernel/kernel.ld            |  68 ++++
 kernel/src/asm/entry.S      |  68 ++++
 kernel/src/asm/entryother.S |  93 +++++
 kernel/src/asm/initcode.S   |  32 ++
 kernel/src/asm/swtch.S      |  29 ++
 kernel/src/asm/trapasm.S    |  32 ++
 kernel/src/bio.c            | 144 ++++++++
 kernel/src/console.c        | 300 ++++++++++++++++
 kernel/src/exec.c           | 115 +++++++
 kernel/src/file.c           | 157 +++++++++
 kernel/src/fs.c             | 670 ++++++++++++++++++++++++++++++++++++
 kernel/src/ide.c            | 169 +++++++++
 kernel/src/ioapic.c         |  75 ++++
 kernel/src/kalloc.c         |  96 ++++++
 kernel/src/kbd.c            |  50 +++
 kernel/src/lapic.c          | 229 ++++++++++++
 kernel/src/log.c            | 234 +++++++++++++
 kernel/src/main.c           | 118 +++++++
 kernel/src/memide.c         |  60 ++++
 kernel/src/mp.c             | 139 ++++++++
 kernel/src/picirq.c         |  19 +
 kernel/src/pipe.c           | 121 +++++++
 kernel/src/proc.c           | 534 ++++++++++++++++++++++++++++
 kernel/src/sleeplock.c      |  56 +++
 kernel/src/spinlock.c       | 123 +++++++
 kernel/src/string.c         | 105 ++++++
 kernel/src/syscall.c        | 145 ++++++++
 kernel/src/sysfile.c        | 444 ++++++++++++++++++++++++
 kernel/src/sysproc.c        |  91 +++++
 kernel/src/trap.c           | 112 ++++++
 kernel/src/uart.c           |  77 +++++
 kernel/src/vm.c             | 394 +++++++++++++++++++++
 kernel/tools/mksym.sh       |   3 +
 kernel/tools/vectors.pl     |  47 +++
 user/Sources.cmake          |  25 ++
 47 files changed, 5815 insertions(+), 14 deletions(-)
 create mode 100644 kernel/CMakeLists.txt
 create mode 100644 kernel/Sources.cmake
 create mode 100644 kernel/include/buf.h
 create mode 100644 kernel/include/date.h
 create mode 100644 kernel/include/defs.h
 create mode 100644 kernel/include/file.h
 create mode 100644 kernel/include/kbd.h
 create mode 100644 kernel/include/mp.h
 create mode 100644 kernel/include/proc.h
 create mode 100644 kernel/include/sleeplock.h
 create mode 100644 kernel/include/spinlock.h
 create mode 100644 kernel/kernel.ld
 create mode 100644 kernel/src/asm/entry.S
 create mode 100644 kernel/src/asm/entryother.S
 create mode 100644 kernel/src/asm/initcode.S
 create mode 100644 kernel/src/asm/swtch.S
 create mode 100644 kernel/src/asm/trapasm.S
 create mode 100644 kernel/src/bio.c
 create mode 100644 kernel/src/console.c
 create mode 100644 kernel/src/exec.c
 create mode 100644 kernel/src/file.c
 create mode 100644 kernel/src/fs.c
 create mode 100644 kernel/src/ide.c
 create mode 100644 kernel/src/ioapic.c
 create mode 100644 kernel/src/kalloc.c
 create mode 100644 kernel/src/kbd.c
 create mode 100644 kernel/src/lapic.c
 create mode 100644 kernel/src/log.c
 create mode 100644 kernel/src/main.c
 create mode 100644 kernel/src/memide.c
 create mode 100644 kernel/src/mp.c
 create mode 100644 kernel/src/picirq.c
 create mode 100644 kernel/src/pipe.c
 create mode 100644 kernel/src/proc.c
 create mode 100644 kernel/src/sleeplock.c
 create mode 100644 kernel/src/spinlock.c
 create mode 100644 kernel/src/string.c
 create mode 100644 kernel/src/syscall.c
 create mode 100644 kernel/src/sysfile.c
 create mode 100644 kernel/src/sysproc.c
 create mode 100644 kernel/src/trap.c
 create mode 100644 kernel/src/uart.c
 create mode 100644
kernel/src/vm.c create mode 100755 kernel/tools/mksym.sh create mode 100755 kernel/tools/vectors.pl create mode 100644 user/Sources.cmake diff --git a/.gitignore b/.gitignore index 3e2c9de..0a46411 100644 --- a/.gitignore +++ b/.gitignore @@ -1,16 +1,2 @@ *~ -_* -*.o -*.d -*.asm -*.sym -*.img -vectors.S -bootblock -entryother -initcode -initcode.out -kernel -kernelmemfs -mkfs .gdbinit diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt new file mode 100644 index 0000000..0608770 --- /dev/null +++ b/kernel/CMakeLists.txt @@ -0,0 +1,102 @@ +project(kernel ASM) + +include_directories(include) + +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -static -fno-builtin -fno-pic -m32") +set(CMAKE_ASM_FLAGS "${CMAKE_C_FLAGS}") +set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -m elf_i386") + +include(Sources.cmake) + +add_custom_command( + OUTPUT gen/vectors.S + COMMAND mkdir -p gen && ${CMAKE_CURRENT_SOURCE_DIR}/tools/vectors.pl > gen/vectors.S + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/tools/vectors.pl) + +add_library(vectorobj OBJECT + gen/vectors.S) + +add_library(kernelobjs OBJECT + ${kernel_SOURCES}) + +add_prefix_suffix(kernel_OBJECTS + "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/kernelobjs.dir/" + ".o" + ${kernel_SOURCES}) + +set(entryother_SOURCES + src/asm/entryother.S) + +add_library(entryotherobjs OBJECT + ${entryother_SOURCES}) + +add_prefix_suffix(entryother_OBJECTS + "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/entryotherobjs.dir/" + ".o" + ${entryother_SOURCES}) + +set(initcode_SOURCES + src/asm/initcode.S) + +add_library(initcodeobjs OBJECT + ${initcode_SOURCES}) + +add_prefix_suffix(initcode_OBJECTS + "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/initcodeobjs.dir/" + ".o" + ${initcode_SOURCES}) + +add_custom_command( + OUTPUT entryother.obj + COMMAND ld -m elf_i386 -N -e start -Ttext 0x7000 -o entryother.obj ${entryother_OBJECTS} + DEPENDS $) + +add_custom_command( + OUTPUT entryother + COMMAND objcopy -S -O binary -j .text entryother.obj entryother + DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/entryother.obj) + +add_custom_command( + OUTPUT initcode.obj + COMMAND ld -m elf_i386 -N -e start -Ttext 0 -o initcode.obj ${initcode_OBJECTS} + DEPENDS $) + +add_custom_command( + OUTPUT initcode + COMMAND objcopy -S -O binary -j .text initcode.obj initcode + DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/initcode.obj) + +add_custom_command( + OUTPUT kernel + COMMAND ld -m elf_i386 -nostdlib -T ${CMAKE_CURRENT_SOURCE_DIR}/kernel.ld -o kernel ${kernel_OBJECTS} -b binary initcode entryother + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/kernel.ld $ $ ${CMAKE_CURRENT_BINARY_DIR}/initcode ${CMAKE_CURRENT_BINARY_DIR}/entryother) + +add_custom_command( + OUTPUT kernel.asm + COMMAND objdump -S kernel > kernel.asm + DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/kernel) + +add_custom_command( + OUTPUT initcode.asm + COMMAND objdump -S initcode.obj > initcode.asm + DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/initcode.obj) + +add_custom_command( + OUTPUT entryother.asm + COMMAND objdump -S entryother.obj > entryother.asm + DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/entryother.obj) + +add_custom_command( + OUTPUT kernel.sym + COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/tools/mksym.sh + DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/kernel) + +add_custom_target( + buildkern ALL + # FIXME: For some reaosn I need to specify kernelobjs, entryotherobjs, and + # initcodeobjs, or the build system freaks out and dies (blerg) + DEPENDS kernelobjs vectorobj entryotherobjs initcodeobjs kernel kernel.asm kernel.sym entryother.asm initcode.asm) + #DEPENDS kernel kernel.asm kernel.sym) + 
+set_property(TARGET kernelobjs PROPERTY C_STANDARD 11) + diff --git a/kernel/Sources.cmake b/kernel/Sources.cmake new file mode 100644 index 0000000..78ce385 --- /dev/null +++ b/kernel/Sources.cmake @@ -0,0 +1,39 @@ +# Source files built along with the kernel + +set(kernel_SOURCES + # ASM Files + src/asm/entry.S + src/asm/swtch.S + src/asm/trapasm.S + + # Generated + gen/vectors.S + + # C Files + src/bio.c + src/console.c + src/exec.c + src/file.c + src/fs.c + src/ide.c + src/ioapic.c + src/kalloc.c + src/kbd.c + src/lapic.c + src/log.c + src/main.c + src/mp.c + src/picirq.c + src/pipe.c + src/proc.c + src/sleeplock.c + src/spinlock.c + src/string.c + src/syscall.c + src/sysfile.c + src/sysproc.c + src/trap.c + src/uart.c + src/vm.c + ) + diff --git a/kernel/include/buf.h b/kernel/include/buf.h new file mode 100644 index 0000000..3266495 --- /dev/null +++ b/kernel/include/buf.h @@ -0,0 +1,14 @@ +struct buf { + int flags; + uint dev; + uint blockno; + struct sleeplock lock; + uint refcnt; + struct buf *prev; // LRU cache list + struct buf *next; + struct buf *qnext; // disk queue + uchar data[BSIZE]; +}; +#define B_VALID 0x2 // buffer has been read from disk +#define B_DIRTY 0x4 // buffer needs to be written to disk + diff --git a/kernel/include/date.h b/kernel/include/date.h new file mode 100644 index 0000000..94aec4b --- /dev/null +++ b/kernel/include/date.h @@ -0,0 +1,8 @@ +struct rtcdate { + uint second; + uint minute; + uint hour; + uint day; + uint month; + uint year; +}; diff --git a/kernel/include/defs.h b/kernel/include/defs.h new file mode 100644 index 0000000..82fb982 --- /dev/null +++ b/kernel/include/defs.h @@ -0,0 +1,190 @@ +struct buf; +struct context; +struct file; +struct inode; +struct pipe; +struct proc; +struct rtcdate; +struct spinlock; +struct sleeplock; +struct stat; +struct superblock; + +// bio.c +void binit(void); +struct buf* bread(uint, uint); +void brelse(struct buf*); +void bwrite(struct buf*); + +// console.c +void consoleinit(void); +void cprintf(char*, ...); +void consoleintr(int(*)(void)); +void panic(char*) __attribute__((noreturn)); + +// exec.c +int exec(char*, char**); + +// file.c +struct file* filealloc(void); +void fileclose(struct file*); +struct file* filedup(struct file*); +void fileinit(void); +int fileread(struct file*, char*, int n); +int filestat(struct file*, struct stat*); +int filewrite(struct file*, char*, int n); + +// fs.c +void readsb(int dev, struct superblock *sb); +int dirlink(struct inode*, char*, uint); +struct inode* dirlookup(struct inode*, char*, uint*); +struct inode* ialloc(uint, short); +struct inode* idup(struct inode*); +void iinit(int dev); +void ilock(struct inode*); +void iput(struct inode*); +void iunlock(struct inode*); +void iunlockput(struct inode*); +void iupdate(struct inode*); +int namecmp(const char*, const char*); +struct inode* namei(char*); +struct inode* nameiparent(char*, char*); +int readi(struct inode*, char*, uint, uint); +void stati(struct inode*, struct stat*); +int writei(struct inode*, char*, uint, uint); + +// ide.c +void ideinit(void); +void ideintr(void); +void iderw(struct buf*); + +// ioapic.c +void ioapicenable(int irq, int cpu); +extern uchar ioapicid; +void ioapicinit(void); + +// kalloc.c +char* kalloc(void); +void kfree(char*); +void kinit1(void*, void*); +void kinit2(void*, void*); + +// kbd.c +void kbdintr(void); + +// lapic.c +void cmostime(struct rtcdate *r); +int lapicid(void); +extern volatile uint* lapic; +void lapiceoi(void); +void lapicinit(void); +void lapicstartap(uchar, uint); 
+void microdelay(int); + +// log.c +void initlog(int dev); +void log_write(struct buf*); +void begin_op(); +void end_op(); + +// mp.c +extern int ismp; +void mpinit(void); + +// picirq.c +void picenable(int); +void picinit(void); + +// pipe.c +int pipealloc(struct file**, struct file**); +void pipeclose(struct pipe*, int); +int piperead(struct pipe*, char*, int); +int pipewrite(struct pipe*, char*, int); + +//PAGEBREAK: 16 +// proc.c +int cpuid(void); +void exit(void); +int fork(void); +int growproc(int); +int kill(int); +struct cpu* mycpu(void); +struct proc* myproc(); +void pinit(void); +void procdump(void); +void scheduler(void) __attribute__((noreturn)); +void sched(void); +void setproc(struct proc*); +void sleep(void*, struct spinlock*); +void userinit(void); +int wait(void); +void wakeup(void*); +void yield(void); + +// swtch.S +void swtch(struct context**, struct context*); + +// spinlock.c +void acquire(struct spinlock*); +void getcallerpcs(void*, uint*); +int holding(struct spinlock*); +void initlock(struct spinlock*, char*); +void release(struct spinlock*); +void pushcli(void); +void popcli(void); + +// sleeplock.c +void acquiresleep(struct sleeplock*); +void releasesleep(struct sleeplock*); +int holdingsleep(struct sleeplock*); +void initsleeplock(struct sleeplock*, char*); + +// string.c +int memcmp(const void*, const void*, uint); +void* memmove(void*, const void*, uint); +void* memset(void*, int, uint); +char* safestrcpy(char*, const char*, int); +int strlen(const char*); +int strncmp(const char*, const char*, uint); +char* strncpy(char*, const char*, int); + +// syscall.c +int argint(int, int*); +int argptr(int, char**, int); +int argstr(int, char**); +int fetchint(uint, int*); +int fetchstr(uint, char**); +void syscall(void); + +// timer.c +void timerinit(void); + +// trap.c +void idtinit(void); +extern uint ticks; +void tvinit(void); +extern struct spinlock tickslock; + +// uart.c +void uartinit(void); +void uartintr(void); +void uartputc(int); + +// vm.c +void seginit(void); +void kvmalloc(void); +pde_t* setupkvm(void); +char* uva2ka(pde_t*, char*); +int allocuvm(pde_t*, uint, uint); +int deallocuvm(pde_t*, uint, uint); +void freevm(pde_t*); +void inituvm(pde_t*, char*, uint); +int loaduvm(pde_t*, char*, struct inode*, uint, uint); +pde_t* copyuvm(pde_t*, uint); +void switchuvm(struct proc*); +void switchkvm(void); +int copyout(pde_t*, uint, void*, uint); +void clearpteu(pde_t *pgdir, char *uva); + +// number of elements in fixed-size array +#define NELEM(x) (sizeof(x)/sizeof((x)[0])) diff --git a/kernel/include/file.h b/kernel/include/file.h new file mode 100644 index 0000000..0990c82 --- /dev/null +++ b/kernel/include/file.h @@ -0,0 +1,37 @@ +struct file { + enum { FD_NONE, FD_PIPE, FD_INODE } type; + int ref; // reference count + char readable; + char writable; + struct pipe *pipe; + struct inode *ip; + uint off; +}; + + +// in-memory copy of an inode +struct inode { + uint dev; // Device number + uint inum; // Inode number + int ref; // Reference count + struct sleeplock lock; // protects everything below here + int valid; // inode has been read from disk? 
+ + short type; // copy of disk inode + short major; + short minor; + short nlink; + uint size; + uint addrs[NDIRECT+1]; +}; + +// table mapping major device number to +// device functions +struct devsw { + int (*read)(struct inode*, char*, int); + int (*write)(struct inode*, char*, int); +}; + +extern struct devsw devsw[]; + +#define CONSOLE 1 diff --git a/kernel/include/kbd.h b/kernel/include/kbd.h new file mode 100644 index 0000000..babbd6e --- /dev/null +++ b/kernel/include/kbd.h @@ -0,0 +1,112 @@ +// PC keyboard interface constants + +#define KBSTATP 0x64 // kbd controller status port(I) +#define KBS_DIB 0x01 // kbd data in buffer +#define KBDATAP 0x60 // kbd data port(I) + +#define NO 0 + +#define SHIFT (1<<0) +#define CTL (1<<1) +#define ALT (1<<2) + +#define CAPSLOCK (1<<3) +#define NUMLOCK (1<<4) +#define SCROLLLOCK (1<<5) + +#define E0ESC (1<<6) + +// Special keycodes +#define KEY_HOME 0xE0 +#define KEY_END 0xE1 +#define KEY_UP 0xE2 +#define KEY_DN 0xE3 +#define KEY_LF 0xE4 +#define KEY_RT 0xE5 +#define KEY_PGUP 0xE6 +#define KEY_PGDN 0xE7 +#define KEY_INS 0xE8 +#define KEY_DEL 0xE9 + +// C('A') == Control-A +#define C(x) (x - '@') + +static uchar shiftcode[256] = +{ + [0x1D] CTL, + [0x2A] SHIFT, + [0x36] SHIFT, + [0x38] ALT, + [0x9D] CTL, + [0xB8] ALT +}; + +static uchar togglecode[256] = +{ + [0x3A] CAPSLOCK, + [0x45] NUMLOCK, + [0x46] SCROLLLOCK +}; + +static uchar normalmap[256] = +{ + NO, 0x1B, '1', '2', '3', '4', '5', '6', // 0x00 + '7', '8', '9', '0', '-', '=', '\b', '\t', + 'q', 'w', 'e', 'r', 't', 'y', 'u', 'i', // 0x10 + 'o', 'p', '[', ']', '\n', NO, 'a', 's', + 'd', 'f', 'g', 'h', 'j', 'k', 'l', ';', // 0x20 + '\'', '`', NO, '\\', 'z', 'x', 'c', 'v', + 'b', 'n', 'm', ',', '.', '/', NO, '*', // 0x30 + NO, ' ', NO, NO, NO, NO, NO, NO, + NO, NO, NO, NO, NO, NO, NO, '7', // 0x40 + '8', '9', '-', '4', '5', '6', '+', '1', + '2', '3', '0', '.', NO, NO, NO, NO, // 0x50 + [0x9C] '\n', // KP_Enter + [0xB5] '/', // KP_Div + [0xC8] KEY_UP, [0xD0] KEY_DN, + [0xC9] KEY_PGUP, [0xD1] KEY_PGDN, + [0xCB] KEY_LF, [0xCD] KEY_RT, + [0x97] KEY_HOME, [0xCF] KEY_END, + [0xD2] KEY_INS, [0xD3] KEY_DEL +}; + +static uchar shiftmap[256] = +{ + NO, 033, '!', '@', '#', '$', '%', '^', // 0x00 + '&', '*', '(', ')', '_', '+', '\b', '\t', + 'Q', 'W', 'E', 'R', 'T', 'Y', 'U', 'I', // 0x10 + 'O', 'P', '{', '}', '\n', NO, 'A', 'S', + 'D', 'F', 'G', 'H', 'J', 'K', 'L', ':', // 0x20 + '"', '~', NO, '|', 'Z', 'X', 'C', 'V', + 'B', 'N', 'M', '<', '>', '?', NO, '*', // 0x30 + NO, ' ', NO, NO, NO, NO, NO, NO, + NO, NO, NO, NO, NO, NO, NO, '7', // 0x40 + '8', '9', '-', '4', '5', '6', '+', '1', + '2', '3', '0', '.', NO, NO, NO, NO, // 0x50 + [0x9C] '\n', // KP_Enter + [0xB5] '/', // KP_Div + [0xC8] KEY_UP, [0xD0] KEY_DN, + [0xC9] KEY_PGUP, [0xD1] KEY_PGDN, + [0xCB] KEY_LF, [0xCD] KEY_RT, + [0x97] KEY_HOME, [0xCF] KEY_END, + [0xD2] KEY_INS, [0xD3] KEY_DEL +}; + +static uchar ctlmap[256] = +{ + NO, NO, NO, NO, NO, NO, NO, NO, + NO, NO, NO, NO, NO, NO, NO, NO, + C('Q'), C('W'), C('E'), C('R'), C('T'), C('Y'), C('U'), C('I'), + C('O'), C('P'), NO, NO, '\r', NO, C('A'), C('S'), + C('D'), C('F'), C('G'), C('H'), C('J'), C('K'), C('L'), NO, + NO, NO, NO, C('\\'), C('Z'), C('X'), C('C'), C('V'), + C('B'), C('N'), C('M'), NO, NO, C('/'), NO, NO, + [0x9C] '\r', // KP_Enter + [0xB5] C('/'), // KP_Div + [0xC8] KEY_UP, [0xD0] KEY_DN, + [0xC9] KEY_PGUP, [0xD1] KEY_PGDN, + [0xCB] KEY_LF, [0xCD] KEY_RT, + [0x97] KEY_HOME, [0xCF] KEY_END, + [0xD2] KEY_INS, [0xD3] KEY_DEL +}; + diff --git a/kernel/include/mp.h 
b/kernel/include/mp.h new file mode 100644 index 0000000..4d17283 --- /dev/null +++ b/kernel/include/mp.h @@ -0,0 +1,56 @@ +// See MultiProcessor Specification Version 1.[14] + +struct mp { // floating pointer + uchar signature[4]; // "_MP_" + void *physaddr; // phys addr of MP config table + uchar length; // 1 + uchar specrev; // [14] + uchar checksum; // all bytes must add up to 0 + uchar type; // MP system config type + uchar imcrp; + uchar reserved[3]; +}; + +struct mpconf { // configuration table header + uchar signature[4]; // "PCMP" + ushort length; // total table length + uchar version; // [14] + uchar checksum; // all bytes must add up to 0 + uchar product[20]; // product id + uint *oemtable; // OEM table pointer + ushort oemlength; // OEM table length + ushort entry; // entry count + uint *lapicaddr; // address of local APIC + ushort xlength; // extended table length + uchar xchecksum; // extended table checksum + uchar reserved; +}; + +struct mpproc { // processor table entry + uchar type; // entry type (0) + uchar apicid; // local APIC id + uchar version; // local APIC verison + uchar flags; // CPU flags + #define MPBOOT 0x02 // This proc is the bootstrap processor. + uchar signature[4]; // CPU signature + uint feature; // feature flags from CPUID instruction + uchar reserved[8]; +}; + +struct mpioapic { // I/O APIC table entry + uchar type; // entry type (2) + uchar apicno; // I/O APIC id + uchar version; // I/O APIC version + uchar flags; // I/O APIC flags + uint *addr; // I/O APIC address +}; + +// Table entry types +#define MPPROC 0x00 // One per processor +#define MPBUS 0x01 // One per bus +#define MPIOAPIC 0x02 // One per I/O APIC +#define MPIOINTR 0x03 // One per bus interrupt source +#define MPLINTR 0x04 // One per system interrupt source + +//PAGEBREAK! +// Blank page. diff --git a/kernel/include/proc.h b/kernel/include/proc.h new file mode 100644 index 0000000..0c574b9 --- /dev/null +++ b/kernel/include/proc.h @@ -0,0 +1,60 @@ +#include + +// Per-CPU state +struct cpu { + uchar apicid; // Local APIC ID + struct context *scheduler; // swtch() here to enter scheduler + struct taskstate ts; // Used by x86 to find stack for interrupt + struct segdesc gdt[NSEGS]; // x86 global descriptor table + atomic_uint started; // Has the CPU started? + int ncli; // Depth of pushcli nesting. + int intena; // Were interrupts enabled before pushcli? + struct proc *proc; // The process running on this cpu or null +}; + +extern struct cpu cpus[NCPU]; +extern int ncpu; + +//PAGEBREAK: 17 +// Saved registers for kernel context switches. +// Don't need to save all the segment registers (%cs, etc), +// because they are constant across kernel contexts. +// Don't need to save %eax, %ecx, %edx, because the +// x86 convention is that the caller has saved them. +// Contexts are stored at the bottom of the stack they +// describe; the stack pointer is the address of the context. +// The layout of the context matches the layout of the stack in swtch.S +// at the "Switch stacks" comment. Switch doesn't save eip explicitly, +// but it is on the stack and allocproc() manipulates it. 
+struct context { + uint edi; + uint esi; + uint ebx; + uint ebp; + uint eip; +}; + +enum procstate { UNUSED, EMBRYO, SLEEPING, RUNNABLE, RUNNING, ZOMBIE }; + +// Per-process state +struct proc { + uint sz; // Size of process memory (bytes) + pde_t* pgdir; // Page table + char *kstack; // Bottom of kernel stack for this process + enum procstate state; // Process state + int pid; // Process ID + struct proc *parent; // Parent process + struct trapframe *tf; // Trap frame for current syscall + struct context *context; // swtch() here to run process + void *chan; // If non-zero, sleeping on chan + int killed; // If non-zero, have been killed + struct file *ofile[NOFILE]; // Open files + struct inode *cwd; // Current directory + char name[16]; // Process name (debugging) +}; + +// Process memory is laid out contiguously, low addresses first: +// text +// original data and bss +// fixed-size stack +// expandable heap diff --git a/kernel/include/sleeplock.h b/kernel/include/sleeplock.h new file mode 100644 index 0000000..110e6f3 --- /dev/null +++ b/kernel/include/sleeplock.h @@ -0,0 +1,10 @@ +// Long-term locks for processes +struct sleeplock { + uint locked; // Is the lock held? + struct spinlock lk; // spinlock protecting this sleep lock + + // For debugging: + char *name; // Name of lock. + int pid; // Process holding lock +}; + diff --git a/kernel/include/spinlock.h b/kernel/include/spinlock.h new file mode 100644 index 0000000..947aaee --- /dev/null +++ b/kernel/include/spinlock.h @@ -0,0 +1,13 @@ +#include + +// Mutual exclusion lock. +struct spinlock { + atomic_uint locked; // Is the lock held? + + // For debugging: + char *name; // Name of lock. + struct cpu *cpu; // The cpu holding the lock. + uint pcs[10]; // The call stack (an array of program counters) + // that locked the lock. +}; + diff --git a/kernel/kernel.ld b/kernel/kernel.ld new file mode 100644 index 0000000..6ec16c3 --- /dev/null +++ b/kernel/kernel.ld @@ -0,0 +1,68 @@ +/* Simple linker script for the JOS kernel. + See the GNU ld 'info' manual ("info ld") to learn the syntax. */ + +OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") +OUTPUT_ARCH(i386) +ENTRY(_start) + +SECTIONS +{ + /* Link the kernel at this address: "." means the current address */ + /* Must be equal to KERNLINK */ + . = 0x80100000; + + .text : AT(0x100000) { + *(.text .stub .text.* .gnu.linkonce.t.*) + } + + PROVIDE(etext = .); /* Define the 'etext' symbol to this value */ + + .rodata : { + *(.rodata .rodata.* .gnu.linkonce.r.*) + } + + /* Include debugging information in kernel memory */ + .stab : { + PROVIDE(__STAB_BEGIN__ = .); + *(.stab); + PROVIDE(__STAB_END__ = .); + BYTE(0) /* Force the linker to allocate space + for this section */ + } + + .stabstr : { + PROVIDE(__STABSTR_BEGIN__ = .); + *(.stabstr); + PROVIDE(__STABSTR_END__ = .); + BYTE(0) /* Force the linker to allocate space + for this section */ + } + + /* Adjust the address for the data segment to the next page */ + . = ALIGN(0x1000); + + /* Conventionally, Unix linkers provide pseudo-symbols + * etext, edata, and end, at the end of the text, data, and bss. + * For the kernel mapping, we need the address at the beginning + * of the data section, but that's not one of the conventional + * symbols, because the convention started before there was a + * read-only rodata section between text and data. 
*/ + PROVIDE(data = .); + + /* The data segment */ + .data : AT(ADDR(.data) - 0x80000000) { + *(.data) + } + + PROVIDE(edata = .); + + .bss : { + *(.bss) + } + + PROVIDE(end = .); + + /DISCARD/ : { + *(.eh_frame .note.GNU-stack) + } +} diff --git a/kernel/src/asm/entry.S b/kernel/src/asm/entry.S new file mode 100644 index 0000000..b3f1afc --- /dev/null +++ b/kernel/src/asm/entry.S @@ -0,0 +1,68 @@ +# The xv6 kernel starts executing in this file. This file is linked with +# the kernel C code, so it can refer to kernel symbols such as main(). +# The boot block (bootasm.S and bootmain.c) jumps to entry below. + +# Multiboot header, for multiboot boot loaders like GNU Grub. +# http://www.gnu.org/software/grub/manual/multiboot/multiboot.html +# +# Using GRUB 2, you can boot xv6 from a file stored in a +# Linux file system by copying kernel or kernelmemfs to /boot +# and then adding this menu entry: +# +# menuentry "xv6" { +# insmod ext2 +# set root='(hd0,msdos1)' +# set kernel='/boot/kernel' +# echo "Loading ${kernel}..." +# multiboot ${kernel} ${kernel} +# boot +# } + +#include "asm/asm.h" +#include "memlayout.h" +#include "mmu.h" +#include "param.h" + +# Multiboot header. Data to direct multiboot loader. +.p2align 2 +.text +.globl multiboot_header +multiboot_header: + #define magic 0x1badb002 + #define flags 0 + .long magic + .long flags + .long (-magic-flags) + +# By convention, the _start symbol specifies the ELF entry point. +# Since we haven't set up virtual memory yet, our entry point is +# the physical address of 'entry'. +.globl _start +_start = V2P_WO(entry) + +# Entering xv6 on boot processor, with paging off. +.globl entry +entry: + # Turn on page size extension for 4Mbyte pages + movl %cr4, %eax + orl $(CR4_PSE), %eax + movl %eax, %cr4 + # Set page directory + movl $(V2P_WO(entrypgdir)), %eax + movl %eax, %cr3 + # Turn on paging. + movl %cr0, %eax + orl $(CR0_PG|CR0_WP), %eax + movl %eax, %cr0 + + # Set up the stack pointer. + movl $(stack + KSTACKSIZE), %esp + + # Jump to main(), and switch to executing at + # high addresses. The indirect call is needed because + # the assembler produces a PC-relative instruction + # for a direct jump. + mov $main, %eax + jmp *%eax + +.comm stack, KSTACKSIZE diff --git a/kernel/src/asm/entryother.S b/kernel/src/asm/entryother.S new file mode 100644 index 0000000..757e7f9 --- /dev/null +++ b/kernel/src/asm/entryother.S @@ -0,0 +1,93 @@ +#include "asm/asm.h" +#include "memlayout.h" +#include "mmu.h" + +# Each non-boot CPU ("AP") is started up in response to a STARTUP +# IPI from the boot CPU. Section B.4.2 of the Multi-Processor +# Specification says that the AP will start in real mode with CS:IP +# set to XY00:0000, where XY is an 8-bit value sent with the +# STARTUP. Thus this code must start at a 4096-byte boundary. +# +# Because this code sets DS to zero, it must sit +# at an address in the low 2^16 bytes. +# +# Startothers (in main.c) sends the STARTUPs one at a time. +# It copies this code (start) at 0x7000. It puts the address of +# a newly allocated per-core stack in start-4,the address of the +# place to jump to (mpenter) in start-8, and the physical address +# of entrypgdir in start-12. +# +# This code combines elements of bootasm.S and entry.S. + +.code16 +.globl start +start: + cli + + # Zero data segment registers DS, ES, and SS. + xorw %ax,%ax + movw %ax,%ds + movw %ax,%es + movw %ax,%ss + + # Switch from real to protected mode. 
Use a bootstrap GDT that makes + # virtual addresses map directly to physical addresses so that the + # effective memory map doesn't change during the transition. + lgdt gdtdesc + movl %cr0, %eax + orl $CR0_PE, %eax + movl %eax, %cr0 + + # Complete the transition to 32-bit protected mode by using a long jmp + # to reload %cs and %eip. The segment descriptors are set up with no + # translation, so that the mapping is still the identity mapping. + ljmpl $(SEG_KCODE<<3), $(start32) + +//PAGEBREAK! +.code32 # Tell assembler to generate 32-bit code now. +start32: + # Set up the protected-mode data segment registers + movw $(SEG_KDATA<<3), %ax # Our data segment selector + movw %ax, %ds # -> DS: Data Segment + movw %ax, %es # -> ES: Extra Segment + movw %ax, %ss # -> SS: Stack Segment + movw $0, %ax # Zero segments not ready for use + movw %ax, %fs # -> FS + movw %ax, %gs # -> GS + + # Turn on page size extension for 4Mbyte pages + movl %cr4, %eax + orl $(CR4_PSE), %eax + movl %eax, %cr4 + # Use entrypgdir as our initial page table + movl (start-12), %eax + movl %eax, %cr3 + # Turn on paging. + movl %cr0, %eax + orl $(CR0_PE|CR0_PG|CR0_WP), %eax + movl %eax, %cr0 + + # Switch to the stack allocated by startothers() + movl (start-4), %esp + # Call mpenter() + call *(start-8) + + movw $0x8a00, %ax + movw %ax, %dx + outw %ax, %dx + movw $0x8ae0, %ax + outw %ax, %dx +spin: + jmp spin + +.p2align 2 +gdt: + SEG_NULLASM + SEG_ASM(STA_X|STA_R, 0, 0xffffffff) + SEG_ASM(STA_W, 0, 0xffffffff) + + +gdtdesc: + .word (gdtdesc - gdt - 1) + .long gdt + diff --git a/kernel/src/asm/initcode.S b/kernel/src/asm/initcode.S new file mode 100644 index 0000000..80ac5d8 --- /dev/null +++ b/kernel/src/asm/initcode.S @@ -0,0 +1,32 @@ +# Initial process execs /init. +# This code runs in user space. + +#include "syscall.h" +#include "traps.h" + + +# exec(init, argv) +.globl start +start: + pushl $argv + pushl $init + pushl $0 // where caller pc would be + movl $SYS_exec, %eax + int $T_SYSCALL + +# for(;;) exit(); +exit: + movl $SYS_exit, %eax + int $T_SYSCALL + jmp exit + +# char init[] = "/init\0"; +init: + .string "/init\0" + +# char *argv[] = { init, 0 }; +.p2align 2 +argv: + .long init + .long 0 + diff --git a/kernel/src/asm/swtch.S b/kernel/src/asm/swtch.S new file mode 100644 index 0000000..63a7dcc --- /dev/null +++ b/kernel/src/asm/swtch.S @@ -0,0 +1,29 @@ +# Context switch +# +# void swtch(struct context **old, struct context *new); +# +# Save the current registers on the stack, creating +# a struct context, and save its address in *old. +# Switch stacks to new and pop previously-saved registers. + +.globl swtch +swtch: + movl 4(%esp), %eax + movl 8(%esp), %edx + + # Save old callee-saved registers + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + # Switch stacks + movl %esp, (%eax) + movl %edx, %esp + + # Load new callee-saved registers + popl %edi + popl %esi + popl %ebx + popl %ebp + ret diff --git a/kernel/src/asm/trapasm.S b/kernel/src/asm/trapasm.S new file mode 100644 index 0000000..da8aefc --- /dev/null +++ b/kernel/src/asm/trapasm.S @@ -0,0 +1,32 @@ +#include "mmu.h" + + # vectors.S sends all traps here. +.globl alltraps +alltraps: + # Build trap frame. + pushl %ds + pushl %es + pushl %fs + pushl %gs + pushal + + # Set up data segments. + movw $(SEG_KDATA<<3), %ax + movw %ax, %ds + movw %ax, %es + + # Call trap(tf), where tf=%esp + pushl %esp + call trap + addl $4, %esp + + # Return falls through to trapret... 
+.globl trapret +trapret: + popal + popl %gs + popl %fs + popl %es + popl %ds + addl $0x8, %esp # trapno and errcode + iret diff --git a/kernel/src/bio.c b/kernel/src/bio.c new file mode 100644 index 0000000..a45ff3e --- /dev/null +++ b/kernel/src/bio.c @@ -0,0 +1,144 @@ +// Buffer cache. +// +// The buffer cache is a linked list of buf structures holding +// cached copies of disk block contents. Caching disk blocks +// in memory reduces the number of disk reads and also provides +// a synchronization point for disk blocks used by multiple processes. +// +// Interface: +// * To get a buffer for a particular disk block, call bread. +// * After changing buffer data, call bwrite to write it to disk. +// * When done with the buffer, call brelse. +// * Do not use the buffer after calling brelse. +// * Only one process at a time can use a buffer, +// so do not keep them longer than necessary. +// +// The implementation uses two state flags internally: +// * B_VALID: the buffer data has been read from the disk. +// * B_DIRTY: the buffer data has been modified +// and needs to be written to disk. + +#include "types.h" +#include "defs.h" +#include "param.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "fs.h" +#include "buf.h" + +struct { + struct spinlock lock; + struct buf buf[NBUF]; + + // Linked list of all buffers, through prev/next. + // head.next is most recently used. + struct buf head; +} bcache; + +void +binit(void) +{ + struct buf *b; + + initlock(&bcache.lock, "bcache"); + +//PAGEBREAK! + // Create linked list of buffers + bcache.head.prev = &bcache.head; + bcache.head.next = &bcache.head; + for(b = bcache.buf; b < bcache.buf+NBUF; b++){ + b->next = bcache.head.next; + b->prev = &bcache.head; + initsleeplock(&b->lock, "buffer"); + bcache.head.next->prev = b; + bcache.head.next = b; + } +} + +// Look through buffer cache for block on device dev. +// If not found, allocate a buffer. +// In either case, return locked buffer. +static struct buf* +bget(uint dev, uint blockno) +{ + struct buf *b; + + acquire(&bcache.lock); + + // Is the block already cached? + for(b = bcache.head.next; b != &bcache.head; b = b->next){ + if(b->dev == dev && b->blockno == blockno){ + b->refcnt++; + release(&bcache.lock); + acquiresleep(&b->lock); + return b; + } + } + + // Not cached; recycle an unused buffer. + // Even if refcnt==0, B_DIRTY indicates a buffer is in use + // because log.c has modified it but not yet committed it. + for(b = bcache.head.prev; b != &bcache.head; b = b->prev){ + if(b->refcnt == 0 && (b->flags & B_DIRTY) == 0) { + b->dev = dev; + b->blockno = blockno; + b->flags = 0; + b->refcnt = 1; + release(&bcache.lock); + acquiresleep(&b->lock); + return b; + } + } + panic("bget: no buffers"); +} + +// Return a locked buf with the contents of the indicated block. +struct buf* +bread(uint dev, uint blockno) +{ + struct buf *b; + + b = bget(dev, blockno); + if((b->flags & B_VALID) == 0) { + iderw(b); + } + return b; +} + +// Write b's contents to disk. Must be locked. +void +bwrite(struct buf *b) +{ + if(!holdingsleep(&b->lock)) + panic("bwrite"); + b->flags |= B_DIRTY; + iderw(b); +} + +// Release a locked buffer. +// Move to the head of the MRU list. +void +brelse(struct buf *b) +{ + if(!holdingsleep(&b->lock)) + panic("brelse"); + + releasesleep(&b->lock); + + acquire(&bcache.lock); + b->refcnt--; + if (b->refcnt == 0) { + // no one is waiting for it. 
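    // Splice b out of its current place in the list and reinsert it
    // right after head, so the most-recently-used buffer is the first
    // one bget() scans.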
+ b->next->prev = b->prev; + b->prev->next = b->next; + b->next = bcache.head.next; + b->prev = &bcache.head; + bcache.head.next->prev = b; + bcache.head.next = b; + } + + release(&bcache.lock); +} +//PAGEBREAK! +// Blank page. + diff --git a/kernel/src/console.c b/kernel/src/console.c new file mode 100644 index 0000000..7dd649d --- /dev/null +++ b/kernel/src/console.c @@ -0,0 +1,300 @@ +// Console input and output. +// Input is from the keyboard or serial port. +// Output is written to the screen and serial port. + +#include "asm/x86.h" + +#include "types.h" +#include "defs.h" +#include "param.h" +#include "traps.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "fs.h" +#include "file.h" +#include "memlayout.h" +#include "mmu.h" +#include "proc.h" + +static void consputc(int); + +static int panicked = 0; + +static struct { + struct spinlock lock; + int locking; +} cons; + +static void +printint(int xx, int base, int sign) +{ + static char digits[] = "0123456789abcdef"; + char buf[16]; + int i; + uint x; + + if(sign && (sign = xx < 0)) + x = -xx; + else + x = xx; + + i = 0; + do{ + buf[i++] = digits[x % base]; + }while((x /= base) != 0); + + if(sign) + buf[i++] = '-'; + + while(--i >= 0) + consputc(buf[i]); +} +//PAGEBREAK: 50 + +// Print to the console. only understands %d, %x, %p, %s. +void +cprintf(char *fmt, ...) +{ + int i, c, locking; + uint *argp; + char *s; + + locking = cons.locking; + if(locking) + acquire(&cons.lock); + + if (fmt == 0) + panic("null fmt"); + + argp = (uint*)(void*)(&fmt + 1); + for(i = 0; (c = fmt[i] & 0xff) != 0; i++){ + if(c != '%'){ + consputc(c); + continue; + } + c = fmt[++i] & 0xff; + if(c == 0) + break; + switch(c){ + case 'd': + printint(*argp++, 10, 1); + break; + case 'x': + case 'p': + printint(*argp++, 16, 0); + break; + case 's': + if((s = (char*)*argp++) == 0) + s = "(null)"; + for(; *s; s++) + consputc(*s); + break; + case '%': + consputc('%'); + break; + default: + // Print unknown % sequence to draw attention. + consputc('%'); + consputc(c); + break; + } + } + + if(locking) + release(&cons.lock); +} + +void +panic(char *s) +{ + int i; + uint pcs[10]; + + cli(); + cons.locking = 0; + // use lapiccpunum so that we can call panic from mycpu() + cprintf("lapicid %d: panic: ", lapicid()); + cprintf(s); + cprintf("\n"); + getcallerpcs(&s, pcs); + for(i=0; i<10; i++) + cprintf(" %p", pcs[i]); + panicked = 1; // freeze other CPU + for(;;) + ; +} + +//PAGEBREAK: 50 +#define BACKSPACE 0x100 +#define CRTPORT 0x3d4 +static ushort *crt = (ushort*)P2V(0xb8000); // CGA memory + +static void +cgaputc(int c) +{ + int pos; + + // Cursor position: col + 80*row. + outb(CRTPORT, 14); + pos = inb(CRTPORT+1) << 8; + outb(CRTPORT, 15); + pos |= inb(CRTPORT+1); + + if(c == '\n') + pos += 80 - pos%80; + else if(c == BACKSPACE){ + if(pos > 0) --pos; + } else + crt[pos++] = (c&0xff) | 0x0700; // black on white + + if(pos < 0 || pos > 25*80) + panic("pos under/overflow"); + + if((pos/80) >= 24){ // Scroll up. 
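    // Shift the screen contents up one row, move the cursor up a line,
    // and clear from the cursor to the end of the scrolled region.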
+ memmove(crt, crt+80, sizeof(crt[0])*23*80); + pos -= 80; + memset(crt+pos, 0, sizeof(crt[0])*(24*80 - pos)); + } + + outb(CRTPORT, 14); + outb(CRTPORT+1, pos>>8); + outb(CRTPORT, 15); + outb(CRTPORT+1, pos); + crt[pos] = ' ' | 0x0700; +} + +void +consputc(int c) +{ + if(panicked){ + cli(); + for(;;) + ; + } + + if(c == BACKSPACE){ + uartputc('\b'); uartputc(' '); uartputc('\b'); + } else + uartputc(c); + cgaputc(c); +} + +#define INPUT_BUF 128 +struct { + char buf[INPUT_BUF]; + uint r; // Read index + uint w; // Write index + uint e; // Edit index +} input; + +#define C(x) ((x)-'@') // Control-x + +void +consoleintr(int (*getc)(void)) +{ + int c, doprocdump = 0; + + acquire(&cons.lock); + while((c = getc()) >= 0){ + switch(c){ + case C('P'): // Process listing. + // procdump() locks cons.lock indirectly; invoke later + doprocdump = 1; + break; + case C('U'): // Kill line. + while(input.e != input.w && + input.buf[(input.e-1) % INPUT_BUF] != '\n'){ + input.e--; + consputc(BACKSPACE); + } + break; + case C('H'): case '\x7f': // Backspace + if(input.e != input.w){ + input.e--; + consputc(BACKSPACE); + } + break; + default: + if(c != 0 && input.e-input.r < INPUT_BUF){ + c = (c == '\r') ? '\n' : c; + input.buf[input.e++ % INPUT_BUF] = c; + consputc(c); + if(c == '\n' || c == C('D') || input.e == input.r+INPUT_BUF){ + input.w = input.e; + wakeup(&input.r); + } + } + break; + } + } + release(&cons.lock); + if(doprocdump) { + procdump(); // now call procdump() wo. cons.lock held + } +} + +int +consoleread(struct inode *ip, char *dst, int n) +{ + int target; + int c; + + iunlock(ip); + target = n; + acquire(&cons.lock); + while(n > 0){ + while(input.r == input.w){ + if(myproc()->killed){ + release(&cons.lock); + ilock(ip); + return -1; + } + sleep(&input.r, &cons.lock); + } + c = input.buf[input.r++ % INPUT_BUF]; + if(c == C('D')){ // EOF + if(n < target){ + // Save ^D for next time, to make sure + // caller gets a 0-byte result. + input.r--; + } + break; + } + *dst++ = c; + --n; + if(c == '\n') + break; + } + release(&cons.lock); + ilock(ip); + + return target - n; +} + +int +consolewrite(struct inode *ip, char *buf, int n) +{ + int i; + + iunlock(ip); + acquire(&cons.lock); + for(i = 0; i < n; i++) + consputc(buf[i] & 0xff); + release(&cons.lock); + ilock(ip); + + return n; +} + +void +consoleinit(void) +{ + initlock(&cons.lock, "console"); + + devsw[CONSOLE].write = consolewrite; + devsw[CONSOLE].read = consoleread; + cons.locking = 1; + + ioapicenable(IRQ_KBD, 0); +} + diff --git a/kernel/src/exec.c b/kernel/src/exec.c new file mode 100644 index 0000000..3cde317 --- /dev/null +++ b/kernel/src/exec.c @@ -0,0 +1,115 @@ +#include "asm/x86.h" + +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "mmu.h" +#include "proc.h" +#include "defs.h" +#include "elf.h" + +int +exec(char *path, char **argv) +{ + char *s, *last; + int i, off; + uint argc, sz, sp, ustack[3+MAXARG+1]; + struct elfhdr elf; + struct inode *ip; + struct proghdr ph; + pde_t *pgdir, *oldpgdir; + struct proc *curproc = myproc(); + + begin_op(); + + if((ip = namei(path)) == 0){ + end_op(); + cprintf("exec: fail\n"); + return -1; + } + ilock(ip); + pgdir = 0; + + // Check ELF header + if(readi(ip, (char*)&elf, 0, sizeof(elf)) != sizeof(elf)) + goto bad; + if(elf.magic != ELF_MAGIC) + goto bad; + + if((pgdir = setupkvm()) == 0) + goto bad; + + // Load program into memory. 
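  // (A sketch, assuming the stock xv6 exec(): walk the ELF program headers
  // with
  //   for(i=0, off=elf.phoff; i<elf.phnum; i++, off+=sizeof(ph))
  // and for each ELF_PROG_LOAD segment readi() the header, grow the address
  // space with allocuvm(), and copy the segment in with loaduvm(); then
  // allocate two pages past the image, clearpteu() the lower one as a guard
  // page, and use the upper one as the user stack, leaving sp at its top.)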
+ sz = 0; + for(i=0, off=elf.phoff; i= MAXARG) + goto bad; + sp = (sp - (strlen(argv[argc]) + 1)) & ~3; + if(copyout(pgdir, sp, argv[argc], strlen(argv[argc]) + 1) < 0) + goto bad; + ustack[3+argc] = sp; + } + ustack[3+argc] = 0; + + ustack[0] = 0xffffffff; // fake return PC + ustack[1] = argc; + ustack[2] = sp - (argc+1)*4; // argv pointer + + sp -= (3+argc+1) * 4; + if(copyout(pgdir, sp, ustack, (3+argc+1)*4) < 0) + goto bad; + + // Save program name for debugging. + for(last=s=path; *s; s++) + if(*s == '/') + last = s+1; + safestrcpy(curproc->name, last, sizeof(curproc->name)); + + // Commit to the user image. + oldpgdir = curproc->pgdir; + curproc->pgdir = pgdir; + curproc->sz = sz; + curproc->tf->eip = elf.entry; // main + curproc->tf->esp = sp; + switchuvm(curproc); + freevm(oldpgdir); + return 0; + + bad: + if(pgdir) + freevm(pgdir); + if(ip){ + iunlockput(ip); + end_op(); + } + return -1; +} diff --git a/kernel/src/file.c b/kernel/src/file.c new file mode 100644 index 0000000..24b32c2 --- /dev/null +++ b/kernel/src/file.c @@ -0,0 +1,157 @@ +// +// File descriptors +// + +#include "types.h" +#include "defs.h" +#include "param.h" +#include "fs.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "file.h" + +struct devsw devsw[NDEV]; +struct { + struct spinlock lock; + struct file file[NFILE]; +} ftable; + +void +fileinit(void) +{ + initlock(&ftable.lock, "ftable"); +} + +// Allocate a file structure. +struct file* +filealloc(void) +{ + struct file *f; + + acquire(&ftable.lock); + for(f = ftable.file; f < ftable.file + NFILE; f++){ + if(f->ref == 0){ + f->ref = 1; + release(&ftable.lock); + return f; + } + } + release(&ftable.lock); + return 0; +} + +// Increment ref count for file f. +struct file* +filedup(struct file *f) +{ + acquire(&ftable.lock); + if(f->ref < 1) + panic("filedup"); + f->ref++; + release(&ftable.lock); + return f; +} + +// Close file f. (Decrement ref count, close when reaches 0.) +void +fileclose(struct file *f) +{ + struct file ff; + + acquire(&ftable.lock); + if(f->ref < 1) + panic("fileclose"); + if(--f->ref > 0){ + release(&ftable.lock); + return; + } + ff = *f; + f->ref = 0; + f->type = FD_NONE; + release(&ftable.lock); + + if(ff.type == FD_PIPE) + pipeclose(ff.pipe, ff.writable); + else if(ff.type == FD_INODE){ + begin_op(); + iput(ff.ip); + end_op(); + } +} + +// Get metadata about file f. +int +filestat(struct file *f, struct stat *st) +{ + if(f->type == FD_INODE){ + ilock(f->ip); + stati(f->ip, st); + iunlock(f->ip); + return 0; + } + return -1; +} + +// Read from file f. +int +fileread(struct file *f, char *addr, int n) +{ + int r; + + if(f->readable == 0) + return -1; + if(f->type == FD_PIPE) + return piperead(f->pipe, addr, n); + if(f->type == FD_INODE){ + ilock(f->ip); + if((r = readi(f->ip, addr, f->off, n)) > 0) + f->off += r; + iunlock(f->ip); + return r; + } + panic("fileread"); +} + +//PAGEBREAK! +// Write to file f. +int +filewrite(struct file *f, char *addr, int n) +{ + int r; + + if(f->writable == 0) + return -1; + if(f->type == FD_PIPE) + return pipewrite(f->pipe, addr, n); + if(f->type == FD_INODE){ + // write a few blocks at a time to avoid exceeding + // the maximum log transaction size, including + // i-node, indirect block, allocation blocks, + // and 2 blocks of slop for non-aligned writes. + // this really belongs lower down, since writei() + // might be writing a device like the console. 
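    // With the stock xv6 parameters (MAXOPBLOCKS = 10, 512-byte blocks)
    // this caps each transaction at ((10-1-1-2)/2)*512 = 1536 bytes.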
+ int max = ((MAXOPBLOCKS-1-1-2) / 2) * 512; + int i = 0; + while(i < n){ + int n1 = n - i; + if(n1 > max) + n1 = max; + + begin_op(); + ilock(f->ip); + if ((r = writei(f->ip, addr + i, f->off, n1)) > 0) + f->off += r; + iunlock(f->ip); + end_op(); + + if(r < 0) + break; + if(r != n1) + panic("short filewrite"); + i += r; + } + return i == n ? n : -1; + } + panic("filewrite"); +} + diff --git a/kernel/src/fs.c b/kernel/src/fs.c new file mode 100644 index 0000000..f77275f --- /dev/null +++ b/kernel/src/fs.c @@ -0,0 +1,670 @@ +// File system implementation. Five layers: +// + Blocks: allocator for raw disk blocks. +// + Log: crash recovery for multi-step updates. +// + Files: inode allocator, reading, writing, metadata. +// + Directories: inode with special contents (list of other inodes!) +// + Names: paths like /usr/rtm/xv6/fs.c for convenient naming. +// +// This file contains the low-level file system manipulation +// routines. The (higher-level) system call implementations +// are in sysfile.c. + +#include "types.h" +#include "defs.h" +#include "param.h" +#include "stat.h" +#include "mmu.h" +#include "proc.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "fs.h" +#include "buf.h" +#include "file.h" + +#define min(a, b) ((a) < (b) ? (a) : (b)) +static void itrunc(struct inode*); +// there should be one superblock per disk device, but we run with +// only one device +struct superblock sb; + +// Read the super block. +void +readsb(int dev, struct superblock *sb) +{ + struct buf *bp; + + bp = bread(dev, 1); + memmove(sb, bp->data, sizeof(*sb)); + brelse(bp); +} + +// Zero a block. +static void +bzero(int dev, int bno) +{ + struct buf *bp; + + bp = bread(dev, bno); + memset(bp->data, 0, BSIZE); + log_write(bp); + brelse(bp); +} + +// Blocks. + +// Allocate a zeroed disk block. +static uint +balloc(uint dev) +{ + int b, bi, m; + struct buf *bp; + + bp = 0; + for(b = 0; b < sb.size; b += BPB){ + bp = bread(dev, BBLOCK(b, sb)); + for(bi = 0; bi < BPB && b + bi < sb.size; bi++){ + m = 1 << (bi % 8); + if((bp->data[bi/8] & m) == 0){ // Is block free? + bp->data[bi/8] |= m; // Mark block in use. + log_write(bp); + brelse(bp); + bzero(dev, b + bi); + return b + bi; + } + } + brelse(bp); + } + panic("balloc: out of blocks"); +} + +// Free a disk block. +static void +bfree(int dev, uint b) +{ + struct buf *bp; + int bi, m; + + bp = bread(dev, BBLOCK(b, sb)); + bi = b % BPB; + m = 1 << (bi % 8); + if((bp->data[bi/8] & m) == 0) + panic("freeing free block"); + bp->data[bi/8] &= ~m; + log_write(bp); + brelse(bp); +} + +// Inodes. +// +// An inode describes a single unnamed file. +// The inode disk structure holds metadata: the file's type, +// its size, the number of links referring to it, and the +// list of blocks holding the file's content. +// +// The inodes are laid out sequentially on disk at +// sb.startinode. Each inode has a number, indicating its +// position on the disk. +// +// The kernel keeps a cache of in-use inodes in memory +// to provide a place for synchronizing access +// to inodes used by multiple processes. The cached +// inodes include book-keeping information that is +// not stored on disk: ip->ref and ip->valid. +// +// An inode and its in-memory representation go through a +// sequence of states before they can be used by the +// rest of the file system code. +// +// * Allocation: an inode is allocated if its type (on disk) +// is non-zero. ialloc() allocates, and iput() frees if +// the reference and link counts have fallen to zero. 
+// +// * Referencing in cache: an entry in the inode cache +// is free if ip->ref is zero. Otherwise ip->ref tracks +// the number of in-memory pointers to the entry (open +// files and current directories). iget() finds or +// creates a cache entry and increments its ref; iput() +// decrements ref. +// +// * Valid: the information (type, size, &c) in an inode +// cache entry is only correct when ip->valid is 1. +// ilock() reads the inode from +// the disk and sets ip->valid, while iput() clears +// ip->valid if ip->ref has fallen to zero. +// +// * Locked: file system code may only examine and modify +// the information in an inode and its content if it +// has first locked the inode. +// +// Thus a typical sequence is: +// ip = iget(dev, inum) +// ilock(ip) +// ... examine and modify ip->xxx ... +// iunlock(ip) +// iput(ip) +// +// ilock() is separate from iget() so that system calls can +// get a long-term reference to an inode (as for an open file) +// and only lock it for short periods (e.g., in read()). +// The separation also helps avoid deadlock and races during +// pathname lookup. iget() increments ip->ref so that the inode +// stays cached and pointers to it remain valid. +// +// Many internal file system functions expect the caller to +// have locked the inodes involved; this lets callers create +// multi-step atomic operations. +// +// The icache.lock spin-lock protects the allocation of icache +// entries. Since ip->ref indicates whether an entry is free, +// and ip->dev and ip->inum indicate which i-node an entry +// holds, one must hold icache.lock while using any of those fields. +// +// An ip->lock sleep-lock protects all ip-> fields other than ref, +// dev, and inum. One must hold ip->lock in order to +// read or write that inode's ip->valid, ip->size, ip->type, &c. + +struct { + struct spinlock lock; + struct inode inode[NINODE]; +} icache; + +void +iinit(int dev) +{ + int i = 0; + + initlock(&icache.lock, "icache"); + for(i = 0; i < NINODE; i++) { + initsleeplock(&icache.inode[i].lock, "inode"); + } + + readsb(dev, &sb); + cprintf("sb: size %d nblocks %d ninodes %d nlog %d logstart %d\ + inodestart %d bmap start %d\n", sb.size, sb.nblocks, + sb.ninodes, sb.nlog, sb.logstart, sb.inodestart, + sb.bmapstart); +} + +static struct inode* iget(uint dev, uint inum); + +//PAGEBREAK! +// Allocate an inode on device dev. +// Mark it as allocated by giving it type type. +// Returns an unlocked but allocated and referenced inode. +struct inode* +ialloc(uint dev, short type) +{ + int inum; + struct buf *bp; + struct dinode *dip; + + for(inum = 1; inum < sb.ninodes; inum++){ + bp = bread(dev, IBLOCK(inum, sb)); + dip = (struct dinode*)bp->data + inum%IPB; + if(dip->type == 0){ // a free inode + memset(dip, 0, sizeof(*dip)); + dip->type = type; + log_write(bp); // mark it allocated on the disk + brelse(bp); + return iget(dev, inum); + } + brelse(bp); + } + panic("ialloc: no inodes"); +} + +// Copy a modified in-memory inode to disk. +// Must be called after every change to an ip->xxx field +// that lives on disk, since i-node cache is write-through. +// Caller must hold ip->lock. 
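// A typical caller updates the on-disk copy inside a transaction, e.g.
// (the usual xv6 pattern, roughly as in sys_link()):
//   begin_op();
//   ilock(ip);
//   ip->nlink++;
//   iupdate(ip);
//   iunlock(ip);
//   end_op();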
+void +iupdate(struct inode *ip) +{ + struct buf *bp; + struct dinode *dip; + + bp = bread(ip->dev, IBLOCK(ip->inum, sb)); + dip = (struct dinode*)bp->data + ip->inum%IPB; + dip->type = ip->type; + dip->major = ip->major; + dip->minor = ip->minor; + dip->nlink = ip->nlink; + dip->size = ip->size; + memmove(dip->addrs, ip->addrs, sizeof(ip->addrs)); + log_write(bp); + brelse(bp); +} + +// Find the inode with number inum on device dev +// and return the in-memory copy. Does not lock +// the inode and does not read it from disk. +static struct inode* +iget(uint dev, uint inum) +{ + struct inode *ip, *empty; + + acquire(&icache.lock); + + // Is the inode already cached? + empty = 0; + for(ip = &icache.inode[0]; ip < &icache.inode[NINODE]; ip++){ + if(ip->ref > 0 && ip->dev == dev && ip->inum == inum){ + ip->ref++; + release(&icache.lock); + return ip; + } + if(empty == 0 && ip->ref == 0) // Remember empty slot. + empty = ip; + } + + // Recycle an inode cache entry. + if(empty == 0) + panic("iget: no inodes"); + + ip = empty; + ip->dev = dev; + ip->inum = inum; + ip->ref = 1; + ip->valid = 0; + release(&icache.lock); + + return ip; +} + +// Increment reference count for ip. +// Returns ip to enable ip = idup(ip1) idiom. +struct inode* +idup(struct inode *ip) +{ + acquire(&icache.lock); + ip->ref++; + release(&icache.lock); + return ip; +} + +// Lock the given inode. +// Reads the inode from disk if necessary. +void +ilock(struct inode *ip) +{ + struct buf *bp; + struct dinode *dip; + + if(ip == 0 || ip->ref < 1) + panic("ilock"); + + acquiresleep(&ip->lock); + + if(ip->valid == 0){ + bp = bread(ip->dev, IBLOCK(ip->inum, sb)); + dip = (struct dinode*)bp->data + ip->inum%IPB; + ip->type = dip->type; + ip->major = dip->major; + ip->minor = dip->minor; + ip->nlink = dip->nlink; + ip->size = dip->size; + memmove(ip->addrs, dip->addrs, sizeof(ip->addrs)); + brelse(bp); + ip->valid = 1; + if(ip->type == 0) + panic("ilock: no type"); + } +} + +// Unlock the given inode. +void +iunlock(struct inode *ip) +{ + if(ip == 0 || !holdingsleep(&ip->lock) || ip->ref < 1) + panic("iunlock"); + + releasesleep(&ip->lock); +} + +// Drop a reference to an in-memory inode. +// If that was the last reference, the inode cache entry can +// be recycled. +// If that was the last reference and the inode has no links +// to it, free the inode (and its content) on disk. +// All calls to iput() must be inside a transaction in +// case it has to free the inode. +void +iput(struct inode *ip) +{ + acquiresleep(&ip->lock); + if(ip->valid && ip->nlink == 0){ + acquire(&icache.lock); + int r = ip->ref; + release(&icache.lock); + if(r == 1){ + // inode has no links and no other references: truncate and free. + itrunc(ip); + ip->type = 0; + iupdate(ip); + ip->valid = 0; + } + } + releasesleep(&ip->lock); + + acquire(&icache.lock); + ip->ref--; + release(&icache.lock); +} + +// Common idiom: unlock, then put. +void +iunlockput(struct inode *ip) +{ + iunlock(ip); + iput(ip); +} + +//PAGEBREAK! +// Inode content +// +// The content (data) associated with each inode is stored +// in blocks on the disk. The first NDIRECT block numbers +// are listed in ip->addrs[]. The next NINDIRECT blocks are +// listed in block ip->addrs[NDIRECT]. + +// Return the disk block address of the nth block in inode ip. +// If there is no such block, bmap allocates one. 
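// With the stock xv6 constants the indirect block holds NINDIRECT
// (= BSIZE/4) entries, so a file tops out at NDIRECT + NINDIRECT
// blocks (MAXFILE).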
+static uint +bmap(struct inode *ip, uint bn) +{ + uint addr, *a; + struct buf *bp; + + if(bn < NDIRECT){ + if((addr = ip->addrs[bn]) == 0) + ip->addrs[bn] = addr = balloc(ip->dev); + return addr; + } + bn -= NDIRECT; + + if(bn < NINDIRECT){ + // Load indirect block, allocating if necessary. + if((addr = ip->addrs[NDIRECT]) == 0) + ip->addrs[NDIRECT] = addr = balloc(ip->dev); + bp = bread(ip->dev, addr); + a = (uint*)bp->data; + if((addr = a[bn]) == 0){ + a[bn] = addr = balloc(ip->dev); + log_write(bp); + } + brelse(bp); + return addr; + } + + panic("bmap: out of range"); +} + +// Truncate inode (discard contents). +// Only called when the inode has no links +// to it (no directory entries referring to it) +// and has no in-memory reference to it (is +// not an open file or current directory). +static void +itrunc(struct inode *ip) +{ + int i, j; + struct buf *bp; + uint *a; + + for(i = 0; i < NDIRECT; i++){ + if(ip->addrs[i]){ + bfree(ip->dev, ip->addrs[i]); + ip->addrs[i] = 0; + } + } + + if(ip->addrs[NDIRECT]){ + bp = bread(ip->dev, ip->addrs[NDIRECT]); + a = (uint*)bp->data; + for(j = 0; j < NINDIRECT; j++){ + if(a[j]) + bfree(ip->dev, a[j]); + } + brelse(bp); + bfree(ip->dev, ip->addrs[NDIRECT]); + ip->addrs[NDIRECT] = 0; + } + + ip->size = 0; + iupdate(ip); +} + +// Copy stat information from inode. +// Caller must hold ip->lock. +void +stati(struct inode *ip, struct stat *st) +{ + st->dev = ip->dev; + st->ino = ip->inum; + st->type = ip->type; + st->nlink = ip->nlink; + st->size = ip->size; +} + +//PAGEBREAK! +// Read data from inode. +// Caller must hold ip->lock. +int +readi(struct inode *ip, char *dst, uint off, uint n) +{ + uint tot, m; + struct buf *bp; + + if(ip->type == T_DEV){ + if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].read) + return -1; + return devsw[ip->major].read(ip, dst, n); + } + + if(off > ip->size || off + n < off) + return -1; + if(off + n > ip->size) + n = ip->size - off; + + for(tot=0; totdev, bmap(ip, off/BSIZE)); + m = min(n - tot, BSIZE - off%BSIZE); + memmove(dst, bp->data + off%BSIZE, m); + brelse(bp); + } + return n; +} + +// PAGEBREAK! +// Write data to inode. +// Caller must hold ip->lock. +int +writei(struct inode *ip, char *src, uint off, uint n) +{ + uint tot, m; + struct buf *bp; + + if(ip->type == T_DEV){ + if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].write) + return -1; + return devsw[ip->major].write(ip, src, n); + } + + if(off > ip->size || off + n < off) + return -1; + if(off + n > MAXFILE*BSIZE) + return -1; + + for(tot=0; totdev, bmap(ip, off/BSIZE)); + m = min(n - tot, BSIZE - off%BSIZE); + memmove(bp->data + off%BSIZE, src, m); + log_write(bp); + brelse(bp); + } + + if(n > 0 && off > ip->size){ + ip->size = off; + iupdate(ip); + } + return n; +} + +//PAGEBREAK! +// Directories + +int +namecmp(const char *s, const char *t) +{ + return strncmp(s, t, DIRSIZ); +} + +// Look for a directory entry in a directory. +// If found, set *poff to byte offset of entry. 
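// Callers hold dp locked; namex(), for instance, does roughly:
//   ilock(dp);
//   next = dirlookup(dp, name, 0);
//   iunlockput(dp);
// and then locks next before using it.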
+struct inode* +dirlookup(struct inode *dp, char *name, uint *poff) +{ + uint off, inum; + struct dirent de; + + if(dp->type != T_DIR) + panic("dirlookup not DIR"); + + for(off = 0; off < dp->size; off += sizeof(de)){ + if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de)) + panic("dirlookup read"); + if(de.inum == 0) + continue; + if(namecmp(name, de.name) == 0){ + // entry matches path element + if(poff) + *poff = off; + inum = de.inum; + return iget(dp->dev, inum); + } + } + + return 0; +} + +// Write a new directory entry (name, inum) into the directory dp. +int +dirlink(struct inode *dp, char *name, uint inum) +{ + int off; + struct dirent de; + struct inode *ip; + + // Check that name is not present. + if((ip = dirlookup(dp, name, 0)) != 0){ + iput(ip); + return -1; + } + + // Look for an empty dirent. + for(off = 0; off < dp->size; off += sizeof(de)){ + if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de)) + panic("dirlink read"); + if(de.inum == 0) + break; + } + + strncpy(de.name, name, DIRSIZ); + de.inum = inum; + if(writei(dp, (char*)&de, off, sizeof(de)) != sizeof(de)) + panic("dirlink"); + + return 0; +} + +//PAGEBREAK! +// Paths + +// Copy the next path element from path into name. +// Return a pointer to the element following the copied one. +// The returned path has no leading slashes, +// so the caller can check *path=='\0' to see if the name is the last one. +// If no name to remove, return 0. +// +// Examples: +// skipelem("a/bb/c", name) = "bb/c", setting name = "a" +// skipelem("///a//bb", name) = "bb", setting name = "a" +// skipelem("a", name) = "", setting name = "a" +// skipelem("", name) = skipelem("////", name) = 0 +// +static char* +skipelem(char *path, char *name) +{ + char *s; + int len; + + while(*path == '/') + path++; + if(*path == 0) + return 0; + s = path; + while(*path != '/' && *path != 0) + path++; + len = path - s; + if(len >= DIRSIZ) + memmove(name, s, DIRSIZ); + else { + memmove(name, s, len); + name[len] = 0; + } + while(*path == '/') + path++; + return path; +} + +// Look up and return the inode for a path name. +// If parent != 0, return the inode for the parent and copy the final +// path element into name, which must have room for DIRSIZ bytes. +// Must be called inside a transaction since it calls iput(). +static struct inode* +namex(char *path, int nameiparent, char *name) +{ + struct inode *ip, *next; + + if(*path == '/') + ip = iget(ROOTDEV, ROOTINO); + else + ip = idup(myproc()->cwd); + + while((path = skipelem(path, name)) != 0){ + ilock(ip); + if(ip->type != T_DIR){ + iunlockput(ip); + return 0; + } + if(nameiparent && *path == '\0'){ + // Stop one level early. + iunlock(ip); + return ip; + } + if((next = dirlookup(ip, name, 0)) == 0){ + iunlockput(ip); + return 0; + } + iunlockput(ip); + ip = next; + } + if(nameiparent){ + iput(ip); + return 0; + } + return ip; +} + +struct inode* +namei(char *path) +{ + char name[DIRSIZ]; + return namex(path, 0, name); +} + +struct inode* +nameiparent(char *path, char *name) +{ + return namex(path, 1, name); +} diff --git a/kernel/src/ide.c b/kernel/src/ide.c new file mode 100644 index 0000000..4f1e6eb --- /dev/null +++ b/kernel/src/ide.c @@ -0,0 +1,169 @@ +// Simple PIO-based (non-DMA) IDE driver code. 
+ +#include "asm/x86.h" + +#include "types.h" +#include "defs.h" +#include "param.h" +#include "memlayout.h" +#include "mmu.h" +#include "proc.h" +#include "traps.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "fs.h" +#include "buf.h" + +#define SECTOR_SIZE 512 +#define IDE_BSY 0x80 +#define IDE_DRDY 0x40 +#define IDE_DF 0x20 +#define IDE_ERR 0x01 + +#define IDE_CMD_READ 0x20 +#define IDE_CMD_WRITE 0x30 +#define IDE_CMD_RDMUL 0xc4 +#define IDE_CMD_WRMUL 0xc5 + +// idequeue points to the buf now being read/written to the disk. +// idequeue->qnext points to the next buf to be processed. +// You must hold idelock while manipulating queue. + +static struct spinlock idelock; +static struct buf *idequeue; + +static int havedisk1; +static void idestart(struct buf*); + +// Wait for IDE disk to become ready. +static int +idewait(int checkerr) +{ + int r; + + while(((r = inb(0x1f7)) & (IDE_BSY|IDE_DRDY)) != IDE_DRDY) + ; + if(checkerr && (r & (IDE_DF|IDE_ERR)) != 0) + return -1; + return 0; +} + +void +ideinit(void) +{ + int i; + + initlock(&idelock, "ide"); + ioapicenable(IRQ_IDE, ncpu - 1); + idewait(0); + + // Check if disk 1 is present + outb(0x1f6, 0xe0 | (1<<4)); + for(i=0; i<1000; i++){ + if(inb(0x1f7) != 0){ + havedisk1 = 1; + break; + } + } + + // Switch back to disk 0. + outb(0x1f6, 0xe0 | (0<<4)); +} + +// Start the request for b. Caller must hold idelock. +static void +idestart(struct buf *b) +{ + if(b == 0) + panic("idestart"); + if(b->blockno >= FSSIZE) + panic("incorrect blockno"); + int sector_per_block = BSIZE/SECTOR_SIZE; + int sector = b->blockno * sector_per_block; + int read_cmd = (sector_per_block == 1) ? IDE_CMD_READ : IDE_CMD_RDMUL; + int write_cmd = (sector_per_block == 1) ? IDE_CMD_WRITE : IDE_CMD_WRMUL; + + if (sector_per_block > 7) panic("idestart"); + + idewait(0); + outb(0x3f6, 0); // generate interrupt + outb(0x1f2, sector_per_block); // number of sectors + outb(0x1f3, sector & 0xff); + outb(0x1f4, (sector >> 8) & 0xff); + outb(0x1f5, (sector >> 16) & 0xff); + outb(0x1f6, 0xe0 | ((b->dev&1)<<4) | ((sector>>24)&0x0f)); + if(b->flags & B_DIRTY){ + outb(0x1f7, write_cmd); + outsl(0x1f0, b->data, BSIZE/4); + } else { + outb(0x1f7, read_cmd); + } +} + +// Interrupt handler. +void +ideintr(void) +{ + struct buf *b; + + // First queued buffer is the active request. + acquire(&idelock); + + if((b = idequeue) == 0){ + release(&idelock); + return; + } + idequeue = b->qnext; + + // Read data if needed. + if(!(b->flags & B_DIRTY) && idewait(1) >= 0) + insl(0x1f0, b->data, BSIZE/4); + + // Wake process waiting for this buf. + b->flags |= B_VALID; + b->flags &= ~B_DIRTY; + wakeup(b); + + // Start disk on next buf in queue. + if(idequeue != 0) + idestart(idequeue); + + release(&idelock); +} + +//PAGEBREAK! +// Sync buf with disk. +// If B_DIRTY is set, write buf to disk, clear B_DIRTY, set B_VALID. +// Else if B_VALID is not set, read buf from disk, set B_VALID. +void +iderw(struct buf *b) +{ + struct buf **pp; + + if(!holdingsleep(&b->lock)) + panic("iderw: buf not locked"); + if((b->flags & (B_VALID|B_DIRTY)) == B_VALID) + panic("iderw: nothing to do"); + if(b->dev != 0 && !havedisk1) + panic("iderw: ide disk 1 not present"); + + acquire(&idelock); //DOC:acquire-lock + + // Append b to idequeue. + b->qnext = 0; + for(pp=&idequeue; *pp; pp=&(*pp)->qnext) //DOC:insert-queue + ; + *pp = b; + + // Start disk if necessary. + if(idequeue == b) + idestart(b); + + // Wait for request to finish. 
+ while((b->flags & (B_VALID|B_DIRTY)) != B_VALID){ + sleep(b, &idelock); + } + + + release(&idelock); +} diff --git a/kernel/src/ioapic.c b/kernel/src/ioapic.c new file mode 100644 index 0000000..cb0f015 --- /dev/null +++ b/kernel/src/ioapic.c @@ -0,0 +1,75 @@ +// The I/O APIC manages hardware interrupts for an SMP system. +// http://www.intel.com/design/chipsets/datashts/29056601.pdf +// See also picirq.c. + +#include "types.h" +#include "defs.h" +#include "traps.h" + +#define IOAPIC 0xFEC00000 // Default physical address of IO APIC + +#define REG_ID 0x00 // Register index: ID +#define REG_VER 0x01 // Register index: version +#define REG_TABLE 0x10 // Redirection table base + +// The redirection table starts at REG_TABLE and uses +// two registers to configure each interrupt. +// The first (low) register in a pair contains configuration bits. +// The second (high) register contains a bitmask telling which +// CPUs can serve that interrupt. +#define INT_DISABLED 0x00010000 // Interrupt disabled +#define INT_LEVEL 0x00008000 // Level-triggered (vs edge-) +#define INT_ACTIVELOW 0x00002000 // Active low (vs high) +#define INT_LOGICAL 0x00000800 // Destination is CPU id (vs APIC ID) + +volatile struct ioapic *ioapic; + +// IO APIC MMIO structure: write reg, then read or write data. +struct ioapic { + uint reg; + uint pad[3]; + uint data; +}; + +static uint +ioapicread(int reg) +{ + ioapic->reg = reg; + return ioapic->data; +} + +static void +ioapicwrite(int reg, uint data) +{ + ioapic->reg = reg; + ioapic->data = data; +} + +void +ioapicinit(void) +{ + int i, id, maxintr; + + ioapic = (volatile struct ioapic*)IOAPIC; + maxintr = (ioapicread(REG_VER) >> 16) & 0xFF; + id = ioapicread(REG_ID) >> 24; + if(id != ioapicid) + cprintf("ioapicinit: id isn't equal to ioapicid; not a MP\n"); + + // Mark all interrupts edge-triggered, active high, disabled, + // and not routed to any CPUs. + for(i = 0; i <= maxintr; i++){ + ioapicwrite(REG_TABLE+2*i, INT_DISABLED | (T_IRQ0 + i)); + ioapicwrite(REG_TABLE+2*i+1, 0); + } +} + +void +ioapicenable(int irq, int cpunum) +{ + // Mark interrupt edge-triggered, active high, + // enabled, and routed to the given cpunum, + // which happens to be that cpu's APIC ID. + ioapicwrite(REG_TABLE+2*irq, T_IRQ0 + irq); + ioapicwrite(REG_TABLE+2*irq+1, cpunum << 24); +} diff --git a/kernel/src/kalloc.c b/kernel/src/kalloc.c new file mode 100644 index 0000000..14cd4f4 --- /dev/null +++ b/kernel/src/kalloc.c @@ -0,0 +1,96 @@ +// Physical memory allocator, intended to allocate +// memory for user processes, kernel stacks, page table pages, +// and pipe buffers. Allocates 4096-byte pages. + +#include "types.h" +#include "defs.h" +#include "param.h" +#include "memlayout.h" +#include "mmu.h" +#include "spinlock.h" + +void freerange(void *vstart, void *vend); +extern char end[]; // first address after kernel loaded from ELF file + // defined by the kernel linker script in kernel.ld + +struct run { + struct run *next; +}; + +struct { + struct spinlock lock; + int use_lock; + struct run *freelist; +} kmem; + +// Initialization happens in two phases. +// 1. main() calls kinit1() while still using entrypgdir to place just +// the pages mapped by entrypgdir on free list. +// 2. main() calls kinit2() with the rest of the physical pages +// after installing a full page table that maps them on all cores. 
+void +kinit1(void *vstart, void *vend) +{ + initlock(&kmem.lock, "kmem"); + kmem.use_lock = 0; + freerange(vstart, vend); +} + +void +kinit2(void *vstart, void *vend) +{ + freerange(vstart, vend); + kmem.use_lock = 1; +} + +void +freerange(void *vstart, void *vend) +{ + char *p; + p = (char*)PGROUNDUP((uint)vstart); + for(; p + PGSIZE <= (char*)vend; p += PGSIZE) + kfree(p); +} +//PAGEBREAK: 21 +// Free the page of physical memory pointed at by v, +// which normally should have been returned by a +// call to kalloc(). (The exception is when +// initializing the allocator; see kinit above.) +void +kfree(char *v) +{ + struct run *r; + + if((uint)v % PGSIZE || v < end || V2P(v) >= PHYSTOP) + panic("kfree"); + + // Fill with junk to catch dangling refs. + memset(v, 1, PGSIZE); + + if(kmem.use_lock) + acquire(&kmem.lock); + r = (struct run*)v; + r->next = kmem.freelist; + kmem.freelist = r; + if(kmem.use_lock) + release(&kmem.lock); +} + +// Allocate one 4096-byte page of physical memory. +// Returns a pointer that the kernel can use. +// Returns 0 if the memory cannot be allocated. +char* +kalloc(void) +{ + struct run *r; + + if(kmem.use_lock) + acquire(&kmem.lock); + r = kmem.freelist; + if(r) + kmem.freelist = r->next; + if(kmem.use_lock) + release(&kmem.lock); + return (char*)r; +} + diff --git a/kernel/src/kbd.c b/kernel/src/kbd.c new file mode 100644 index 0000000..74414c9 --- /dev/null +++ b/kernel/src/kbd.c @@ -0,0 +1,50 @@ +#include "asm/x86.h" +#include "types.h" +#include "defs.h" +#include "kbd.h" + +int +kbdgetc(void) +{ + static uint shift; + static uchar *charcode[4] = { + normalmap, shiftmap, ctlmap, ctlmap + }; + uint st, data, c; + + st = inb(KBSTATP); + if((st & KBS_DIB) == 0) + return -1; + data = inb(KBDATAP); + + if(data == 0xE0){ + shift |= E0ESC; + return 0; + } else if(data & 0x80){ + // Key released + data = (shift & E0ESC ? data : data & 0x7F); + shift &= ~(shiftcode[data] | E0ESC); + return 0; + } else if(shift & E0ESC){ + // Last character was an E0 escape; or with 0x80 + data |= 0x80; + shift &= ~E0ESC; + } + + shift |= shiftcode[data]; + shift ^= togglecode[data]; + c = charcode[shift & (CTL | SHIFT)][data]; + if(shift & CAPSLOCK){ + if('a' <= c && c <= 'z') + c += 'A' - 'a'; + else if('A' <= c && c <= 'Z') + c += 'a' - 'A'; + } + return c; +} + +void +kbdintr(void) +{ + consoleintr(kbdgetc); +} diff --git a/kernel/src/lapic.c b/kernel/src/lapic.c new file mode 100644 index 0000000..bacc192 --- /dev/null +++ b/kernel/src/lapic.c @@ -0,0 +1,229 @@ +// The local APIC manages internal (non-I/O) interrupts. +// See Chapter 8 & Appendix C of Intel processor manual volume 3. + +#include "asm/x86.h" +#include "param.h" +#include "types.h" +#include "defs.h" +#include "date.h" +#include "memlayout.h" +#include "traps.h" +#include "mmu.h" + +// Local APIC registers, divided by 4 for use as uint[] indices. +#define ID (0x0020/4) // ID +#define VER (0x0030/4) // Version +#define TPR (0x0080/4) // Task Priority +#define EOI (0x00B0/4) // EOI +#define SVR (0x00F0/4) // Spurious Interrupt Vector + #define ENABLE 0x00000100 // Unit Enable +#define ESR (0x0280/4) // Error Status +#define ICRLO (0x0300/4) // Interrupt Command + #define INIT 0x00000500 // INIT/RESET + #define STARTUP 0x00000600 // Startup IPI + #define DELIVS 0x00001000 // Delivery status + #define ASSERT 0x00004000 // Assert interrupt (vs deassert) + #define DEASSERT 0x00000000 + #define LEVEL 0x00008000 // Level triggered + #define BCAST 0x00080000 // Send to all APICs, including self. 
+ #define BUSY 0x00001000 + #define FIXED 0x00000000 +#define ICRHI (0x0310/4) // Interrupt Command [63:32] +#define TIMER (0x0320/4) // Local Vector Table 0 (TIMER) + #define X1 0x0000000B // divide counts by 1 + #define PERIODIC 0x00020000 // Periodic +#define PCINT (0x0340/4) // Performance Counter LVT +#define LINT0 (0x0350/4) // Local Vector Table 1 (LINT0) +#define LINT1 (0x0360/4) // Local Vector Table 2 (LINT1) +#define ERROR (0x0370/4) // Local Vector Table 3 (ERROR) + #define MASKED 0x00010000 // Interrupt masked +#define TICR (0x0380/4) // Timer Initial Count +#define TCCR (0x0390/4) // Timer Current Count +#define TDCR (0x03E0/4) // Timer Divide Configuration + +volatile uint *lapic; // Initialized in mp.c + +//PAGEBREAK! +static void +lapicw(int index, int value) +{ + lapic[index] = value; + lapic[ID]; // wait for write to finish, by reading +} + +void +lapicinit(void) +{ + if(!lapic) + return; + + // Enable local APIC; set spurious interrupt vector. + lapicw(SVR, ENABLE | (T_IRQ0 + IRQ_SPURIOUS)); + + // The timer repeatedly counts down at bus frequency + // from lapic[TICR] and then issues an interrupt. + // If xv6 cared more about precise timekeeping, + // TICR would be calibrated using an external time source. + lapicw(TDCR, X1); + lapicw(TIMER, PERIODIC | (T_IRQ0 + IRQ_TIMER)); + lapicw(TICR, 10000000); + + // Disable logical interrupt lines. + lapicw(LINT0, MASKED); + lapicw(LINT1, MASKED); + + // Disable performance counter overflow interrupts + // on machines that provide that interrupt entry. + if(((lapic[VER]>>16) & 0xFF) >= 4) + lapicw(PCINT, MASKED); + + // Map error interrupt to IRQ_ERROR. + lapicw(ERROR, T_IRQ0 + IRQ_ERROR); + + // Clear error status register (requires back-to-back writes). + lapicw(ESR, 0); + lapicw(ESR, 0); + + // Ack any outstanding interrupts. + lapicw(EOI, 0); + + // Send an Init Level De-Assert to synchronise arbitration ID's. + lapicw(ICRHI, 0); + lapicw(ICRLO, BCAST | INIT | LEVEL); + while(lapic[ICRLO] & DELIVS) + ; + + // Enable interrupts on the APIC (but not on the processor). + lapicw(TPR, 0); +} + +int +lapicid(void) +{ + if (!lapic) + return 0; + return lapic[ID] >> 24; +} + +// Acknowledge interrupt. +void +lapiceoi(void) +{ + if(lapic) + lapicw(EOI, 0); +} + +// Spin for a given number of microseconds. +// On real hardware would want to tune this dynamically. +void +microdelay(int us) +{ +} + +#define CMOS_PORT 0x70 +#define CMOS_RETURN 0x71 + +// Start additional processor running entry code at addr. +// See Appendix B of MultiProcessor Specification. +void +lapicstartap(uchar apicid, uint addr) +{ + int i; + ushort *wrv; + + // "The BSP must initialize CMOS shutdown code to 0AH + // and the warm reset vector (DWORD based at 40:67) to point at + // the AP startup code prior to the [universal startup algorithm]." + outb(CMOS_PORT, 0xF); // offset 0xF is shutdown code + outb(CMOS_PORT+1, 0x0A); + wrv = (ushort*)P2V((0x40<<4 | 0x67)); // Warm reset vector + wrv[0] = 0; + wrv[1] = addr >> 4; + + // "Universal startup algorithm." + // Send INIT (level-triggered) interrupt to reset other CPU. + lapicw(ICRHI, apicid<<24); + lapicw(ICRLO, INIT | LEVEL | ASSERT); + microdelay(200); + lapicw(ICRLO, INIT | LEVEL); + microdelay(100); // should be 10ms, but too slow in Bochs! + + // Send startup IPI (twice!) to enter code. + // Regular hardware is supposed to only accept a STARTUP + // when it is in the halted state due to an INIT. So the second + // should be ignored, but it is part of the official Intel algorithm. 
+ // Bochs complains about the second one. Too bad for Bochs. + for(i = 0; i < 2; i++){ + lapicw(ICRHI, apicid<<24); + lapicw(ICRLO, STARTUP | (addr>>12)); + microdelay(200); + } +} + +#define CMOS_STATA 0x0a +#define CMOS_STATB 0x0b +#define CMOS_UIP (1 << 7) // RTC update in progress + +#define SECS 0x00 +#define MINS 0x02 +#define HOURS 0x04 +#define DAY 0x07 +#define MONTH 0x08 +#define YEAR 0x09 + +static uint +cmos_read(uint reg) +{ + outb(CMOS_PORT, reg); + microdelay(200); + + return inb(CMOS_RETURN); +} + +static void +fill_rtcdate(struct rtcdate *r) +{ + r->second = cmos_read(SECS); + r->minute = cmos_read(MINS); + r->hour = cmos_read(HOURS); + r->day = cmos_read(DAY); + r->month = cmos_read(MONTH); + r->year = cmos_read(YEAR); +} + +// qemu seems to use 24-hour GWT and the values are BCD encoded +void +cmostime(struct rtcdate *r) +{ + struct rtcdate t1, t2; + int sb, bcd; + + sb = cmos_read(CMOS_STATB); + + bcd = (sb & (1 << 2)) == 0; + + // make sure CMOS doesn't modify time while we read it + for(;;) { + fill_rtcdate(&t1); + if(cmos_read(CMOS_STATA) & CMOS_UIP) + continue; + fill_rtcdate(&t2); + if(memcmp(&t1, &t2, sizeof(t1)) == 0) + break; + } + + // convert + if(bcd) { +#define CONV(x) (t1.x = ((t1.x >> 4) * 10) + (t1.x & 0xf)) + CONV(second); + CONV(minute); + CONV(hour ); + CONV(day ); + CONV(month ); + CONV(year ); +#undef CONV + } + + *r = t1; + r->year += 2000; +} diff --git a/kernel/src/log.c b/kernel/src/log.c new file mode 100644 index 0000000..a64c0f6 --- /dev/null +++ b/kernel/src/log.c @@ -0,0 +1,234 @@ +#include "types.h" +#include "defs.h" +#include "param.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "fs.h" +#include "buf.h" + +// Simple logging that allows concurrent FS system calls. +// +// A log transaction contains the updates of multiple FS system +// calls. The logging system only commits when there are +// no FS system calls active. Thus there is never +// any reasoning required about whether a commit might +// write an uncommitted system call's updates to disk. +// +// A system call should call begin_op()/end_op() to mark +// its start and end. Usually begin_op() just increments +// the count of in-progress FS system calls and returns. +// But if it thinks the log is close to running out, it +// sleeps until the last outstanding end_op() commits. +// +// The log is a physical re-do log containing disk blocks. +// The on-disk log format: +// header block, containing block #s for block A, B, C, ... +// block A +// block B +// block C +// ... +// Log appends are synchronous. + +// Contents of the header block, used for both the on-disk header block +// and to keep track in memory of logged block# before commit. +struct logheader { + int n; + int block[LOGSIZE]; +}; + +struct log { + struct spinlock lock; + int start; + int size; + int outstanding; // how many FS sys calls are executing. + int committing; // in commit(), please wait. 
+ int dev; + struct logheader lh; +}; +struct log log; + +static void recover_from_log(void); +static void commit(); + +void +initlog(int dev) +{ + if (sizeof(struct logheader) >= BSIZE) + panic("initlog: too big logheader"); + + struct superblock sb; + initlock(&log.lock, "log"); + readsb(dev, &sb); + log.start = sb.logstart; + log.size = sb.nlog; + log.dev = dev; + recover_from_log(); +} + +// Copy committed blocks from log to their home location +static void +install_trans(void) +{ + int tail; + + for (tail = 0; tail < log.lh.n; tail++) { + struct buf *lbuf = bread(log.dev, log.start+tail+1); // read log block + struct buf *dbuf = bread(log.dev, log.lh.block[tail]); // read dst + memmove(dbuf->data, lbuf->data, BSIZE); // copy block to dst + bwrite(dbuf); // write dst to disk + brelse(lbuf); + brelse(dbuf); + } +} + +// Read the log header from disk into the in-memory log header +static void +read_head(void) +{ + struct buf *buf = bread(log.dev, log.start); + struct logheader *lh = (struct logheader *) (buf->data); + int i; + log.lh.n = lh->n; + for (i = 0; i < log.lh.n; i++) { + log.lh.block[i] = lh->block[i]; + } + brelse(buf); +} + +// Write in-memory log header to disk. +// This is the true point at which the +// current transaction commits. +static void +write_head(void) +{ + struct buf *buf = bread(log.dev, log.start); + struct logheader *hb = (struct logheader *) (buf->data); + int i; + hb->n = log.lh.n; + for (i = 0; i < log.lh.n; i++) { + hb->block[i] = log.lh.block[i]; + } + bwrite(buf); + brelse(buf); +} + +static void +recover_from_log(void) +{ + read_head(); + install_trans(); // if committed, copy from log to disk + log.lh.n = 0; + write_head(); // clear the log +} + +// called at the start of each FS system call. +void +begin_op(void) +{ + acquire(&log.lock); + while(1){ + if(log.committing){ + sleep(&log, &log.lock); + } else if(log.lh.n + (log.outstanding+1)*MAXOPBLOCKS > LOGSIZE){ + // this op might exhaust log space; wait for commit. + sleep(&log, &log.lock); + } else { + log.outstanding += 1; + release(&log.lock); + break; + } + } +} + +// called at the end of each FS system call. +// commits if this was the last outstanding operation. +void +end_op(void) +{ + int do_commit = 0; + + acquire(&log.lock); + log.outstanding -= 1; + if(log.committing) + panic("log.committing"); + if(log.outstanding == 0){ + do_commit = 1; + log.committing = 1; + } else { + // begin_op() may be waiting for log space, + // and decrementing log.outstanding has decreased + // the amount of reserved space. + wakeup(&log); + } + release(&log.lock); + + if(do_commit){ + // call commit w/o holding locks, since not allowed + // to sleep with locks. + commit(); + acquire(&log.lock); + log.committing = 0; + wakeup(&log); + release(&log.lock); + } +} + +// Copy modified blocks from cache to log. +static void +write_log(void) +{ + int tail; + + for (tail = 0; tail < log.lh.n; tail++) { + struct buf *to = bread(log.dev, log.start+tail+1); // log block + struct buf *from = bread(log.dev, log.lh.block[tail]); // cache block + memmove(to->data, from->data, BSIZE); + bwrite(to); // write the log + brelse(from); + brelse(to); + } +} + +static void +commit() +{ + if (log.lh.n > 0) { + write_log(); // Write modified blocks from cache to log + write_head(); // Write header to disk -- the real commit + install_trans(); // Now install writes to home locations + log.lh.n = 0; + write_head(); // Erase the transaction from the log + } +} + +// Caller has modified b->data and is done with the buffer. 
+// Record the block number and pin in the cache with B_DIRTY.
+// commit()/write_log() will do the disk write.
+//
+// log_write() replaces bwrite(); a typical use is:
+//   bp = bread(...)
+//   modify bp->data[]
+//   log_write(bp)
+//   brelse(bp)
+void
+log_write(struct buf *b)
+{
+  int i;
+
+  if (log.lh.n >= LOGSIZE || log.lh.n >= log.size - 1)
+    panic("too big a transaction");
+  if (log.outstanding < 1)
+    panic("log_write outside of trans");
+
+  acquire(&log.lock);
+  for (i = 0; i < log.lh.n; i++) {
+    if (log.lh.block[i] == b->blockno)   // log absorption
+      break;
+  }
+  log.lh.block[i] = b->blockno;
+  if (i == log.lh.n)
+    log.lh.n++;
+  b->flags |= B_DIRTY; // prevent eviction
+  release(&log.lock);
+}
+
diff --git a/kernel/src/main.c b/kernel/src/main.c
new file mode 100644
index 0000000..43c722d
--- /dev/null
+++ b/kernel/src/main.c
@@ -0,0 +1,118 @@
+#include <stdatomic.h>
+
+#include "asm/x86.h"
+#include "types.h"
+#include "defs.h"
+#include "param.h"
+#include "memlayout.h"
+#include "mmu.h"
+#include "proc.h"
+
+static void startothers(void);
+static void mpmain(void) __attribute__((noreturn));
+extern pde_t *kpgdir;
+extern char end[]; // first address after kernel loaded from ELF file
+
+// Bootstrap processor starts running C code here.
+// Allocate a real stack and switch to it, first
+// doing some setup required for memory allocator to work.
+int
+main(void)
+{
+  kinit1(end, P2V(4*1024*1024)); // phys page allocator
+  kvmalloc();      // kernel page table
+  mpinit();        // detect other processors
+  lapicinit();     // interrupt controller
+  seginit();       // segment descriptors
+  picinit();       // disable pic
+  ioapicinit();    // another interrupt controller
+  consoleinit();   // console hardware
+  uartinit();      // serial port
+  pinit();         // process table
+  tvinit();        // trap vectors
+  binit();         // buffer cache
+  fileinit();      // file table
+  ideinit();       // disk
+  startothers();   // start other processors
+  kinit2(P2V(4*1024*1024), P2V(PHYSTOP)); // must come after startothers()
+  userinit();      // first user process
+  mpmain();        // finish this processor's setup
+}
+
+// Other CPUs jump here from entryother.S.
+static void
+mpenter(void)
+{
+  switchkvm();
+  seginit();
+  lapicinit();
+  mpmain();
+}
+
+// Common CPU setup code.
+static void
+mpmain(void)
+{
+  cprintf("cpu%d: starting %d\n", cpuid(), cpuid());
+  idtinit();       // load idt register
+  atomic_store(&mycpu()->started, 1); // tell startothers() we're up -- atomically
+  scheduler();     // start running processes
+}
+
+pde_t entrypgdir[];  // For entry.S
+
+// Start the non-boot (AP) processors.
+static void
+startothers(void)
+{
+  extern uchar _binary_entryother_start[], _binary_entryother_size[];
+  uchar *code;
+  struct cpu *c;
+  char *stack;
+
+  // Write entry code to unused memory at 0x7000.
+  // The linker has placed the image of entryother.S in
+  // _binary_entryother_start.
+  code = P2V(0x7000);
+  memmove(code, _binary_entryother_start, (uint)_binary_entryother_size);
+
+  for(c = cpus; c < cpus+ncpu; c++){
+    if(c == mycpu())  // We've started already.
+      continue;
+
+    // Tell entryother.S what stack to use, where to enter, and what
+    // pgdir to use. We cannot use kpgdir yet, because the AP processor
+    // is running in low memory, so we use entrypgdir for the APs too.
+ stack = kalloc(); + *(void**)(code-4) = stack + KSTACKSIZE; + *(void(**)(void))(code-8) = mpenter; + *(int**)(code-12) = (void *) V2P(entrypgdir); + + lapicstartap(c->apicid, V2P(code)); + + // wait for cpu to finish mpmain() + while(atomic_load(&c->started) == 0) + ; + } +} + +// The boot page table used in entry.S and entryother.S. +// Page directories (and page tables) must start on page boundaries, +// hence the __aligned__ attribute. +// PTE_PS in a page directory entry enables 4Mbyte pages. + +__attribute__((__aligned__(PGSIZE))) +pde_t entrypgdir[NPDENTRIES] = { + // Map VA's [0, 4MB) to PA's [0, 4MB) + [0] = (0) | PTE_P | PTE_W | PTE_PS, + // Map VA's [KERNBASE, KERNBASE+4MB) to PA's [0, 4MB) + [KERNBASE>>PDXSHIFT] = (0) | PTE_P | PTE_W | PTE_PS, +}; + +//PAGEBREAK! +// Blank page. +//PAGEBREAK! +// Blank page. +//PAGEBREAK! +// Blank page. + diff --git a/kernel/src/memide.c b/kernel/src/memide.c new file mode 100644 index 0000000..ba267ac --- /dev/null +++ b/kernel/src/memide.c @@ -0,0 +1,60 @@ +// Fake IDE disk; stores blocks in memory. +// Useful for running kernel without scratch disk. + +#include "types.h" +#include "defs.h" +#include "param.h" +#include "mmu.h" +#include "proc.h" +#include "x86.h" +#include "traps.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "fs.h" +#include "buf.h" + +extern uchar _binary_fs_img_start[], _binary_fs_img_size[]; + +static int disksize; +static uchar *memdisk; + +void +ideinit(void) +{ + memdisk = _binary_fs_img_start; + disksize = (uint)_binary_fs_img_size/BSIZE; +} + +// Interrupt handler. +void +ideintr(void) +{ + // no-op +} + +// Sync buf with disk. +// If B_DIRTY is set, write buf to disk, clear B_DIRTY, set B_VALID. +// Else if B_VALID is not set, read buf from disk, set B_VALID. +void +iderw(struct buf *b) +{ + uchar *p; + + if(!holdingsleep(&b->lock)) + panic("iderw: buf not locked"); + if((b->flags & (B_VALID|B_DIRTY)) == B_VALID) + panic("iderw: nothing to do"); + if(b->dev != 1) + panic("iderw: request not for disk 1"); + if(b->blockno >= disksize) + panic("iderw: block out of range"); + + p = memdisk + b->blockno*BSIZE; + + if(b->flags & B_DIRTY){ + b->flags &= ~B_DIRTY; + memmove(p, b->data, BSIZE); + } else + memmove(b->data, p, BSIZE); + b->flags |= B_VALID; +} diff --git a/kernel/src/mp.c b/kernel/src/mp.c new file mode 100644 index 0000000..1caec8e --- /dev/null +++ b/kernel/src/mp.c @@ -0,0 +1,139 @@ +// Multiprocessor support +// Search memory for MP description structures. 
+// http://developer.intel.com/design/pentium/datashts/24201606.pdf
+
+#include "asm/x86.h"
+#include "types.h"
+#include "defs.h"
+#include "param.h"
+#include "memlayout.h"
+#include "mp.h"
+#include "mmu.h"
+#include "proc.h"
+
+struct cpu cpus[NCPU];
+int ncpu;
+uchar ioapicid;
+
+static uchar
+sum(uchar *addr, int len)
+{
+  int i, sum;
+
+  sum = 0;
+  for(i=0; i<len; i++)
+    sum += addr[i];
+  return sum;
+}
+
+// Look for an MP structure in the len bytes at addr.
+static struct mp*
+mpsearch1(uint a, int len)
+{
+  uchar *e, *p, *addr;
+
+  addr = P2V(a);
+  e = addr+len;
+  for(p = addr; p < e; p += sizeof(struct mp))
+    if(memcmp(p, "_MP_", 4) == 0 && sum(p, sizeof(struct mp)) == 0)
+      return (struct mp*)p;
+  return 0;
+}
+
+// Search for the MP Floating Pointer Structure, which according to the
+// spec is in one of the following three locations:
+// 1) in the first KB of the EBDA;
+// 2) in the last KB of system base memory;
+// 3) in the BIOS ROM between 0xE0000 and 0xFFFFF.
+static struct mp*
+mpsearch(void)
+{
+  uchar *bda;
+  uint p;
+  struct mp *mp;
+
+  bda = (uchar *) P2V(0x400);
+  if((p = ((bda[0x0F]<<8)| bda[0x0E]) << 4)){
+    if((mp = mpsearch1(p, 1024)))
+      return mp;
+  } else {
+    p = ((bda[0x14]<<8)|bda[0x13])*1024;
+    if((mp = mpsearch1(p-1024, 1024)))
+      return mp;
+  }
+  return mpsearch1(0xF0000, 0x10000);
+}
+
+// Search for an MP configuration table.  For now,
+// don't accept the default configurations (physaddr == 0).
+// Check for correct signature, calculate the checksum and,
+// if correct, check the version.
+// To do: check extended table checksum.
+static struct mpconf*
+mpconfig(struct mp **pmp)
+{
+  struct mpconf *conf;
+  struct mp *mp;
+
+  if((mp = mpsearch()) == 0 || mp->physaddr == 0)
+    return 0;
+  conf = (struct mpconf*) P2V((uint) mp->physaddr);
+  if(memcmp(conf, "PCMP", 4) != 0)
+    return 0;
+  if(conf->version != 1 && conf->version != 4)
+    return 0;
+  if(sum((uchar*)conf, conf->length) != 0)
+    return 0;
+  *pmp = mp;
+  return conf;
+}
+
+void
+mpinit(void)
+{
+  uchar *p, *e;
+  int ismp;
+  struct mp *mp;
+  struct mpconf *conf;
+  struct mpproc *proc;
+  struct mpioapic *ioapic;
+
+  if((conf = mpconfig(&mp)) == 0)
+    panic("Expect to run on an SMP");
+  ismp = 1;
+  lapic = (uint*)conf->lapicaddr;
+  for(p=(uchar*)(conf+1), e=(uchar*)conf+conf->length; p<e; ){
+    switch(*p){
+    case MPPROC:
+      proc = (struct mpproc*)p;
+      if(ncpu < NCPU) {
+        cpus[ncpu].apicid = proc->apicid;  // apicid may differ from ncpu
+        ncpu++;
+      }
+      p += sizeof(struct mpproc);
+      continue;
+    case MPIOAPIC:
+      ioapic = (struct mpioapic*)p;
+      ioapicid = ioapic->apicno;
+      p += sizeof(struct mpioapic);
+      continue;
+    case MPBUS:
+    case MPIOINTR:
+    case MPLINTR:
+      p += 8;
+      continue;
+    default:
+      ismp = 0;
+      break;
+    }
+  }
+  if(!ismp)
+    panic("Didn't find a suitable machine");
+
+  if(mp->imcrp){
+    // Bochs doesn't support IMCR, so this doesn't run on Bochs.
+    // But it would on real hardware.
+    outb(0x22, 0x70);   // Select IMCR
+    outb(0x23, inb(0x23) | 1);  // Mask external interrupts.
+  }
+}
diff --git a/kernel/src/picirq.c b/kernel/src/picirq.c
new file mode 100644
index 0000000..f4f49b6
--- /dev/null
+++ b/kernel/src/picirq.c
@@ -0,0 +1,19 @@
+#include "asm/x86.h"
+#include "types.h"
+#include "traps.h"
+
+// I/O Addresses of the two programmable interrupt controllers
+#define IO_PIC1         0x20    // Master (IRQs 0-7)
+#define IO_PIC2         0xA0    // Slave (IRQs 8-15)
+
+// Don't use the 8259A interrupt controllers.  Xv6 assumes SMP hardware.
+void
+picinit(void)
+{
+  // mask all interrupts
+  outb(IO_PIC1+1, 0xFF);
+  outb(IO_PIC2+1, 0xFF);
+}
+
+//PAGEBREAK!
+// Blank page.
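
For orientation, picirq.c, ioapic.c, and lapic.c together define the interrupt plumbing this kernel relies on: the legacy 8259A PICs are masked, device IRQs are routed through the I/O APIC with ioapicenable(), and handlers acknowledge at the local APIC with lapiceoi(). Below is a minimal sketch of how those pieces are combined at boot and in a driver; it reuses only functions introduced in this patch, and the example_* names are illustrative rather than actual kernel code.

    // Sketch only: boot-time setup plus a driver's IRQ wiring under the
    // APIC scheme used by this kernel (example_* names are hypothetical).
    static void
    example_irq_setup(void)
    {
      picinit();                 // mask the legacy 8259A PICs (boot, main())
      ioapicinit();              // reset the I/O APIC redirection table (boot)
      ioapicenable(IRQ_IDE, 0);  // driver routes its IRQ to the CPU with APIC ID 0
    }

    static void
    example_irq_handler(void)
    {
      ideintr();                 // service the device (see ide.c / trap.c)
      lapiceoi();                // acknowledge the interrupt at the local APIC
    }
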
diff --git a/kernel/src/pipe.c b/kernel/src/pipe.c new file mode 100644 index 0000000..e9abe7f --- /dev/null +++ b/kernel/src/pipe.c @@ -0,0 +1,121 @@ +#include "types.h" +#include "defs.h" +#include "param.h" +#include "mmu.h" +#include "proc.h" +#include "fs.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "file.h" + +#define PIPESIZE 512 + +struct pipe { + struct spinlock lock; + char data[PIPESIZE]; + uint nread; // number of bytes read + uint nwrite; // number of bytes written + int readopen; // read fd is still open + int writeopen; // write fd is still open +}; + +int +pipealloc(struct file **f0, struct file **f1) +{ + struct pipe *p; + + p = 0; + *f0 = *f1 = 0; + if((*f0 = filealloc()) == 0 || (*f1 = filealloc()) == 0) + goto bad; + if((p = (struct pipe*)kalloc()) == 0) + goto bad; + p->readopen = 1; + p->writeopen = 1; + p->nwrite = 0; + p->nread = 0; + initlock(&p->lock, "pipe"); + (*f0)->type = FD_PIPE; + (*f0)->readable = 1; + (*f0)->writable = 0; + (*f0)->pipe = p; + (*f1)->type = FD_PIPE; + (*f1)->readable = 0; + (*f1)->writable = 1; + (*f1)->pipe = p; + return 0; + +//PAGEBREAK: 20 + bad: + if(p) + kfree((char*)p); + if(*f0) + fileclose(*f0); + if(*f1) + fileclose(*f1); + return -1; +} + +void +pipeclose(struct pipe *p, int writable) +{ + acquire(&p->lock); + if(writable){ + p->writeopen = 0; + wakeup(&p->nread); + } else { + p->readopen = 0; + wakeup(&p->nwrite); + } + if(p->readopen == 0 && p->writeopen == 0){ + release(&p->lock); + kfree((char*)p); + } else + release(&p->lock); +} + +//PAGEBREAK: 40 +int +pipewrite(struct pipe *p, char *addr, int n) +{ + int i; + + acquire(&p->lock); + for(i = 0; i < n; i++){ + while(p->nwrite == p->nread + PIPESIZE){ //DOC: pipewrite-full + if(p->readopen == 0 || myproc()->killed){ + release(&p->lock); + return -1; + } + wakeup(&p->nread); + sleep(&p->nwrite, &p->lock); //DOC: pipewrite-sleep + } + p->data[p->nwrite++ % PIPESIZE] = addr[i]; + } + wakeup(&p->nread); //DOC: pipewrite-wakeup1 + release(&p->lock); + return n; +} + +int +piperead(struct pipe *p, char *addr, int n) +{ + int i; + + acquire(&p->lock); + while(p->nread == p->nwrite && p->writeopen){ //DOC: pipe-empty + if(myproc()->killed){ + release(&p->lock); + return -1; + } + sleep(&p->nread, &p->lock); //DOC: piperead-sleep + } + for(i = 0; i < n; i++){ //DOC: piperead-copy + if(p->nread == p->nwrite) + break; + addr[i] = p->data[p->nread++ % PIPESIZE]; + } + wakeup(&p->nwrite); //DOC: piperead-wakeup + release(&p->lock); + return i; +} diff --git a/kernel/src/proc.c b/kernel/src/proc.c new file mode 100644 index 0000000..aa52660 --- /dev/null +++ b/kernel/src/proc.c @@ -0,0 +1,534 @@ +#include "asm/x86.h" +#include "types.h" +#include "defs.h" +#include "param.h" +#include "memlayout.h" +#include "mmu.h" +#include "proc.h" +#include "spinlock.h" + +struct { + struct spinlock lock; + struct proc proc[NPROC]; +} ptable; + +static struct proc *initproc; + +int nextpid = 1; +extern void forkret(void); +extern void trapret(void); + +static void wakeup1(void *chan); + +void +pinit(void) +{ + initlock(&ptable.lock, "ptable"); +} + +// Must be called with interrupts disabled +int +cpuid() { + return mycpu()-cpus; +} + +// Must be called with interrupts disabled to avoid the caller being +// rescheduled between reading lapicid and running through the loop. +struct cpu* +mycpu(void) +{ + int apicid, i; + + if(readeflags()&FL_IF) + panic("mycpu called with interrupts enabled\n"); + + apicid = lapicid(); + // APIC IDs are not guaranteed to be contiguous. 
Maybe we should have + // a reverse map, or reserve a register to store &cpus[i]. + for (i = 0; i < ncpu; ++i) { + if (cpus[i].apicid == apicid) + return &cpus[i]; + } + panic("unknown apicid\n"); +} + +// Disable interrupts so that we are not rescheduled +// while reading proc from the cpu structure +struct proc* +myproc(void) { + struct cpu *c; + struct proc *p; + pushcli(); + c = mycpu(); + p = c->proc; + popcli(); + return p; +} + +//PAGEBREAK: 32 +// Look in the process table for an UNUSED proc. +// If found, change state to EMBRYO and initialize +// state required to run in the kernel. +// Otherwise return 0. +static struct proc* +allocproc(void) +{ + struct proc *p; + char *sp; + + acquire(&ptable.lock); + + for(p = ptable.proc; p < &ptable.proc[NPROC]; p++) + if(p->state == UNUSED) + goto found; + + release(&ptable.lock); + return 0; + +found: + p->state = EMBRYO; + p->pid = nextpid++; + + release(&ptable.lock); + + // Allocate kernel stack. + if((p->kstack = kalloc()) == 0){ + p->state = UNUSED; + return 0; + } + sp = p->kstack + KSTACKSIZE; + + // Leave room for trap frame. + sp -= sizeof *p->tf; + p->tf = (struct trapframe*)sp; + + // Set up new context to start executing at forkret, + // which returns to trapret. + sp -= 4; + *(uint*)sp = (uint)trapret; + + sp -= sizeof *p->context; + p->context = (struct context*)sp; + memset(p->context, 0, sizeof *p->context); + p->context->eip = (uint)forkret; + + return p; +} + +//PAGEBREAK: 32 +// Set up first user process. +void +userinit(void) +{ + struct proc *p; + extern char _binary_initcode_start[], _binary_initcode_size[]; + + p = allocproc(); + + initproc = p; + if((p->pgdir = setupkvm()) == 0) + panic("userinit: out of memory?"); + inituvm(p->pgdir, _binary_initcode_start, (int)_binary_initcode_size); + p->sz = PGSIZE; + memset(p->tf, 0, sizeof(*p->tf)); + p->tf->cs = (SEG_UCODE << 3) | DPL_USER; + p->tf->ds = (SEG_UDATA << 3) | DPL_USER; + p->tf->es = p->tf->ds; + p->tf->ss = p->tf->ds; + p->tf->eflags = FL_IF; + p->tf->esp = PGSIZE; + p->tf->eip = 0; // beginning of initcode.S + + safestrcpy(p->name, "initcode", sizeof(p->name)); + p->cwd = namei("/"); + + // this assignment to p->state lets other cores + // run this process. the acquire forces the above + // writes to be visible, and the lock is also needed + // because the assignment might not be atomic. + acquire(&ptable.lock); + + p->state = RUNNABLE; + + release(&ptable.lock); +} + +// Grow current process's memory by n bytes. +// Return 0 on success, -1 on failure. +int +growproc(int n) +{ + uint sz; + struct proc *curproc = myproc(); + + sz = curproc->sz; + if(n > 0){ + if((sz = allocuvm(curproc->pgdir, sz, sz + n)) == 0) + return -1; + } else if(n < 0){ + if((sz = deallocuvm(curproc->pgdir, sz, sz + n)) == 0) + return -1; + } + curproc->sz = sz; + switchuvm(curproc); + return 0; +} + +// Create a new process copying p as the parent. +// Sets up stack to return as if from system call. +// Caller must set state of returned proc to RUNNABLE. +int +fork(void) +{ + int i, pid; + struct proc *np; + struct proc *curproc = myproc(); + + // Allocate process. + if((np = allocproc()) == 0){ + return -1; + } + + // Copy process state from proc. + if((np->pgdir = copyuvm(curproc->pgdir, curproc->sz)) == 0){ + kfree(np->kstack); + np->kstack = 0; + np->state = UNUSED; + return -1; + } + np->sz = curproc->sz; + np->parent = curproc; + *np->tf = *curproc->tf; + + // Clear %eax so that fork returns 0 in the child. 
+ np->tf->eax = 0; + + for(i = 0; i < NOFILE; i++) + if(curproc->ofile[i]) + np->ofile[i] = filedup(curproc->ofile[i]); + np->cwd = idup(curproc->cwd); + + safestrcpy(np->name, curproc->name, sizeof(curproc->name)); + + pid = np->pid; + + acquire(&ptable.lock); + + np->state = RUNNABLE; + + release(&ptable.lock); + + return pid; +} + +// Exit the current process. Does not return. +// An exited process remains in the zombie state +// until its parent calls wait() to find out it exited. +void +exit(void) +{ + struct proc *curproc = myproc(); + struct proc *p; + int fd; + + if(curproc == initproc) + panic("init exiting"); + + // Close all open files. + for(fd = 0; fd < NOFILE; fd++){ + if(curproc->ofile[fd]){ + fileclose(curproc->ofile[fd]); + curproc->ofile[fd] = 0; + } + } + + begin_op(); + iput(curproc->cwd); + end_op(); + curproc->cwd = 0; + + acquire(&ptable.lock); + + // Parent might be sleeping in wait(). + wakeup1(curproc->parent); + + // Pass abandoned children to init. + for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){ + if(p->parent == curproc){ + p->parent = initproc; + if(p->state == ZOMBIE) + wakeup1(initproc); + } + } + + // Jump into the scheduler, never to return. + curproc->state = ZOMBIE; + sched(); + panic("zombie exit"); +} + +// Wait for a child process to exit and return its pid. +// Return -1 if this process has no children. +int +wait(void) +{ + struct proc *p; + int havekids, pid; + struct proc *curproc = myproc(); + + acquire(&ptable.lock); + for(;;){ + // Scan through table looking for exited children. + havekids = 0; + for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){ + if(p->parent != curproc) + continue; + havekids = 1; + if(p->state == ZOMBIE){ + // Found one. + pid = p->pid; + kfree(p->kstack); + p->kstack = 0; + freevm(p->pgdir); + p->pid = 0; + p->parent = 0; + p->name[0] = 0; + p->killed = 0; + p->state = UNUSED; + release(&ptable.lock); + return pid; + } + } + + // No point waiting if we don't have any children. + if(!havekids || curproc->killed){ + release(&ptable.lock); + return -1; + } + + // Wait for children to exit. (See wakeup1 call in proc_exit.) + sleep(curproc, &ptable.lock); //DOC: wait-sleep + } +} + +//PAGEBREAK: 42 +// Per-CPU process scheduler. +// Each CPU calls scheduler() after setting itself up. +// Scheduler never returns. It loops, doing: +// - choose a process to run +// - swtch to start running that process +// - eventually that process transfers control +// via swtch back to the scheduler. +void +scheduler(void) +{ + struct proc *p; + struct cpu *c = mycpu(); + c->proc = 0; + + for(;;){ + // Enable interrupts on this processor. + sti(); + + // Loop over process table looking for process to run. + acquire(&ptable.lock); + for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){ + if(p->state != RUNNABLE) + continue; + + // Switch to chosen process. It is the process's job + // to release ptable.lock and then reacquire it + // before jumping back to us. + c->proc = p; + switchuvm(p); + p->state = RUNNING; + + swtch(&(c->scheduler), p->context); + switchkvm(); + + // Process is done running for now. + // It should have changed its p->state before coming back. + c->proc = 0; + } + release(&ptable.lock); + + } +} + +// Enter scheduler. Must hold only ptable.lock +// and have changed proc->state. Saves and restores +// intena because intena is a property of this +// kernel thread, not this CPU. It should +// be proc->intena and proc->ncli, but that would +// break in the few places where a lock is held but +// there's no process. 
+void +sched(void) +{ + int intena; + struct proc *p = myproc(); + + if(!holding(&ptable.lock)) + panic("sched ptable.lock"); + if(mycpu()->ncli != 1) + panic("sched locks"); + if(p->state == RUNNING) + panic("sched running"); + if(readeflags()&FL_IF) + panic("sched interruptible"); + intena = mycpu()->intena; + swtch(&p->context, mycpu()->scheduler); + mycpu()->intena = intena; +} + +// Give up the CPU for one scheduling round. +void +yield(void) +{ + acquire(&ptable.lock); //DOC: yieldlock + myproc()->state = RUNNABLE; + sched(); + release(&ptable.lock); +} + +// A fork child's very first scheduling by scheduler() +// will swtch here. "Return" to user space. +void +forkret(void) +{ + static int first = 1; + // Still holding ptable.lock from scheduler. + release(&ptable.lock); + + if (first) { + // Some initialization functions must be run in the context + // of a regular process (e.g., they call sleep), and thus cannot + // be run from main(). + first = 0; + iinit(ROOTDEV); + initlog(ROOTDEV); + } + + // Return to "caller", actually trapret (see allocproc). +} + +// Atomically release lock and sleep on chan. +// Reacquires lock when awakened. +void +sleep(void *chan, struct spinlock *lk) +{ + struct proc *p = myproc(); + + if(p == 0) + panic("sleep"); + + if(lk == 0) + panic("sleep without lk"); + + // Must acquire ptable.lock in order to + // change p->state and then call sched. + // Once we hold ptable.lock, we can be + // guaranteed that we won't miss any wakeup + // (wakeup runs with ptable.lock locked), + // so it's okay to release lk. + if(lk != &ptable.lock){ //DOC: sleeplock0 + acquire(&ptable.lock); //DOC: sleeplock1 + release(lk); + } + // Go to sleep. + p->chan = chan; + p->state = SLEEPING; + + sched(); + + // Tidy up. + p->chan = 0; + + // Reacquire original lock. + if(lk != &ptable.lock){ //DOC: sleeplock2 + release(&ptable.lock); + acquire(lk); + } +} + +//PAGEBREAK! +// Wake up all processes sleeping on chan. +// The ptable lock must be held. +static void +wakeup1(void *chan) +{ + struct proc *p; + + for(p = ptable.proc; p < &ptable.proc[NPROC]; p++) + if(p->state == SLEEPING && p->chan == chan) + p->state = RUNNABLE; +} + +// Wake up all processes sleeping on chan. +void +wakeup(void *chan) +{ + acquire(&ptable.lock); + wakeup1(chan); + release(&ptable.lock); +} + +// Kill the process with the given pid. +// Process won't exit until it returns +// to user space (see trap in trap.c). +int +kill(int pid) +{ + struct proc *p; + + acquire(&ptable.lock); + for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){ + if(p->pid == pid){ + p->killed = 1; + // Wake process from sleep if necessary. + if(p->state == SLEEPING) + p->state = RUNNABLE; + release(&ptable.lock); + return 0; + } + } + release(&ptable.lock); + return -1; +} + +//PAGEBREAK: 36 +// Print a process listing to console. For debugging. +// Runs when user types ^P on console. +// No lock to avoid wedging a stuck machine further. 
+void
+procdump(void)
+{
+  static char *states[] = {
+  [UNUSED]    "unused",
+  [EMBRYO]    "embryo",
+  [SLEEPING]  "sleep ",
+  [RUNNABLE]  "runble",
+  [RUNNING]   "run   ",
+  [ZOMBIE]    "zombie"
+  };
+  int i;
+  struct proc *p;
+  char *state;
+  uint pc[10];
+
+  for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){
+    if(p->state == UNUSED)
+      continue;
+    if(p->state >= 0 && p->state < NELEM(states) && states[p->state])
+      state = states[p->state];
+    else
+      state = "???";
+    cprintf("%d %s %s", p->pid, state, p->name);
+    if(p->state == SLEEPING){
+      getcallerpcs((uint*)p->context->ebp+2, pc);
+      for(i=0; i<10 && pc[i] != 0; i++)
+        cprintf(" %p", pc[i]);
+    }
+    cprintf("\n");
+  }
+}
diff --git a/kernel/src/sleeplock.c b/kernel/src/sleeplock.c
new file mode 100644
index 0000000..364453d
--- /dev/null
+++ b/kernel/src/sleeplock.c
@@ -0,0 +1,56 @@
+// Sleeping locks
+
+#include "asm/x86.h"
+#include "types.h"
+#include "defs.h"
+#include "param.h"
+#include "memlayout.h"
+#include "mmu.h"
+#include "proc.h"
+#include "spinlock.h"
+#include "sleeplock.h"
+
+void
+initsleeplock(struct sleeplock *lk, char *name)
+{
+  initlock(&lk->lk, "sleep lock");
+  lk->name = name;
+  lk->locked = 0;
+  lk->pid = 0;
+}
+
+void
+acquiresleep(struct sleeplock *lk)
+{
+  acquire(&lk->lk);
+  while (lk->locked) {
+    sleep(lk, &lk->lk);
+  }
+  lk->locked = 1;
+  lk->pid = myproc()->pid;
+  release(&lk->lk);
+}
+
+void
+releasesleep(struct sleeplock *lk)
+{
+  acquire(&lk->lk);
+  lk->locked = 0;
+  lk->pid = 0;
+  wakeup(lk);
+  release(&lk->lk);
+}
+
+int
+holdingsleep(struct sleeplock *lk)
+{
+  int r;
+
+  acquire(&lk->lk);
+  r = lk->locked && (lk->pid == myproc()->pid);
+  release(&lk->lk);
+  return r;
+}
+
+
+
diff --git a/kernel/src/spinlock.c b/kernel/src/spinlock.c
new file mode 100644
index 0000000..c7de1d9
--- /dev/null
+++ b/kernel/src/spinlock.c
@@ -0,0 +1,123 @@
+// Mutual exclusion spin locks.
+#include <stdatomic.h>
+
+#include "asm/x86.h"
+#include "types.h"
+#include "defs.h"
+#include "param.h"
+#include "memlayout.h"
+#include "mmu.h"
+#include "proc.h"
+#include "spinlock.h"
+
+void
+initlock(struct spinlock *lk, char *name)
+{
+  atomic_init(&lk->locked, 0);
+
+  lk->name = name;
+  lk->cpu = 0;
+}
+
+// Acquire the lock.
+// Loops (spins) until the lock is acquired.
+// Holding a lock for a long time may cause
+// other CPUs to waste time spinning to acquire it.
+void
+acquire(struct spinlock *lk)
+{
+  pushcli(); // disable interrupts to avoid deadlock.
+  if(holding(lk))
+    panic("acquire");
+
+  // Use c11 atomics to acquire the lock
+  // Here we atomically exchange locked with 1.  If locked was 0, then we've
+  // just acquired the lock!
+  // We use the acquire release semantics (orderings). We really only want
+  // acquire semantics, but we are doing a read and modify operation at once
+  // which requires acquire (write) and release (read) ordering semantics.
+  while (atomic_exchange_explicit(&lk->locked, 1, memory_order_acq_rel) != 0)
+    ;
+
+  // Record info about lock acquisition for debugging.
+  lk->cpu = mycpu();
+  getcallerpcs(&lk, lk->pcs);
+}
+
+// Release the lock.
+void
+release(struct spinlock *lk)
+{
+  if(!holding(lk))
+    panic("release");
+
+  lk->pcs[0] = 0;
+  lk->cpu = 0;
+
+
+  // Use c11 atomics to release the lock.
+  // Here we set the locked value to 0 atomically
+  // We also give it "release" semantics, as we're doing an unlock
+  // (e.g. release) operation
+  atomic_store_explicit(&lk->locked, 0, memory_order_release);
+
+  popcli();
+}
+
+// Record the current call stack in pcs[] by following the %ebp chain.
+void +getcallerpcs(void *v, uint pcs[]) +{ + uint *ebp; + int i; + + ebp = (uint*)v - 2; + for(i = 0; i < 10; i++){ + if(ebp == 0 || ebp < (uint*)KERNBASE || ebp == (uint*)0xffffffff) + break; + pcs[i] = ebp[1]; // saved %eip + ebp = (uint*)ebp[0]; // saved %ebp + } + for(; i < 10; i++) + pcs[i] = 0; +} + +// Check whether this cpu is holding the lock. +int +holding(struct spinlock *lock) +{ + int r; + pushcli(); + r = lock->locked && lock->cpu == mycpu(); + popcli(); + return r; +} + + +// Pushcli/popcli are like cli/sti except that they are matched: +// it takes two popcli to undo two pushcli. Also, if interrupts +// are off, then pushcli, popcli leaves them off. + +void +pushcli(void) +{ + int eflags; + + eflags = readeflags(); + cli(); + if(mycpu()->ncli == 0) + mycpu()->intena = eflags & FL_IF; + mycpu()->ncli += 1; +} + +void +popcli(void) +{ + if(readeflags()&FL_IF) + panic("popcli - interruptible"); + if(--mycpu()->ncli < 0) + panic("popcli"); + if(mycpu()->ncli == 0 && mycpu()->intena) + sti(); +} + diff --git a/kernel/src/string.c b/kernel/src/string.c new file mode 100644 index 0000000..430f11e --- /dev/null +++ b/kernel/src/string.c @@ -0,0 +1,105 @@ +#include "asm/x86.h" +#include "types.h" + +void* +memset(void *dst, int c, uint n) +{ + if ((int)dst%4 == 0 && n%4 == 0){ + c &= 0xFF; + stosl(dst, (c<<24)|(c<<16)|(c<<8)|c, n/4); + } else + stosb(dst, c, n); + return dst; +} + +int +memcmp(const void *v1, const void *v2, uint n) +{ + const uchar *s1, *s2; + + s1 = v1; + s2 = v2; + while(n-- > 0){ + if(*s1 != *s2) + return *s1 - *s2; + s1++, s2++; + } + + return 0; +} + +void* +memmove(void *dst, const void *src, uint n) +{ + const char *s; + char *d; + + s = src; + d = dst; + if(s < d && s + n > d){ + s += n; + d += n; + while(n-- > 0) + *--d = *--s; + } else + while(n-- > 0) + *d++ = *s++; + + return dst; +} + +// memcpy exists to placate GCC. Use memmove. +void* +memcpy(void *dst, const void *src, uint n) +{ + return memmove(dst, src, n); +} + +int +strncmp(const char *p, const char *q, uint n) +{ + while(n > 0 && *p && *p == *q) + n--, p++, q++; + if(n == 0) + return 0; + return (uchar)*p - (uchar)*q; +} + +char* +strncpy(char *s, const char *t, int n) +{ + char *os; + + os = s; + while(n-- > 0 && (*s++ = *t++) != 0) + ; + while(n-- > 0) + *s++ = 0; + return os; +} + +// Like strncpy but guaranteed to NUL-terminate. +char* +safestrcpy(char *s, const char *t, int n) +{ + char *os; + + os = s; + if(n <= 0) + return os; + while(--n > 0 && (*s++ = *t++) != 0) + ; + *s = 0; + return os; +} + +int +strlen(const char *s) +{ + int n; + + for(n = 0; s[n]; n++) + ; + return n; +} + diff --git a/kernel/src/syscall.c b/kernel/src/syscall.c new file mode 100644 index 0000000..59ba22f --- /dev/null +++ b/kernel/src/syscall.c @@ -0,0 +1,145 @@ +#include "asm/x86.h" +#include "types.h" +#include "defs.h" +#include "param.h" +#include "memlayout.h" +#include "mmu.h" +#include "proc.h" +#include "syscall.h" + +// User code makes a system call with INT T_SYSCALL. +// System call number in %eax. +// Arguments on the stack, from the user call to the C +// library system call function. The saved user %esp points +// to a saved program counter, and then the first argument. + +// Fetch the int at addr from the current process. +int +fetchint(uint addr, int *ip) +{ + struct proc *curproc = myproc(); + + if(addr >= curproc->sz || addr+4 > curproc->sz) + return -1; + *ip = *(int*)(addr); + return 0; +} + +// Fetch the nul-terminated string at addr from the current process. 
+// Doesn't actually copy the string - just sets *pp to point at it. +// Returns length of string, not including nul. +int +fetchstr(uint addr, char **pp) +{ + char *s, *ep; + struct proc *curproc = myproc(); + + if(addr >= curproc->sz) + return -1; + *pp = (char*)addr; + ep = (char*)curproc->sz; + for(s = *pp; s < ep; s++){ + if(*s == 0) + return s - *pp; + } + return -1; +} + +// Fetch the nth 32-bit system call argument. +int +argint(int n, int *ip) +{ + return fetchint((myproc()->tf->esp) + 4 + 4*n, ip); +} + +// Fetch the nth word-sized system call argument as a pointer +// to a block of memory of size bytes. Check that the pointer +// lies within the process address space. +int +argptr(int n, char **pp, int size) +{ + int i; + struct proc *curproc = myproc(); + + if(argint(n, &i) < 0) + return -1; + if(size < 0 || (uint)i >= curproc->sz || (uint)i+size > curproc->sz) + return -1; + *pp = (char*)i; + return 0; +} + +// Fetch the nth word-sized system call argument as a string pointer. +// Check that the pointer is valid and the string is nul-terminated. +// (There is no shared writable memory, so the string can't change +// between this check and being used by the kernel.) +int +argstr(int n, char **pp) +{ + int addr; + if(argint(n, &addr) < 0) + return -1; + return fetchstr(addr, pp); +} + +extern int sys_chdir(void); +extern int sys_close(void); +extern int sys_dup(void); +extern int sys_exec(void); +extern int sys_exit(void); +extern int sys_fork(void); +extern int sys_fstat(void); +extern int sys_getpid(void); +extern int sys_kill(void); +extern int sys_link(void); +extern int sys_mkdir(void); +extern int sys_mknod(void); +extern int sys_open(void); +extern int sys_pipe(void); +extern int sys_read(void); +extern int sys_sbrk(void); +extern int sys_sleep(void); +extern int sys_unlink(void); +extern int sys_wait(void); +extern int sys_write(void); +extern int sys_uptime(void); + +static int (*syscalls[])(void) = { +[SYS_fork] sys_fork, +[SYS_exit] sys_exit, +[SYS_wait] sys_wait, +[SYS_pipe] sys_pipe, +[SYS_read] sys_read, +[SYS_kill] sys_kill, +[SYS_exec] sys_exec, +[SYS_fstat] sys_fstat, +[SYS_chdir] sys_chdir, +[SYS_dup] sys_dup, +[SYS_getpid] sys_getpid, +[SYS_sbrk] sys_sbrk, +[SYS_sleep] sys_sleep, +[SYS_uptime] sys_uptime, +[SYS_open] sys_open, +[SYS_write] sys_write, +[SYS_mknod] sys_mknod, +[SYS_unlink] sys_unlink, +[SYS_link] sys_link, +[SYS_mkdir] sys_mkdir, +[SYS_close] sys_close, +}; + +void +syscall(void) +{ + int num; + struct proc *curproc = myproc(); + + num = curproc->tf->eax; + if(num > 0 && num < NELEM(syscalls) && syscalls[num]) { + curproc->tf->eax = syscalls[num](); + } else { + cprintf("%d %s: unknown sys call %d\n", + curproc->pid, curproc->name, num); + curproc->tf->eax = -1; + } +} diff --git a/kernel/src/sysfile.c b/kernel/src/sysfile.c new file mode 100644 index 0000000..bfe61b7 --- /dev/null +++ b/kernel/src/sysfile.c @@ -0,0 +1,444 @@ +// +// File-system system calls. +// Mostly argument checking, since we don't trust +// user code, and calls into file.c and fs.c. +// + +#include "types.h" +#include "defs.h" +#include "param.h" +#include "stat.h" +#include "mmu.h" +#include "proc.h" +#include "fs.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "file.h" +#include "fcntl.h" + +// Fetch the nth word-sized system call argument as a file descriptor +// and return both the descriptor and the corresponding struct file. 
+static int
+argfd(int n, int *pfd, struct file **pf)
+{
+  int fd;
+  struct file *f;
+
+  if(argint(n, &fd) < 0)
+    return -1;
+  if(fd < 0 || fd >= NOFILE || (f=myproc()->ofile[fd]) == 0)
+    return -1;
+  if(pfd)
+    *pfd = fd;
+  if(pf)
+    *pf = f;
+  return 0;
+}
+
+// Allocate a file descriptor for the given file.
+// Takes over file reference from caller on success.
+static int
+fdalloc(struct file *f)
+{
+  int fd;
+  struct proc *curproc = myproc();
+
+  for(fd = 0; fd < NOFILE; fd++){
+    if(curproc->ofile[fd] == 0){
+      curproc->ofile[fd] = f;
+      return fd;
+    }
+  }
+  return -1;
+}
+
+int
+sys_dup(void)
+{
+  struct file *f;
+  int fd;
+
+  if(argfd(0, 0, &f) < 0)
+    return -1;
+  if((fd=fdalloc(f)) < 0)
+    return -1;
+  filedup(f);
+  return fd;
+}
+
+int
+sys_read(void)
+{
+  struct file *f;
+  int n;
+  char *p;
+
+  if(argfd(0, 0, &f) < 0 || argint(2, &n) < 0 || argptr(1, &p, n) < 0)
+    return -1;
+  return fileread(f, p, n);
+}
+
+int
+sys_write(void)
+{
+  struct file *f;
+  int n;
+  char *p;
+
+  if(argfd(0, 0, &f) < 0 || argint(2, &n) < 0 || argptr(1, &p, n) < 0)
+    return -1;
+  return filewrite(f, p, n);
+}
+
+int
+sys_close(void)
+{
+  int fd;
+  struct file *f;
+
+  if(argfd(0, &fd, &f) < 0)
+    return -1;
+  myproc()->ofile[fd] = 0;
+  fileclose(f);
+  return 0;
+}
+
+int
+sys_fstat(void)
+{
+  struct file *f;
+  struct stat *st;
+
+  if(argfd(0, 0, &f) < 0 || argptr(1, (void*)&st, sizeof(*st)) < 0)
+    return -1;
+  return filestat(f, st);
+}
+
+// Create the path new as a link to the same inode as old.
+int
+sys_link(void)
+{
+  char name[DIRSIZ], *new, *old;
+  struct inode *dp, *ip;
+
+  if(argstr(0, &old) < 0 || argstr(1, &new) < 0)
+    return -1;
+
+  begin_op();
+  if((ip = namei(old)) == 0){
+    end_op();
+    return -1;
+  }
+
+  ilock(ip);
+  if(ip->type == T_DIR){
+    iunlockput(ip);
+    end_op();
+    return -1;
+  }
+
+  ip->nlink++;
+  iupdate(ip);
+  iunlock(ip);
+
+  if((dp = nameiparent(new, name)) == 0)
+    goto bad;
+  ilock(dp);
+  if(dp->dev != ip->dev || dirlink(dp, name, ip->inum) < 0){
+    iunlockput(dp);
+    goto bad;
+  }
+  iunlockput(dp);
+  iput(ip);
+
+  end_op();
+
+  return 0;
+
+bad:
+  ilock(ip);
+  ip->nlink--;
+  iupdate(ip);
+  iunlockput(ip);
+  end_op();
+  return -1;
+}
+
+// Is the directory dp empty except for "." and ".." ?
+static int
+isdirempty(struct inode *dp)
+{
+  int off;
+  struct dirent de;
+
+  for(off=2*sizeof(de); off<dp->size; off+=sizeof(de)){
+    if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
+      panic("isdirempty: readi");
+    if(de.inum != 0)
+      return 0;
+  }
+  return 1;
+}
+
+//PAGEBREAK!
+int
+sys_unlink(void)
+{
+  struct inode *ip, *dp;
+  struct dirent de;
+  char name[DIRSIZ], *path;
+  uint off;
+
+  if(argstr(0, &path) < 0)
+    return -1;
+
+  begin_op();
+  if((dp = nameiparent(path, name)) == 0){
+    end_op();
+    return -1;
+  }
+
+  ilock(dp);
+
+  // Cannot unlink "." or "..".
+ if(namecmp(name, ".") == 0 || namecmp(name, "..") == 0) + goto bad; + + if((ip = dirlookup(dp, name, &off)) == 0) + goto bad; + ilock(ip); + + if(ip->nlink < 1) + panic("unlink: nlink < 1"); + if(ip->type == T_DIR && !isdirempty(ip)){ + iunlockput(ip); + goto bad; + } + + memset(&de, 0, sizeof(de)); + if(writei(dp, (char*)&de, off, sizeof(de)) != sizeof(de)) + panic("unlink: writei"); + if(ip->type == T_DIR){ + dp->nlink--; + iupdate(dp); + } + iunlockput(dp); + + ip->nlink--; + iupdate(ip); + iunlockput(ip); + + end_op(); + + return 0; + +bad: + iunlockput(dp); + end_op(); + return -1; +} + +static struct inode* +create(char *path, short type, short major, short minor) +{ + struct inode *ip, *dp; + char name[DIRSIZ]; + + if((dp = nameiparent(path, name)) == 0) + return 0; + ilock(dp); + + if((ip = dirlookup(dp, name, 0)) != 0){ + iunlockput(dp); + ilock(ip); + if(type == T_FILE && ip->type == T_FILE) + return ip; + iunlockput(ip); + return 0; + } + + if((ip = ialloc(dp->dev, type)) == 0) + panic("create: ialloc"); + + ilock(ip); + ip->major = major; + ip->minor = minor; + ip->nlink = 1; + iupdate(ip); + + if(type == T_DIR){ // Create . and .. entries. + dp->nlink++; // for ".." + iupdate(dp); + // No ip->nlink++ for ".": avoid cyclic ref count. + if(dirlink(ip, ".", ip->inum) < 0 || dirlink(ip, "..", dp->inum) < 0) + panic("create dots"); + } + + if(dirlink(dp, name, ip->inum) < 0) + panic("create: dirlink"); + + iunlockput(dp); + + return ip; +} + +int +sys_open(void) +{ + char *path; + int fd, omode; + struct file *f; + struct inode *ip; + + if(argstr(0, &path) < 0 || argint(1, &omode) < 0) + return -1; + + begin_op(); + + if(omode & O_CREATE){ + ip = create(path, T_FILE, 0, 0); + if(ip == 0){ + end_op(); + return -1; + } + } else { + if((ip = namei(path)) == 0){ + end_op(); + return -1; + } + ilock(ip); + if(ip->type == T_DIR && omode != O_RDONLY){ + iunlockput(ip); + end_op(); + return -1; + } + } + + if((f = filealloc()) == 0 || (fd = fdalloc(f)) < 0){ + if(f) + fileclose(f); + iunlockput(ip); + end_op(); + return -1; + } + iunlock(ip); + end_op(); + + f->type = FD_INODE; + f->ip = ip; + f->off = 0; + f->readable = !(omode & O_WRONLY); + f->writable = (omode & O_WRONLY) || (omode & O_RDWR); + return fd; +} + +int +sys_mkdir(void) +{ + char *path; + struct inode *ip; + + begin_op(); + if(argstr(0, &path) < 0 || (ip = create(path, T_DIR, 0, 0)) == 0){ + end_op(); + return -1; + } + iunlockput(ip); + end_op(); + return 0; +} + +int +sys_mknod(void) +{ + struct inode *ip; + char *path; + int major, minor; + + begin_op(); + if((argstr(0, &path)) < 0 || + argint(1, &major) < 0 || + argint(2, &minor) < 0 || + (ip = create(path, T_DEV, major, minor)) == 0){ + end_op(); + return -1; + } + iunlockput(ip); + end_op(); + return 0; +} + +int +sys_chdir(void) +{ + char *path; + struct inode *ip; + struct proc *curproc = myproc(); + + begin_op(); + if(argstr(0, &path) < 0 || (ip = namei(path)) == 0){ + end_op(); + return -1; + } + ilock(ip); + if(ip->type != T_DIR){ + iunlockput(ip); + end_op(); + return -1; + } + iunlock(ip); + iput(curproc->cwd); + end_op(); + curproc->cwd = ip; + return 0; +} + +int +sys_exec(void) +{ + char *path, *argv[MAXARG]; + int i; + uint uargv, uarg; + + if(argstr(0, &path) < 0 || argint(1, (int*)&uargv) < 0){ + return -1; + } + memset(argv, 0, sizeof(argv)); + for(i=0;; i++){ + if(i >= NELEM(argv)) + return -1; + if(fetchint(uargv+4*i, (int*)&uarg) < 0) + return -1; + if(uarg == 0){ + argv[i] = 0; + break; + } + if(fetchstr(uarg, &argv[i]) < 0) + return -1; + } + 
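As a user-space counterpart (again, not in the diff): open() with O_CREATE takes the create() path above, and exec() expects a null-terminated argv whose pointers sys_exec fetches one word at a time. The sketch assumes the usual xv6 user headers (user.h, fcntl.h), which are outside this hunk:

    #include "types.h"
    #include "user.h"
    #include "fcntl.h"

    int
    main(void)
    {
      int fd;
      char *argv[] = { "ls", 0 };     // must end with a 0 pointer

      if((fd = open("log", O_CREATE|O_WRONLY)) < 0){
        printf(2, "open failed\n");
        exit();
      }
      write(fd, "hello\n", 6);
      close(fd);

      exec("ls", argv);               // only returns on failure
      printf(2, "exec failed\n");
      exit();
    }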
return exec(path, argv); +} + +int +sys_pipe(void) +{ + int *fd; + struct file *rf, *wf; + int fd0, fd1; + + if(argptr(0, (void*)&fd, 2*sizeof(fd[0])) < 0) + return -1; + if(pipealloc(&rf, &wf) < 0) + return -1; + fd0 = -1; + if((fd0 = fdalloc(rf)) < 0 || (fd1 = fdalloc(wf)) < 0){ + if(fd0 >= 0) + myproc()->ofile[fd0] = 0; + fileclose(rf); + fileclose(wf); + return -1; + } + fd[0] = fd0; + fd[1] = fd1; + return 0; +} diff --git a/kernel/src/sysproc.c b/kernel/src/sysproc.c new file mode 100644 index 0000000..704858a --- /dev/null +++ b/kernel/src/sysproc.c @@ -0,0 +1,91 @@ +#include "asm/x86.h" +#include "types.h" +#include "defs.h" +#include "date.h" +#include "param.h" +#include "memlayout.h" +#include "mmu.h" +#include "proc.h" + +int +sys_fork(void) +{ + return fork(); +} + +int +sys_exit(void) +{ + exit(); + return 0; // not reached +} + +int +sys_wait(void) +{ + return wait(); +} + +int +sys_kill(void) +{ + int pid; + + if(argint(0, &pid) < 0) + return -1; + return kill(pid); +} + +int +sys_getpid(void) +{ + return myproc()->pid; +} + +int +sys_sbrk(void) +{ + int addr; + int n; + + if(argint(0, &n) < 0) + return -1; + addr = myproc()->sz; + if(growproc(n) < 0) + return -1; + return addr; +} + +int +sys_sleep(void) +{ + int n; + uint ticks0; + + if(argint(0, &n) < 0) + return -1; + acquire(&tickslock); + ticks0 = ticks; + while(ticks - ticks0 < n){ + if(myproc()->killed){ + release(&tickslock); + return -1; + } + sleep(&ticks, &tickslock); + } + release(&tickslock); + return 0; +} + +// return how many clock tick interrupts have occurred +// since start. +int +sys_uptime(void) +{ + uint xticks; + + acquire(&tickslock); + xticks = ticks; + release(&tickslock); + return xticks; +} diff --git a/kernel/src/trap.c b/kernel/src/trap.c new file mode 100644 index 0000000..327e85d --- /dev/null +++ b/kernel/src/trap.c @@ -0,0 +1,112 @@ +#include "asm/x86.h" +#include "types.h" +#include "defs.h" +#include "param.h" +#include "memlayout.h" +#include "mmu.h" +#include "proc.h" +#include "traps.h" +#include "spinlock.h" + +// Interrupt descriptor table (shared by all CPUs). +struct gatedesc idt[256]; +extern uint vectors[]; // in vectors.S: array of 256 entry pointers +struct spinlock tickslock; +uint ticks; + +void +tvinit(void) +{ + int i; + + for(i = 0; i < 256; i++) + SETGATE(idt[i], 0, SEG_KCODE<<3, vectors[i], 0); + SETGATE(idt[T_SYSCALL], 1, SEG_KCODE<<3, vectors[T_SYSCALL], DPL_USER); + + initlock(&tickslock, "time"); +} + +void +idtinit(void) +{ + lidt(idt, sizeof(idt)); +} + +//PAGEBREAK: 41 +void +trap(struct trapframe *tf) +{ + if(tf->trapno == T_SYSCALL){ + if(myproc()->killed) + exit(); + myproc()->tf = tf; + syscall(); + if(myproc()->killed) + exit(); + return; + } + + switch(tf->trapno){ + case T_IRQ0 + IRQ_TIMER: + if(cpuid() == 0){ + acquire(&tickslock); + ticks++; + wakeup(&ticks); + release(&tickslock); + } + lapiceoi(); + break; + case T_IRQ0 + IRQ_IDE: + ideintr(); + lapiceoi(); + break; + case T_IRQ0 + IRQ_IDE+1: + // Bochs generates spurious IDE1 interrupts. + break; + case T_IRQ0 + IRQ_KBD: + kbdintr(); + lapiceoi(); + break; + case T_IRQ0 + IRQ_COM1: + uartintr(); + lapiceoi(); + break; + case T_IRQ0 + 7: + case T_IRQ0 + IRQ_SPURIOUS: + cprintf("cpu%d: spurious interrupt at %x:%x\n", + cpuid(), tf->cs, tf->eip); + lapiceoi(); + break; + + //PAGEBREAK: 13 + default: + if(myproc() == 0 || (tf->cs&3) == 0){ + // In kernel, it must be our mistake. 
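Another aside, not in the diff: the IRQ_TIMER case above is what sys_sleep() in sysproc.c waits on; every tick bumps the counter and wakes sleepers on &ticks. A user-level check, assuming the standard xv6 user library stubs for sleep() and uptime():

    #include "types.h"
    #include "user.h"

    int
    main(void)
    {
      int before = uptime();

      sleep(100);                     // block for 100 timer ticks
      printf(1, "slept %d ticks\n", uptime() - before);
      exit();
    }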
+ cprintf("unexpected trap %d from cpu %d eip %x (cr2=0x%x)\n", + tf->trapno, cpuid(), tf->eip, rcr2()); + panic("trap"); + } + // In user space, assume process misbehaved. + cprintf("pid %d %s: trap %d err %d on cpu %d " + "eip 0x%x addr 0x%x--kill proc\n", + myproc()->pid, myproc()->name, tf->trapno, + tf->err, cpuid(), tf->eip, rcr2()); + myproc()->killed = 1; + } + + // Force process exit if it has been killed and is in user space. + // (If it is still executing in the kernel, let it keep running + // until it gets to the regular system call return.) + if(myproc() && myproc()->killed && (tf->cs&3) == DPL_USER) + exit(); + + // Force process to give up CPU on clock tick. + // If interrupts were on while locks held, would need to check nlock. + if(myproc() && myproc()->state == RUNNING && + tf->trapno == T_IRQ0+IRQ_TIMER) + yield(); + + // Check if the process has been killed since we yielded + if(myproc() && myproc()->killed && (tf->cs&3) == DPL_USER) + exit(); +} diff --git a/kernel/src/uart.c b/kernel/src/uart.c new file mode 100644 index 0000000..90d262d --- /dev/null +++ b/kernel/src/uart.c @@ -0,0 +1,77 @@ +// Intel 8250 serial port (UART). + +#include "asm/x86.h" +#include "types.h" +#include "defs.h" +#include "param.h" +#include "traps.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "fs.h" +#include "file.h" +#include "mmu.h" +#include "proc.h" + +#define COM1 0x3f8 + +static int uart; // is there a uart? + +void +uartinit(void) +{ + char *p; + + // Turn off the FIFO + outb(COM1+2, 0); + + // 9600 baud, 8 data bits, 1 stop bit, parity off. + outb(COM1+3, 0x80); // Unlock divisor + outb(COM1+0, 115200/9600); + outb(COM1+1, 0); + outb(COM1+3, 0x03); // Lock divisor, 8 data bits. + outb(COM1+4, 0); + outb(COM1+1, 0x01); // Enable receive interrupts. + + // If status is 0xFF, no serial port. + if(inb(COM1+5) == 0xFF) + return; + uart = 1; + + // Acknowledge pre-existing interrupt conditions; + // enable interrupts. + inb(COM1+2); + inb(COM1+0); + ioapicenable(IRQ_COM1, 0); + + // Announce that we're here. + for(p="xv6...\n"; *p; p++) + uartputc(*p); +} + +void +uartputc(int c) +{ + int i; + + if(!uart) + return; + for(i = 0; i < 128 && !(inb(COM1+5) & 0x20); i++) + microdelay(10); + outb(COM1+0, c); +} + +static int +uartgetc(void) +{ + if(!uart) + return -1; + if(!(inb(COM1+5) & 0x01)) + return -1; + return inb(COM1+0); +} + +void +uartintr(void) +{ + consoleintr(uartgetc); +} diff --git a/kernel/src/vm.c b/kernel/src/vm.c new file mode 100644 index 0000000..365322f --- /dev/null +++ b/kernel/src/vm.c @@ -0,0 +1,394 @@ +#include "asm/x86.h" +#include "param.h" +#include "types.h" +#include "defs.h" +#include "memlayout.h" +#include "mmu.h" +#include "proc.h" +#include "elf.h" + +extern char data[]; // defined by kernel.ld +pde_t *kpgdir; // for use in scheduler() + +// Set up CPU's kernel segment descriptors. +// Run once on entry on each CPU. +void +seginit(void) +{ + struct cpu *c; + + // Map "logical" addresses to virtual addresses using identity map. + // Cannot share a CODE descriptor for both kernel and user + // because it would have to have DPL_USR, but the CPU forbids + // an interrupt from CPL=0 to DPL=3. 
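Not in the patch, but for context: the magic numbers in uartinit() above are the 8250 divisor-latch protocol. Setting bit 7 of the line-control register exposes the two divisor bytes, and the divisor is 115200 divided by the desired baud rate:

    // Sketch: program an arbitrary baud rate on COM1.
    static void
    uartsetbaud(int baud)
    {
      int div = 115200 / baud;

      outb(COM1+3, 0x80);              // DLAB=1: unlock divisor registers
      outb(COM1+0, div & 0xff);        // divisor, low byte
      outb(COM1+1, (div >> 8) & 0xff); // divisor, high byte
      outb(COM1+3, 0x03);              // DLAB=0, 8 data bits, 1 stop, no parity
    }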
+ c = &cpus[cpuid()]; + c->gdt[SEG_KCODE] = SEG(STA_X|STA_R, 0, 0xffffffff, 0); + c->gdt[SEG_KDATA] = SEG(STA_W, 0, 0xffffffff, 0); + c->gdt[SEG_UCODE] = SEG(STA_X|STA_R, 0, 0xffffffff, DPL_USER); + c->gdt[SEG_UDATA] = SEG(STA_W, 0, 0xffffffff, DPL_USER); + lgdt(c->gdt, sizeof(c->gdt)); +} + +// Return the address of the PTE in page table pgdir +// that corresponds to virtual address va. If alloc!=0, +// create any required page table pages. +static pte_t * +walkpgdir(pde_t *pgdir, const void *va, int alloc) +{ + pde_t *pde; + pte_t *pgtab; + + pde = &pgdir[PDX(va)]; + if(*pde & PTE_P){ + pgtab = (pte_t*)P2V(PTE_ADDR(*pde)); + } else { + if(!alloc || (pgtab = (pte_t*)kalloc()) == 0) + return 0; + // Make sure all those PTE_P bits are zero. + memset(pgtab, 0, PGSIZE); + // The permissions here are overly generous, but they can + // be further restricted by the permissions in the page table + // entries, if necessary. + *pde = V2P(pgtab) | PTE_P | PTE_W | PTE_U; + } + return &pgtab[PTX(va)]; +} + +// Create PTEs for virtual addresses starting at va that refer to +// physical addresses starting at pa. va and size might not +// be page-aligned. +static int +mappages(pde_t *pgdir, void *va, uint size, uint pa, int perm) +{ + char *a, *last; + pte_t *pte; + + a = (char*)PGROUNDDOWN((uint)va); + last = (char*)PGROUNDDOWN(((uint)va) + size - 1); + for(;;){ + if((pte = walkpgdir(pgdir, a, 1)) == 0) + return -1; + if(*pte & PTE_P) + panic("remap"); + *pte = pa | perm | PTE_P; + if(a == last) + break; + a += PGSIZE; + pa += PGSIZE; + } + return 0; +} + +// There is one page table per process, plus one that's used when +// a CPU is not running any process (kpgdir). The kernel uses the +// current process's page table during system calls and interrupts; +// page protection bits prevent user code from using the kernel's +// mappings. +// +// setupkvm() and exec() set up every page table like this: +// +// 0..KERNBASE: user memory (text+data+stack+heap), mapped to +// phys memory allocated by the kernel +// KERNBASE..KERNBASE+EXTMEM: mapped to 0..EXTMEM (for I/O space) +// KERNBASE+EXTMEM..data: mapped to EXTMEM..V2P(data) +// for the kernel's instructions and r/o data +// data..KERNBASE+PHYSTOP: mapped to V2P(data)..PHYSTOP, +// rw data + free physical memory +// 0xfe000000..0: mapped direct (devices such as ioapic) +// +// The kernel allocates physical memory for its heap and for user memory +// between V2P(end) and the end of physical memory (PHYSTOP) +// (directly addressable from end..P2V(PHYSTOP)). + +// This table defines the kernel's mappings, which are present in +// every process's page table. +static struct kmap { + void *virt; + uint phys_start; + uint phys_end; + int perm; +} kmap[] = { + { (void*)KERNBASE, 0, EXTMEM, PTE_W}, // I/O space + { (void*)KERNLINK, V2P(KERNLINK), V2P(data), 0}, // kern text+rodata + { (void*)data, V2P(data), PHYSTOP, PTE_W}, // kern data+memory + { (void*)DEVSPACE, DEVSPACE, 0, PTE_W}, // more devices +}; + +// Set up kernel part of a page table. +pde_t* +setupkvm(void) +{ + pde_t *pgdir; + struct kmap *k; + + if((pgdir = (pde_t*)kalloc()) == 0) + return 0; + memset(pgdir, 0, PGSIZE); + if (P2V(PHYSTOP) > (void*)DEVSPACE) + panic("PHYSTOP too high"); + for(k = kmap; k < &kmap[NELEM(kmap)]; k++) + if(mappages(pgdir, k->virt, k->phys_end - k->phys_start, + (uint)k->phys_start, k->perm) < 0) { + freevm(pgdir); + return 0; + } + return pgdir; +} + +// Allocate one page table for the machine for the kernel address +// space for scheduler processes. 
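For context only (not in the diff): this is what walkpgdir() and the MMU do with a virtual address, spelled out as a software translation. PDX, PTX and PTE_ADDR come from mmu.h; 4MB "super" pages (used only by the boot-time entry page table in stock xv6) are ignored:

    // Sketch: translate va through a two-level page table, or return
    // (uint)-1 if it is not mapped.
    static uint
    va2pa(pde_t *pgdir, uint va)
    {
      pde_t pde = pgdir[PDX(va)];          // top 10 bits: directory index
      pte_t *pgtab;
      pte_t pte;

      if(!(pde & PTE_P))
        return (uint)-1;
      pgtab = (pte_t*)P2V(PTE_ADDR(pde));  // next level lives in a 4K page
      pte = pgtab[PTX(va)];                // middle 10 bits: table index
      if(!(pte & PTE_P))
        return (uint)-1;
      return PTE_ADDR(pte) | (va & 0xFFF); // frame + low 12 bits of offset
    }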
+void +kvmalloc(void) +{ + kpgdir = setupkvm(); + switchkvm(); +} + +// Switch h/w page table register to the kernel-only page table, +// for when no process is running. +void +switchkvm(void) +{ + lcr3(V2P(kpgdir)); // switch to the kernel page table +} + +// Switch TSS and h/w page table to correspond to process p. +void +switchuvm(struct proc *p) +{ + if(p == 0) + panic("switchuvm: no process"); + if(p->kstack == 0) + panic("switchuvm: no kstack"); + if(p->pgdir == 0) + panic("switchuvm: no pgdir"); + + pushcli(); + mycpu()->gdt[SEG_TSS] = SEG16(STS_T32A, &mycpu()->ts, + sizeof(mycpu()->ts)-1, 0); + mycpu()->gdt[SEG_TSS].s = 0; + mycpu()->ts.ss0 = SEG_KDATA << 3; + mycpu()->ts.esp0 = (uint)p->kstack + KSTACKSIZE; + // setting IOPL=0 in eflags *and* iomb beyond the tss segment limit + // forbids I/O instructions (e.g., inb and outb) from user space + mycpu()->ts.iomb = (ushort) 0xFFFF; + ltr(SEG_TSS << 3); + lcr3(V2P(p->pgdir)); // switch to process's address space + popcli(); +} + +// Load the initcode into address 0 of pgdir. +// sz must be less than a page. +void +inituvm(pde_t *pgdir, char *init, uint sz) +{ + char *mem; + + if(sz >= PGSIZE) + panic("inituvm: more than a page"); + mem = kalloc(); + memset(mem, 0, PGSIZE); + mappages(pgdir, 0, PGSIZE, V2P(mem), PTE_W|PTE_U); + memmove(mem, init, sz); +} + +// Load a program segment into pgdir. addr must be page-aligned +// and the pages from addr to addr+sz must already be mapped. +int +loaduvm(pde_t *pgdir, char *addr, struct inode *ip, uint offset, uint sz) +{ + uint i, pa, n; + pte_t *pte; + + if((uint) addr % PGSIZE != 0) + panic("loaduvm: addr must be page aligned"); + for(i = 0; i < sz; i += PGSIZE){ + if((pte = walkpgdir(pgdir, addr+i, 0)) == 0) + panic("loaduvm: address should exist"); + pa = PTE_ADDR(*pte); + if(sz - i < PGSIZE) + n = sz - i; + else + n = PGSIZE; + if(readi(ip, P2V(pa), offset+i, n) != n) + return -1; + } + return 0; +} + +// Allocate page tables and physical memory to grow process from oldsz to +// newsz, which need not be page aligned. Returns new size or 0 on error. +int +allocuvm(pde_t *pgdir, uint oldsz, uint newsz) +{ + char *mem; + uint a; + + if(newsz >= KERNBASE) + return 0; + if(newsz < oldsz) + return oldsz; + + a = PGROUNDUP(oldsz); + for(; a < newsz; a += PGSIZE){ + mem = kalloc(); + if(mem == 0){ + cprintf("allocuvm out of memory\n"); + deallocuvm(pgdir, newsz, oldsz); + return 0; + } + memset(mem, 0, PGSIZE); + if(mappages(pgdir, (char*)a, PGSIZE, V2P(mem), PTE_W|PTE_U) < 0){ + cprintf("allocuvm out of memory (2)\n"); + deallocuvm(pgdir, newsz, oldsz); + kfree(mem); + return 0; + } + } + return newsz; +} + +// Deallocate user pages to bring the process size from oldsz to +// newsz. oldsz and newsz need not be page-aligned, nor does newsz +// need to be less than oldsz. oldsz can be larger than the actual +// process size. Returns the new process size. +int +deallocuvm(pde_t *pgdir, uint oldsz, uint newsz) +{ + pte_t *pte; + uint a, pa; + + if(newsz >= oldsz) + return oldsz; + + a = PGROUNDUP(newsz); + for(; a < oldsz; a += PGSIZE){ + pte = walkpgdir(pgdir, (char*)a, 0); + if(!pte) + a = PGADDR(PDX(a) + 1, 0, 0) - PGSIZE; + else if((*pte & PTE_P) != 0){ + pa = PTE_ADDR(*pte); + if(pa == 0) + panic("kfree"); + char *v = P2V(pa); + kfree(v); + *pte = 0; + } + } + return newsz; +} + +// Free a page table and all the physical memory pages +// in the user part. 
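For context (based on stock xv6; the copy in kernel/src/proc.c may differ in detail): the sbrk path is the main caller of allocuvm()/deallocuvm(). growproc() grows or shrinks the address space and then reloads the hardware page table so stale TLB entries are dropped:

    int
    growproc(int n)
    {
      uint sz;
      struct proc *curproc = myproc();

      sz = curproc->sz;
      if(n > 0){
        if((sz = allocuvm(curproc->pgdir, sz, sz + n)) == 0)
          return -1;
      } else if(n < 0){
        if((sz = deallocuvm(curproc->pgdir, sz, sz + n)) == 0)
          return -1;
      }
      curproc->sz = sz;
      switchuvm(curproc);   // flush the TLB by reloading cr3
      return 0;
    }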
+void +freevm(pde_t *pgdir) +{ + uint i; + + if(pgdir == 0) + panic("freevm: no pgdir"); + deallocuvm(pgdir, KERNBASE, 0); + for(i = 0; i < NPDENTRIES; i++){ + if(pgdir[i] & PTE_P){ + char * v = P2V(PTE_ADDR(pgdir[i])); + kfree(v); + } + } + kfree((char*)pgdir); +} + +// Clear PTE_U on a page. Used to create an inaccessible +// page beneath the user stack. +void +clearpteu(pde_t *pgdir, char *uva) +{ + pte_t *pte; + + pte = walkpgdir(pgdir, uva, 0); + if(pte == 0) + panic("clearpteu"); + *pte &= ~PTE_U; +} + +// Given a parent process's page table, create a copy +// of it for a child. +pde_t* +copyuvm(pde_t *pgdir, uint sz) +{ + pde_t *d; + pte_t *pte; + uint pa, i, flags; + char *mem; + + if((d = setupkvm()) == 0) + return 0; + for(i = 0; i < sz; i += PGSIZE){ + if((pte = walkpgdir(pgdir, (void *) i, 0)) == 0) + panic("copyuvm: pte should exist"); + if(!(*pte & PTE_P)) + panic("copyuvm: page not present"); + pa = PTE_ADDR(*pte); + flags = PTE_FLAGS(*pte); + if((mem = kalloc()) == 0) + goto bad; + memmove(mem, (char*)P2V(pa), PGSIZE); + if(mappages(d, (void*)i, PGSIZE, V2P(mem), flags) < 0) { + kfree(mem); + goto bad; + } + } + return d; + +bad: + freevm(d); + return 0; +} + +//PAGEBREAK! +// Map user virtual address to kernel address. +char* +uva2ka(pde_t *pgdir, char *uva) +{ + pte_t *pte; + + pte = walkpgdir(pgdir, uva, 0); + if((*pte & PTE_P) == 0) + return 0; + if((*pte & PTE_U) == 0) + return 0; + return (char*)P2V(PTE_ADDR(*pte)); +} + +// Copy len bytes from p to user address va in page table pgdir. +// Most useful when pgdir is not the current page table. +// uva2ka ensures this only works for PTE_U pages. +int +copyout(pde_t *pgdir, uint va, void *p, uint len) +{ + char *buf, *pa0; + uint n, va0; + + buf = (char*)p; + while(len > 0){ + va0 = (uint)PGROUNDDOWN(va); + pa0 = uva2ka(pgdir, (char*)va0); + if(pa0 == 0) + return -1; + n = PGSIZE - (va - va0); + if(n > len) + n = len; + memmove(pa0 + (va - va0), buf, n); + len -= n; + buf += n; + va = va0 + PGSIZE; + } + return 0; +} + +//PAGEBREAK! +// Blank page. +//PAGEBREAK! +// Blank page. +//PAGEBREAK! +// Blank page. + diff --git a/kernel/tools/mksym.sh b/kernel/tools/mksym.sh new file mode 100755 index 0000000..ff90123 --- /dev/null +++ b/kernel/tools/mksym.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +objdump -t kernel | sed '1,/SYMBOL TABLE/d; s/ .* / /; /^$$/d' > kernel.sym diff --git a/kernel/tools/vectors.pl b/kernel/tools/vectors.pl new file mode 100755 index 0000000..57b49dd --- /dev/null +++ b/kernel/tools/vectors.pl @@ -0,0 +1,47 @@ +#!/usr/bin/perl -w + +# Generate vectors.S, the trap/interrupt entry points. +# There has to be one entry point per interrupt number +# since otherwise there's no way for trap() to discover +# the interrupt number. + +print "# generated by vectors.pl - do not edit\n"; +print "# handlers\n"; +print ".globl alltraps\n"; +for(my $i = 0; $i < 256; $i++){ + print ".globl vector$i\n"; + print "vector$i:\n"; + if(!($i == 8 || ($i >= 10 && $i <= 14) || $i == 17)){ + print " pushl \$0\n"; + } + print " pushl \$$i\n"; + print " jmp alltraps\n"; +} + +print "\n# vector table\n"; +print ".data\n"; +print ".globl vectors\n"; +print "vectors:\n"; +for(my $i = 0; $i < 256; $i++){ + print " .long vector$i\n"; +} + +# sample output: +# # handlers +# .globl alltraps +# .globl vector0 +# vector0: +# pushl $0 +# pushl $0 +# jmp alltraps +# ... +# +# # vector table +# .data +# .globl vectors +# vectors: +# .long vector0 +# .long vector1 +# .long vector2 +# ... 
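One last illustrative aside, not in the diff: copyout() in vm.c above has a natural mirror image for reading user memory into a kernel buffer, using the same page-by-page chunking and the same uva2ka() check:

    int
    copyin(pde_t *pgdir, void *p, uint va, uint len)
    {
      char *buf, *pa0;
      uint n, va0;

      buf = (char*)p;
      while(len > 0){
        va0 = (uint)PGROUNDDOWN(va);
        pa0 = uva2ka(pgdir, (char*)va0);   // 0 unless PTE_P and PTE_U are set
        if(pa0 == 0)
          return -1;
        n = PGSIZE - (va - va0);
        if(n > len)
          n = len;
        memmove(buf, pa0 + (va - va0), n);
        len -= n;
        buf += n;
        va = va0 + PGSIZE;
      }
      return 0;
    }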
+
diff --git a/user/Sources.cmake b/user/Sources.cmake
new file mode 100644
index 0000000..590652d
--- /dev/null
+++ b/user/Sources.cmake
@@ -0,0 +1,25 @@
+
+
+# Files in the "User-space library". These are common routines (like system
+# call definitions) that user-space programs will use.
+set(ulib_SOURCES
+  # Contains all of the system calls
+  asm/usys.S
+
+  # Common library functions
+  src/ulib.c
+  src/umalloc.c
+  src/printf.c
+  )
+
+# User-space programs
+set(user_SOURCES
+  # Init -- the first program the kernel launches
+  src/init.c
+  # The shell (launched by init)
+  src/sh.c
+
+  # Common utility programs
+  src/ls.c
+  )
+
-- 
2.47.3